From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id BE8AEA10DA for ; Fri, 2 Aug 2019 03:19:37 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id EC6041C22A; Fri, 2 Aug 2019 03:18:30 +0200 (CEST) Received: from mga11.intel.com (mga11.intel.com [192.55.52.93]) by dpdk.org (Postfix) with ESMTP id C6EA01C228 for ; Fri, 2 Aug 2019 03:18:29 +0200 (CEST) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga008.jf.intel.com ([10.7.209.65]) by fmsmga102.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 01 Aug 2019 18:18:28 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.64,336,1559545200"; d="scan'208";a="167087465" Received: from dpdk-rosen-02.sh.intel.com ([10.67.111.116]) by orsmga008.jf.intel.com with ESMTP; 01 Aug 2019 18:18:27 -0700 From: Rosen Xu To: dev@dpdk.org Cc: ferruh.yigit@intel.com, tianfei.zhang@intel.com, rosen.xu@intel.com, andy.pei@intel.com, david.lomartire@intel.com, qi.z.zhang@intel.com, xiaolong.ye@intel.com Date: Fri, 2 Aug 2019 09:18:46 +0800 Message-Id: <1564708727-164887-12-git-send-email-rosen.xu@intel.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1564708727-164887-1-git-send-email-rosen.xu@intel.com> References: <1564556752-19257-2-git-send-email-rosen.xu@intel.com> <1564708727-164887-1-git-send-email-rosen.xu@intel.com> Subject: [dpdk-dev] [PATCH v2 11/12] raw/ifpga_rawdev: add PCIe BDF devices tree scan X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Add PCIe BDF devices tree scan for ipn3ke. Signed-off-by: Rosen Xu --- drivers/raw/ifpga_rawdev/ifpga_rawdev.c | 553 +++++++++++++++++++++++++++++++- drivers/raw/ifpga_rawdev/ifpga_rawdev.h | 16 + 2 files changed, 562 insertions(+), 7 deletions(-) diff --git a/drivers/raw/ifpga_rawdev/ifpga_rawdev.c b/drivers/raw/ifpga_rawdev/ifpga_rawdev.c index 7121884..16f8387 100644 --- a/drivers/raw/ifpga_rawdev/ifpga_rawdev.c +++ b/drivers/raw/ifpga_rawdev/ifpga_rawdev.c @@ -8,6 +8,8 @@ #include #include #include +#include +#include #include #include #include @@ -18,7 +20,7 @@ #include #include #include - +#include #include #include #include @@ -26,6 +28,7 @@ #include #include #include +#include #include "base/opae_hw_api.h" #include "base/opae_ifpga_hw_api.h" @@ -38,6 +41,12 @@ #include "ifpga_rawdev.h" #include "ipn3ke_rawdev_api.h" +#define RTE_PCI_EXT_CAP_ID_ERR 0x01 /* Advanced Error Reporting */ +#define RTE_PCI_CFG_SPACE_SIZE 256 +#define RTE_PCI_CFG_SPACE_EXP_SIZE 4096 +#define RTE_PCI_EXT_CAP_ID(header) (int)(header & 0x0000ffff) +#define RTE_PCI_EXT_CAP_NEXT(header) ((header >> 20) & 0xffc) + int ifpga_rawdev_logtype; #define PCI_VENDOR_ID_INTEL 0x8086 @@ -65,6 +74,493 @@ { .vendor_id = 0, /* sentinel */ }, }; +static struct ifpga_rawdev ifpga_rawdevices[IFPGA_RAWDEV_NUM]; + +static int ifpga_monitor_start; +static pthread_t ifpga_monitor_start_thread; + +static struct ifpga_rawdev * +ifpga_rawdev_allocate(struct rte_rawdev *rawdev); +static int set_surprise_link_check_aer(struct ifpga_rawdev *ifpga_rdev); +static int ifpga_pci_find_next_ext_capability(unsigned int fd, +int start, int cap); +static int ifpga_pci_find_ext_capability(unsigned int fd, int cap); + +struct ifpga_rawdev * +ifpga_rawdev_get(const struct rte_rawdev *rawdev) +{ + struct ifpga_rawdev *dev; + unsigned int i; + + if (rawdev == NULL) + return NULL; + + for (i = 0; i < IFPGA_RAWDEV_NUM; i++) { + dev = &ifpga_rawdevices[i]; + if (dev->rawdev == rawdev) + return dev; + } + + return NULL; +} + +static inline uint8_t +ifpga_rawdev_find_free_device_index(void) +{ + uint16_t dev_id; + + for (dev_id = 0; dev_id < IFPGA_RAWDEV_NUM; dev_id++) { + if (ifpga_rawdevices[dev_id].rawdev == NULL) + return dev_id; + } + + return IFPGA_RAWDEV_NUM; +} +static struct ifpga_rawdev * +ifpga_rawdev_allocate(struct rte_rawdev *rawdev) +{ + struct ifpga_rawdev *dev; + uint16_t dev_id; + + dev = ifpga_rawdev_get(rawdev); + if (dev != NULL) { + IFPGA_RAWDEV_PMD_ERR("Event device already allocated!"); + return NULL; + } + + dev_id = ifpga_rawdev_find_free_device_index(); + if (dev_id == IFPGA_RAWDEV_NUM) { + IFPGA_RAWDEV_PMD_ERR("Reached maximum number of raw devices"); + return NULL; + } + + dev = &ifpga_rawdevices[dev_id]; + dev->rawdev = rawdev; + dev->dev_id = dev_id; + + return dev; +} + +static int ifpga_pci_find_next_ext_capability(unsigned int fd, +int start, int cap) +{ + uint32_t header; + int ttl; + int pos = RTE_PCI_CFG_SPACE_SIZE; + int ret; + + /* minimum 8 bytes per capability */ + ttl = (RTE_PCI_CFG_SPACE_EXP_SIZE - RTE_PCI_CFG_SPACE_SIZE) / 8; + + if (start) + pos = start; + ret = pread(fd, &header, sizeof(header), pos); + if (ret == -1) + return -1; + + /* + * If we have no capabilities, this is indicated by cap ID, + * cap version and next pointer all being 0. + */ + if (header == 0) + return 0; + + while (ttl-- > 0) { + if (RTE_PCI_EXT_CAP_ID(header) == cap && pos != start) + return pos; + + pos = RTE_PCI_EXT_CAP_NEXT(header); + if (pos < RTE_PCI_CFG_SPACE_SIZE) + break; + ret = pread(fd, &header, sizeof(header), pos); + if (ret == -1) + return -1; + } + + return 0; +} + +static int ifpga_pci_find_ext_capability(unsigned int fd, int cap) +{ + return ifpga_pci_find_next_ext_capability(fd, 0, cap); +} + +static int ifpga_get_dev_vendor_id(const char *bdf, + uint32_t *dev_id, uint32_t *vendor_id) +{ + int fd; + char path[1024]; + int ret; + uint32_t header; + + strlcpy(path, "/sys/bus/pci/devices/", sizeof(path)); + strlcat(path, bdf, sizeof(path)); + strlcat(path, "/config", sizeof(path)); + fd = open(path, O_RDWR); + if (fd < 0) + return -1; + ret = pread(fd, &header, sizeof(header), 0); + if (ret == -1) { + close(fd); + return -1; + } + (*vendor_id) = header & 0xffff; + (*dev_id) = (header >> 16) & 0xffff; + close(fd); + + return 0; +} +static int ifpga_rawdev_fill_info(struct ifpga_rawdev *ifpga_dev, + const char *bdf) +{ + char path[1024] = "/sys/bus/pci/devices/0000:"; + char link[1024], link1[1024]; + char dir[1024] = "/sys/devices/"; + char *c; + int ret; + char sub_brg_bdf[4][16]; + int point; + DIR *dp = NULL; + struct dirent *entry; + int i, j; + + unsigned int dom, bus, dev; + int func; + uint32_t dev_id, vendor_id; + + strlcat(path, bdf, sizeof(path)); + memset(link, 0, sizeof(link)); + memset(link1, 0, sizeof(link1)); + ret = readlink(path, link, (sizeof(link)-1)); + if (ret == -1) + return -1; + strlcpy(link1, link, sizeof(link1)); + memset(ifpga_dev->parent_bdf, 0, 16); + point = strlen(link); + if (point < 39) + return -1; + point -= 39; + link[point] = 0; + if (point < 12) + return -1; + point -= 12; + rte_memcpy(ifpga_dev->parent_bdf, &link[point], 12); + + point = strlen(link1); + if (point < 26) + return -1; + point -= 26; + link1[point] = 0; + if (point < 12) + return -1; + point -= 12; + c = strchr(link1, 'p'); + if (!c) + return -1; + strlcat(dir, c, sizeof(dir)); + + //scan folder + dp = opendir(dir); + if (dp == NULL) + return -1; + i = 0; + while ((entry = readdir(dp)) != NULL) { + if (i >= 4) + break; + if (entry->d_name[0] == '.') + continue; + if (strlen(entry->d_name) > 12) + continue; + if (sscanf(entry->d_name, "%x:%x:%x.%d", + &dom, &bus, &dev, &func) < 4) + continue; + else { + strlcpy(sub_brg_bdf[i], + entry->d_name, + sizeof(sub_brg_bdf[i])); + i++; + } + } + closedir(dp); + + //get fpga and fvl + j = 0; + for (i = 0; i < 4; i++) { + strlcpy(link, dir, sizeof(link)); + strlcat(link, "/", sizeof(link)); + strlcat(link, sub_brg_bdf[i], sizeof(link)); + dp = opendir(link); + if (dp == NULL) + return -1; + while ((entry = readdir(dp)) != NULL) { + if (j >= 8) + break; + if (entry->d_name[0] == '.') + continue; + + if (strlen(entry->d_name) > 12) + continue; + if (sscanf(entry->d_name, "%x:%x:%x.%d", + &dom, &bus, &dev, &func) < 4) + continue; + else { + if (ifpga_get_dev_vendor_id(entry->d_name, + &dev_id, &vendor_id)) + continue; + if (vendor_id == 0x8086 && + (dev_id == 0x0CF8 || + dev_id == 0x0D58 || + dev_id == 0x1580)) { + strlcpy(ifpga_dev->fvl_bdf[j], + entry->d_name, + sizeof(ifpga_dev->fvl_bdf[j])); + j++; + } + } + } + closedir(dp); + } + + return 0; +} + +#define HIGH_FATAL(_sens, value)\ + (((_sens)->flags & OPAE_SENSOR_HIGH_FATAL_VALID) &&\ + (value > (_sens)->high_fatal)) + +#define HIGH_WARN(_sens, value)\ + (((_sens)->flags & OPAE_SENSOR_HIGH_WARN_VALID) &&\ + (value > (_sens)->high_warn)) + +#define LOW_FATAL(_sens, value)\ + (((_sens)->flags & OPAE_SENSOR_LOW_FATAL_VALID) &&\ + (value > (_sens)->low_fatal)) + +#define LOW_WARN(_sens, value)\ + (((_sens)->flags & OPAE_SENSOR_LOW_WARN_VALID) &&\ + (value > (_sens)->low_warn)) + +#define AUX_VOLTAGE_WARN 11400 + +static int +ifpga_monitor_sensor(struct rte_rawdev *raw_dev, + bool *gsd_start) +{ + struct opae_adapter *adapter; + struct opae_manager *mgr; + struct opae_sensor_info *sensor; + unsigned int value; + int ret; + + adapter = ifpga_rawdev_get_priv(raw_dev); + if (!adapter) + return -ENODEV; + + mgr = opae_adapter_get_mgr(adapter); + if (!mgr) + return -ENODEV; + + opae_mgr_for_each_sensor(sensor) { + if (!sensor) + goto fail; + + if (!(sensor->flags & OPAE_SENSOR_VALID)) + goto fail; + + ret = opae_mgr_get_sensor_value(mgr, sensor, &value); + if (ret) + goto fail; + + if (value == 0xdeadbeef) { + IFPGA_RAWDEV_PMD_ERR("sensor is invalid value %x\n", + value); + continue; + } + + /* monitor temperature sensors */ + if (!strcmp(sensor->name, "Board Temperature") || + !strcmp(sensor->name, "FPGA Die Temperature")) { + IFPGA_RAWDEV_PMD_INFO("read sensor %s %d %d %d\n", + sensor->name, value, sensor->high_warn, + sensor->high_fatal); + + if (HIGH_WARN(sensor, value) || + LOW_WARN(sensor, value)) { + IFPGA_RAWDEV_PMD_INFO("%s reach theshold %d\n", + sensor->name, value); + *gsd_start = true; + break; + } + } + + /* monitor 12V AUX sensor */ + if (!strcmp(sensor->name, "12V AUX Voltage")) { + if (value < AUX_VOLTAGE_WARN) { + IFPGA_RAWDEV_PMD_INFO("%s reach theshold %d\n", + sensor->name, value); + *gsd_start = true; + break; + } + } + } + + return 0; +fail: + return -EFAULT; +} + +static int set_surprise_link_check_aer(struct ifpga_rawdev *ifpga_rdev) +{ + struct rte_rawdev *rdev; + int fd = -1; + char path[1024]; + int pos; + int ret; + uint32_t data; + bool enable = 0; + uint32_t aer_new0, aer_new1; + + if (!ifpga_rdev) { + printf("\n device does not exist\n"); + return -EFAULT; + } + + rdev = ifpga_rdev->rawdev; + if (ifpga_rdev->aer_enable) + return -EFAULT; + if (ifpga_monitor_sensor(rdev, &enable)) + return -EFAULT; + if (enable) { + IFPGA_RAWDEV_PMD_ERR("Set AER, pls graceful shutdown\n"); + ifpga_rdev->aer_enable = 1; + //get bridge fd + strlcpy(path, "/sys/bus/pci/devices/", sizeof(path)); + strlcat(path, ifpga_rdev->parent_bdf, sizeof(path)); + strlcat(path, "/config", sizeof(path)); + fd = open(path, O_RDWR); + if (fd < 0) + goto end; + pos = ifpga_pci_find_ext_capability(fd, RTE_PCI_EXT_CAP_ID_ERR); + if (!pos) + goto end; + //save previout ECAP_AER+0x08 + ret = pread(fd, &data, sizeof(data), pos+0x08); + if (ret == -1) + goto end; + ifpga_rdev->aer_old[0] = data; + //save previout ECAP_AER+0x14 + ret = pread(fd, &data, sizeof(data), pos+0x14); + if (ret == -1) + goto end; + ifpga_rdev->aer_old[1] = data; + + //set ECAP_AER+0x08 to 0xFFFFFFFF + data = 0xffffffff; + ret = pwrite(fd, &data, 4, pos+0x08); + if (ret == -1) + goto end; + //set ECAP_AER+0x14 to 0xFFFFFFFF + ret = pwrite(fd, &data, 4, pos+0x14); + if (ret == -1) + goto end; + + //read current ECAP_AER+0x08 + ret = pread(fd, &data, sizeof(data), pos+0x08); + if (ret == -1) + goto end; + aer_new0 = data; + //read current ECAP_AER+0x14 + ret = pread(fd, &data, sizeof(data), pos+0x14); + if (ret == -1) + goto end; + aer_new1 = data; + + if (fd != -1) + close(fd); + + printf(">>>>>>Set AER %x,%x %x,%x\n", + ifpga_rdev->aer_old[0], ifpga_rdev->aer_old[1], + aer_new0, aer_new1); + + return 1; + } + +end: + if (fd != -1) + close(fd); + return -EFAULT; +} + +static void * +ifpga_rawdev_gsd_handle(__rte_unused void *param) +{ + struct ifpga_rawdev *ifpga_rdev; + int i; + int gsd_enable, ret; +#define MS 1000 + + while (1) { + gsd_enable = 0; + for (i = 0; i < IFPGA_RAWDEV_NUM; i++) { + ifpga_rdev = &ifpga_rawdevices[i]; + if (ifpga_rdev->rawdev) { + printf(">>>>>>Check Device %s\n", + ifpga_rdev->rawdev->name); + ret = set_surprise_link_check_aer(ifpga_rdev); + if (ret == 1) + gsd_enable = 1; + } + } + + if (gsd_enable) + rte_exit(EXIT_FAILURE, ">>>>>>Graceful Shutdown\n"); + + rte_delay_us(1000 * MS); + } + + return NULL; +} + +static int +ifpga_monitor_start_func(void) +{ + int ret; + + if (ifpga_monitor_start == 0) { + ret = pthread_create(&ifpga_monitor_start_thread, + NULL, + ifpga_rawdev_gsd_handle, NULL); + if (ret) { + IFPGA_RAWDEV_PMD_ERR("Fail to create ifpga nonitor thread"); + return -1; + } + ifpga_monitor_start = 1; + } + + return 0; +} +static int +ifpga_monitor_stop_func(void) +{ + int ret; + + if (ifpga_monitor_start == 1) { + ret = pthread_cancel(ifpga_monitor_start_thread); + if (ret) + IFPGA_RAWDEV_PMD_ERR("Can't cancel the thread"); + + ret = pthread_join(ifpga_monitor_start_thread, NULL); + if (ret) + IFPGA_RAWDEV_PMD_ERR("Can't join the thread"); + + ifpga_monitor_start = 0; + + return ret; + } + + return 0; +} + static int ifpga_fill_afu_dev(struct opae_accelerator *acc, struct rte_afu_device *afu_dev) @@ -373,8 +869,9 @@ if (ret) return ret; - memcpy(&afu_pr_conf->afu_id.uuid.uuid_low, uuid.b, sizeof(u64)); - memcpy(&afu_pr_conf->afu_id.uuid.uuid_high, uuid.b + 8, sizeof(u64)); + rte_memcpy(&afu_pr_conf->afu_id.uuid.uuid_low, uuid.b, sizeof(u64)); + rte_memcpy(&afu_pr_conf->afu_id.uuid.uuid_high, + uuid.b + 8, sizeof(u64)); IFPGA_RAWDEV_PMD_INFO("%s: uuid_l=0x%lx, uuid_h=0x%lx\n", __func__, (unsigned long)afu_pr_conf->afu_id.uuid.uuid_low, @@ -644,7 +1141,6 @@ static int fme_err_read_seu_emr(struct opae_manager *mgr) { - struct feature_prop prop; u64 val; int ret; @@ -658,7 +1154,7 @@ if (ret) return -EINVAL; - IFPGA_RAWDEV_PMD_INFO("seu emr high: 0x%lx\n", prop.data); + IFPGA_RAWDEV_PMD_INFO("seu emr high: 0x%lx\n", val); return 0; } @@ -844,6 +1340,7 @@ static int ifpga_register_fme_interrupt(struct opae_manager *mgr) { int ret = 0; struct rte_rawdev *rawdev = NULL; + struct ifpga_rawdev *dev = NULL; struct opae_adapter *adapter = NULL; struct opae_manager *mgr = NULL; struct opae_adapter_data_pci *data = NULL; @@ -857,7 +1354,7 @@ static int ifpga_register_fme_interrupt(struct opae_manager *mgr) } memset(name, 0, sizeof(name)); - snprintf(name, RTE_RAWDEV_NAME_MAX_LEN, "IFPGA:%x:%02x.%x", + snprintf(name, RTE_RAWDEV_NAME_MAX_LEN, "IFPGA:%02x:%02x.%x", pci_dev->addr.bus, pci_dev->addr.devid, pci_dev->addr.function); IFPGA_RAWDEV_PMD_INFO("Init %s on NUMA node %d", name, rte_socket_id()); @@ -871,6 +1368,14 @@ static int ifpga_register_fme_interrupt(struct opae_manager *mgr) goto cleanup; } + dev = ifpga_rawdev_allocate(rawdev); + if (dev == NULL) { + IFPGA_RAWDEV_PMD_ERR("Unable to allocate ifpga_rawdevice"); + ret = -EINVAL; + goto cleanup; + } + dev->aer_enable = 0; + /* alloc OPAE_FPGA_PCI data to register to OPAE hardware level API */ data = opae_adapter_data_alloc(OPAE_FPGA_PCI); if (!data) { @@ -989,6 +1494,7 @@ static int ifpga_register_fme_interrupt(struct opae_manager *mgr) static int ifpga_rawdev_pci_remove(struct rte_pci_device *pci_dev) { + ifpga_monitor_stop_func(); return ifpga_rawdev_destroy(pci_dev); } @@ -1020,13 +1526,32 @@ static int ifpga_register_fme_interrupt(struct opae_manager *mgr) NULL }; +static int ifpga_rawdev_get_string_arg(const char *key __rte_unused, + const char *value, void *extra_args) +{ + int size; + if (!value || !extra_args) + return -EINVAL; + + size = strlen(value) + 1; + *(char **)extra_args = rte_malloc(NULL, size, RTE_CACHE_LINE_SIZE); + strlcpy(*(char **)extra_args, value, size); + + if (!*(char **)extra_args) + return -ENOMEM; + + return 0; +} static int ifpga_cfg_probe(struct rte_vdev_device *dev) { struct rte_devargs *devargs; struct rte_kvargs *kvlist = NULL; + struct rte_rawdev *rawdev = NULL; + struct ifpga_rawdev *ifpga_dev; int port; char *name = NULL; + const char *bdf; char dev_name[RTE_RAWDEV_NAME_MAX_LEN]; int ret = -1; @@ -1040,7 +1565,8 @@ static int ifpga_register_fme_interrupt(struct opae_manager *mgr) if (rte_kvargs_count(kvlist, IFPGA_ARG_NAME) == 1) { if (rte_kvargs_process(kvlist, IFPGA_ARG_NAME, - &rte_ifpga_get_string_arg, &name) < 0) { + &ifpga_rawdev_get_string_arg, + &name) < 0) { IFPGA_RAWDEV_PMD_ERR("error to parse %s", IFPGA_ARG_NAME); goto end; @@ -1067,6 +1593,19 @@ static int ifpga_register_fme_interrupt(struct opae_manager *mgr) } memset(dev_name, 0, sizeof(dev_name)); + snprintf(dev_name, RTE_RAWDEV_NAME_MAX_LEN, "IFPGA:%s", name); + rawdev = rte_rawdev_pmd_get_named_dev(dev_name); + if (!rawdev) + goto end; + ifpga_dev = ifpga_rawdev_get(rawdev); + if (!ifpga_dev) + goto end; + bdf = name; + ifpga_rawdev_fill_info(ifpga_dev, bdf); + + ifpga_monitor_start_func(); + + memset(dev_name, 0, sizeof(dev_name)); snprintf(dev_name, RTE_RAWDEV_NAME_MAX_LEN, "%d|%s", port, name); diff --git a/drivers/raw/ifpga_rawdev/ifpga_rawdev.h b/drivers/raw/ifpga_rawdev/ifpga_rawdev.h index e153dba..bd42083 100644 --- a/drivers/raw/ifpga_rawdev/ifpga_rawdev.h +++ b/drivers/raw/ifpga_rawdev/ifpga_rawdev.h @@ -46,4 +46,20 @@ enum ifpga_rawdev_device_state { return rawdev->dev_private; } +#define IFPGA_RAWDEV_MSIX_IRQ_NUM 7 +#define IFPGA_RAWDEV_NUM 32 + +struct ifpga_rawdev { + int dev_id; + struct rte_rawdev *rawdev; + int aer_enable; + int intr_fd[IFPGA_RAWDEV_MSIX_IRQ_NUM+1]; + uint32_t aer_old[2]; + char fvl_bdf[8][16]; + char parent_bdf[16]; +}; + +struct ifpga_rawdev * +ifpga_rawdev_get(const struct rte_rawdev *rawdev); + #endif /* _IFPGA_RAWDEV_H_ */ -- 1.8.3.1