From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga02.intel.com (mga02.intel.com [134.134.136.20]) by dpdk.org (Postfix) with ESMTP id BFE9C199C8 for ; Thu, 30 Nov 2017 19:42:11 +0100 (CET) Received: from fmsmga005.fm.intel.com ([10.253.24.32]) by orsmga101.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 30 Nov 2017 10:42:10 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.45,341,1508828400"; d="scan'208";a="182109678" Received: from dpdk06.sh.intel.com ([10.67.110.196]) by fmsmga005.fm.intel.com with ESMTP; 30 Nov 2017 10:42:09 -0800 From: Jianfeng Tan To: dev@dpdk.org Cc: anatoly.burakov@intel.com, bruce.richardson@intel.com, konstantin.ananyev@intel.com, thomas@monjalon.net, Jianfeng Tan Date: Thu, 30 Nov 2017 18:44:10 +0000 Message-Id: <1512067450-59203-4-git-send-email-jianfeng.tan@intel.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1512067450-59203-1-git-send-email-jianfeng.tan@intel.com> References: <1512067450-59203-1-git-send-email-jianfeng.tan@intel.com> Subject: [dpdk-dev] [PATCH 3/3] vfio: use the generic multi-process channel X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 30 Nov 2017 18:42:12 -0000 Previously, vfio had its own channel for the secondary process to get the container fd and group fd from the primary process. This patch switches vfio to use the generic multi-process (mp) channel instead. 
Signed-off-by: Jianfeng Tan --- lib/librte_eal/linuxapp/eal/eal.c | 14 +- lib/librte_eal/linuxapp/eal/eal_vfio.c | 139 +++------ lib/librte_eal/linuxapp/eal/eal_vfio.h | 15 +- lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c | 416 ++++--------------------- 4 files changed, 109 insertions(+), 475 deletions(-) diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c index a84eab4..93824bf 100644 --- a/lib/librte_eal/linuxapp/eal/eal.c +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -713,18 +713,8 @@ static int rte_eal_vfio_setup(void) return -1; vfio_enabled = rte_vfio_is_enabled("vfio"); - if (vfio_enabled) { - - /* if we are primary process, create a thread to communicate with - * secondary processes. the thread will use a socket to wait for - * requests from secondary process to send open file descriptors, - * because VFIO does not allow multiple open descriptors on a group or - * VFIO container. - */ - if (internal_config.process_type == RTE_PROC_PRIMARY && - vfio_mp_sync_setup() < 0) - return -1; - } + if (vfio_enabled && vfio_mp_sync_setup() < 0) + return -1; return 0; } diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c index 58f0123..dbea350 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.c +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c @@ -68,9 +68,11 @@ int vfio_get_group_fd(int iommu_group_no) { int i; + int ret; int vfio_group_fd; char filename[PATH_MAX]; struct vfio_group *cur_grp; + struct vfio_mp_param p; /* check if we already have the group descriptor open */ for (i = 0; i < VFIO_MAX_GROUPS; i++) @@ -129,51 +131,21 @@ vfio_get_group_fd(int iommu_group_no) vfio_cfg.vfio_active_groups++; return vfio_group_fd; } - /* if we're in a secondary process, request group fd from the primary - * process via our socket - */ - else { - int socket_fd, ret; + /* For secondary process, request group fd from the primary */ - socket_fd = vfio_mp_sync_connect_to_primary(); + p.req = SOCKET_REQ_GROUP; + 
p.group_no = iommu_group_no; - if (socket_fd < 0) { - RTE_LOG(ERR, EAL, " cannot connect to primary process!\n"); - return -1; - } - if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_GROUP) < 0) { - RTE_LOG(ERR, EAL, " cannot request container fd!\n"); - close(socket_fd); - return -1; - } - if (vfio_mp_sync_send_request(socket_fd, iommu_group_no) < 0) { - RTE_LOG(ERR, EAL, " cannot send group number!\n"); - close(socket_fd); - return -1; - } - ret = vfio_mp_sync_receive_request(socket_fd); - switch (ret) { - case SOCKET_NO_FD: - close(socket_fd); - return 0; - case SOCKET_OK: - vfio_group_fd = vfio_mp_sync_receive_fd(socket_fd); - /* if we got the fd, store it and return it */ - if (vfio_group_fd > 0) { - close(socket_fd); - cur_grp->group_no = iommu_group_no; - cur_grp->fd = vfio_group_fd; - vfio_cfg.vfio_active_groups++; - return vfio_group_fd; - } - /* fall-through on error */ - default: - RTE_LOG(ERR, EAL, " cannot get container fd!\n"); - close(socket_fd); - return -1; - } + ret = rte_eal_mp_sendmsg("vfio", &p, sizeof(p), NULL, 0, 1); + if (ret < 0) { + RTE_LOG(ERR, EAL, " cannot request group fd!\n"); + cur_grp->group_no = -1; + } else { + cur_grp->group_no = iommu_group_no; + vfio_cfg.vfio_active_groups++; } - return -1; + + return ret; } @@ -229,11 +201,12 @@ int clear_group(int vfio_group_fd) { int i; - int socket_fd, ret; + struct vfio_mp_param p; + + i = get_vfio_group_idx(vfio_group_fd); if (internal_config.process_type == RTE_PROC_PRIMARY) { - i = get_vfio_group_idx(vfio_group_fd); if (i < 0) return -1; vfio_cfg.vfio_groups[i].group_no = -1; @@ -243,44 +216,20 @@ clear_group(int vfio_group_fd) return 0; } - /* This is just for SECONDARY processes */ - socket_fd = vfio_mp_sync_connect_to_primary(); - - if (socket_fd < 0) { - RTE_LOG(ERR, EAL, " cannot connect to primary process!\n"); - return -1; - } + p.req = SOCKET_CLR_GROUP; + p.group_no = vfio_cfg.vfio_groups[i].group_no; - if (vfio_mp_sync_send_request(socket_fd, SOCKET_CLR_GROUP) < 0) { - 
RTE_LOG(ERR, EAL, " cannot request container fd!\n"); - close(socket_fd); + if (rte_eal_mp_sendmsg("vfio", &p, sizeof(p), NULL, 0, 1) < 0) { + RTE_LOG(ERR, EAL, "request primary to clear group fd, failed!\n"); return -1; } - if (vfio_mp_sync_send_request(socket_fd, vfio_group_fd) < 0) { - RTE_LOG(ERR, EAL, " cannot send group fd!\n"); - close(socket_fd); - return -1; - } + vfio_cfg.vfio_groups[i].group_no = -1; + vfio_cfg.vfio_groups[i].fd = -1; + vfio_cfg.vfio_groups[i].devices = 0; + vfio_cfg.vfio_active_groups--; - ret = vfio_mp_sync_receive_request(socket_fd); - switch (ret) { - case SOCKET_NO_FD: - RTE_LOG(ERR, EAL, " BAD VFIO group fd!\n"); - close(socket_fd); - break; - case SOCKET_OK: - close(socket_fd); - return 0; - case SOCKET_ERR: - RTE_LOG(ERR, EAL, " Socket error\n"); - close(socket_fd); - break; - default: - RTE_LOG(ERR, EAL, " UNKNOWN reply, %d\n", ret); - close(socket_fd); - } - return -1; + return 0; } int @@ -590,6 +539,7 @@ int vfio_get_container_fd(void) { int ret, vfio_container_fd; + struct vfio_mp_param p; /* if we're in a primary process, try to open the container */ if (internal_config.process_type == RTE_PROC_PRIMARY) { @@ -620,34 +570,17 @@ vfio_get_container_fd(void) } return vfio_container_fd; - } else { - /* - * if we're in a secondary process, request container fd from the - * primary process via our socket - */ - int socket_fd; - - socket_fd = vfio_mp_sync_connect_to_primary(); - if (socket_fd < 0) { - RTE_LOG(ERR, EAL, " cannot connect to primary process!\n"); - return -1; - } - if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_CONTAINER) < 0) { - RTE_LOG(ERR, EAL, " cannot request container fd!\n"); - close(socket_fd); - return -1; - } - vfio_container_fd = vfio_mp_sync_receive_fd(socket_fd); - if (vfio_container_fd < 0) { - RTE_LOG(ERR, EAL, " cannot get container fd!\n"); - close(socket_fd); - return -1; - } - close(socket_fd); - return vfio_container_fd; } - return -1; + /* For secondary process, request container fd from 
primary process */ + + p.req = SOCKET_REQ_CONTAINER; + + ret = rte_eal_mp_sendmsg("vfio", &p, sizeof(p), NULL, 0, 1); + if (ret < 0) + RTE_LOG(ERR, EAL, " cannot request container fd!\n"); + + return ret; } int diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h index ba7892b..7907c22 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.h +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h @@ -117,15 +117,6 @@ struct vfio_iommu_spapr_tce_info { #define VFIO_MAX_GROUPS 64 /* - * Function prototypes for VFIO multiprocess sync functions - */ -int vfio_mp_sync_send_request(int socket, int req); -int vfio_mp_sync_receive_request(int socket); -int vfio_mp_sync_send_fd(int socket, int fd); -int vfio_mp_sync_receive_fd(int socket); -int vfio_mp_sync_connect_to_primary(void); - -/* * we don't need to store device fd's anywhere since they can be obtained from * the group fd via an ioctl() call. */ @@ -190,6 +181,12 @@ int vfio_mp_sync_setup(void); #define SOCKET_NO_FD 0x1 #define SOCKET_ERR 0xFF +struct vfio_mp_param { + int req; + int result; + int group_no; +}; + #endif /* VFIO_PRESENT */ #endif /* EAL_VFIO_H_ */ diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c index b53ed7e..dfba58f 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c +++ b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,31 +31,11 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -#include -#include -#include -#include - -/* sys/un.h with __USE_MISC uses strlen, which is unsafe */ -#ifdef __USE_MISC -#define REMOVED_USE_MISC -#undef __USE_MISC -#endif -#include -/* make sure we redefine __USE_MISC only if it was previously undefined */ -#ifdef REMOVED_USE_MISC -#define __USE_MISC -#undef REMOVED_USE_MISC -#endif - #include -#include -#include #include +#include -#include "eal_filesystem.h" #include "eal_vfio.h" -#include "eal_thread.h" /** * @file @@ -66,360 +46,94 @@ #ifdef VFIO_PRESENT -#define SOCKET_PATH_FMT "%s/.%s_mp_socket" -#define CMSGLEN (CMSG_LEN(sizeof(int))) -#define FD_TO_CMSGHDR(fd, chdr) \ - do {\ - (chdr).cmsg_len = CMSGLEN;\ - (chdr).cmsg_level = SOL_SOCKET;\ - (chdr).cmsg_type = SCM_RIGHTS;\ - memcpy((chdr).__cmsg_data, &(fd), sizeof(fd));\ - } while (0) -#define CMSGHDR_TO_FD(chdr, fd) \ - memcpy(&(fd), (chdr).__cmsg_data, sizeof(fd)) - -static pthread_t socket_thread; -static int mp_socket_fd; - - -/* get socket path (/var/run if root, $HOME otherwise) */ -static void -get_socket_path(char *buffer, int bufsz) -{ - const char *dir = "/var/run"; - const char *home_dir = getenv("HOME"); - - if (getuid() != 0 && home_dir != NULL) - dir = home_dir; - - /* use current prefix as file path */ - snprintf(buffer, bufsz, SOCKET_PATH_FMT, dir, - internal_config.hugefile_prefix); -} - - - -/* - * data flow for socket comm protocol: - * 1. client sends SOCKET_REQ_CONTAINER or SOCKET_REQ_GROUP - * 1a. in case of SOCKET_REQ_GROUP, client also then sends group number - * 2. server receives message - * 2a. in case of invalid group, SOCKET_ERR is sent back to client - * 2b. in case of unbound group, SOCKET_NO_FD is sent back to client - * 2c. in case of valid group, SOCKET_OK is sent and immediately followed by fd - * - * in case of any error, socket is closed. 
- */ - -/* send a request, return -1 on error */ -int -vfio_mp_sync_send_request(int socket, int req) -{ - struct msghdr hdr; - struct iovec iov; - int buf; - int ret; - - memset(&hdr, 0, sizeof(hdr)); - - buf = req; - - hdr.msg_iov = &iov; - hdr.msg_iovlen = 1; - iov.iov_base = (char *) &buf; - iov.iov_len = sizeof(buf); - - ret = sendmsg(socket, &hdr, 0); - if (ret < 0) - return -1; - return 0; -} - -/* receive a request and return it */ -int -vfio_mp_sync_receive_request(int socket) -{ - int buf; - struct msghdr hdr; - struct iovec iov; - int ret, req; - - memset(&hdr, 0, sizeof(hdr)); - - buf = SOCKET_ERR; - - hdr.msg_iov = &iov; - hdr.msg_iovlen = 1; - iov.iov_base = (char *) &buf; - iov.iov_len = sizeof(buf); - - ret = recvmsg(socket, &hdr, 0); - if (ret < 0) - return -1; - - req = buf; - - return req; -} - -/* send OK in message, fd in control message */ -int -vfio_mp_sync_send_fd(int socket, int fd) -{ - int buf; - struct msghdr hdr; - struct cmsghdr *chdr; - char chdr_buf[CMSGLEN]; - struct iovec iov; - int ret; - - chdr = (struct cmsghdr *) chdr_buf; - memset(chdr, 0, sizeof(chdr_buf)); - memset(&hdr, 0, sizeof(hdr)); - - hdr.msg_iov = &iov; - hdr.msg_iovlen = 1; - iov.iov_base = (char *) &buf; - iov.iov_len = sizeof(buf); - hdr.msg_control = chdr; - hdr.msg_controllen = CMSGLEN; - - buf = SOCKET_OK; - FD_TO_CMSGHDR(fd, *chdr); - - ret = sendmsg(socket, &hdr, 0); - if (ret < 0) - return -1; - return 0; -} - -/* receive OK in message, fd in control message */ -int -vfio_mp_sync_receive_fd(int socket) -{ - int buf; - struct msghdr hdr; - struct cmsghdr *chdr; - char chdr_buf[CMSGLEN]; - struct iovec iov; - int ret, req, fd; - - buf = SOCKET_ERR; - - chdr = (struct cmsghdr *) chdr_buf; - memset(chdr, 0, sizeof(chdr_buf)); - memset(&hdr, 0, sizeof(hdr)); - - hdr.msg_iov = &iov; - hdr.msg_iovlen = 1; - iov.iov_base = (char *) &buf; - iov.iov_len = sizeof(buf); - hdr.msg_control = chdr; - hdr.msg_controllen = CMSGLEN; - - ret = recvmsg(socket, &hdr, 0); - if 
(ret < 0) - return -1; - - req = buf; - - if (req != SOCKET_OK) - return -1; - - CMSGHDR_TO_FD(*chdr, fd); - - return fd; -} - -/* connect socket_fd in secondary process to the primary process's socket */ -int -vfio_mp_sync_connect_to_primary(void) +static int +vfio_mp_primary(const void *params, int len, + int fd[] __rte_unused, int fds_num __rte_unused) { - struct sockaddr_un addr; - socklen_t sockaddr_len; - int socket_fd; + int fds[1]; + const struct vfio_mp_param *p = params; + struct vfio_mp_param r; - /* set up a socket */ - socket_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0); - if (socket_fd < 0) { - RTE_LOG(ERR, EAL, "Failed to create socket!\n"); + if (len != sizeof(*p)) { + RTE_LOG(ERR, EAL, "vfio received invalid message!\n"); return -1; } - get_socket_path(addr.sun_path, sizeof(addr.sun_path)); - addr.sun_family = AF_UNIX; - - sockaddr_len = sizeof(struct sockaddr_un); - - if (connect(socket_fd, (struct sockaddr *) &addr, sockaddr_len) == 0) - return socket_fd; - - /* if connect failed */ - close(socket_fd); - return -1; -} - - - -/* - * socket listening thread for primary process - */ -static __attribute__((noreturn)) void * -vfio_mp_sync_thread(void __rte_unused * arg) -{ - int ret, fd, vfio_data; - - /* wait for requests on the socket */ - for (;;) { - int conn_sock; - struct sockaddr_un addr; - socklen_t sockaddr_len = sizeof(addr); - - /* this is a blocking call */ - conn_sock = accept(mp_socket_fd, (struct sockaddr *) &addr, - &sockaddr_len); - - /* just restart on error */ - if (conn_sock == -1) - continue; - - /* set socket to linger after close */ - struct linger l; - l.l_onoff = 1; - l.l_linger = 60; - - if (setsockopt(conn_sock, SOL_SOCKET, SO_LINGER, &l, sizeof(l)) < 0) - RTE_LOG(WARNING, EAL, "Cannot set SO_LINGER option " - "on listen socket (%s)\n", strerror(errno)); - - ret = vfio_mp_sync_receive_request(conn_sock); - - switch (ret) { - case SOCKET_REQ_CONTAINER: - fd = vfio_get_container_fd(); - if (fd < 0) - 
vfio_mp_sync_send_request(conn_sock, SOCKET_ERR); - else - vfio_mp_sync_send_fd(conn_sock, fd); - if (fd >= 0) - close(fd); - break; - case SOCKET_REQ_GROUP: - /* wait for group number */ - vfio_data = vfio_mp_sync_receive_request(conn_sock); - if (vfio_data < 0) { - close(conn_sock); - continue; - } - - fd = vfio_get_group_fd(vfio_data); - - if (fd < 0) - vfio_mp_sync_send_request(conn_sock, SOCKET_ERR); + switch (p->req) { + case SOCKET_REQ_GROUP: + r.req = SOCKET_REQ_GROUP; + r.group_no = p->group_no; + fds[0] = vfio_get_group_fd(p->group_no); + if (fds[0] < 0) { + r.result = SOCKET_ERR; + rte_eal_mp_sendmsg("vfio", &r, sizeof(r), NULL, 0, 0); + } else if (fds[0] == 0) { /* if VFIO group exists but isn't bound to VFIO driver */ - else if (fd == 0) - vfio_mp_sync_send_request(conn_sock, SOCKET_NO_FD); + r.result = SOCKET_NO_FD; + rte_eal_mp_sendmsg("vfio", &r, sizeof(r), NULL, 0, 0); + } else { /* if group exists and is bound to VFIO driver */ - else { - vfio_mp_sync_send_request(conn_sock, SOCKET_OK); - vfio_mp_sync_send_fd(conn_sock, fd); - } - break; - case SOCKET_CLR_GROUP: - /* wait for group fd */ - vfio_data = vfio_mp_sync_receive_request(conn_sock); - if (vfio_data < 0) { - close(conn_sock); - continue; - } - - ret = clear_group(vfio_data); - - if (ret < 0) - vfio_mp_sync_send_request(conn_sock, SOCKET_NO_FD); - else - vfio_mp_sync_send_request(conn_sock, SOCKET_OK); - break; - default: - vfio_mp_sync_send_request(conn_sock, SOCKET_ERR); - break; + r.result = SOCKET_OK; + rte_eal_mp_sendmsg("vfio", &r, sizeof(r), fds, 1, 0); } - close(conn_sock); + break; + case SOCKET_REQ_CONTAINER: + r.req = SOCKET_REQ_CONTAINER; + fds[0] = vfio_get_container_fd(); + rte_eal_mp_sendmsg("vfio", &r, sizeof(r), fds, 1, 0); + break; + default: + RTE_LOG(ERR, EAL, "vfio received invalid message!\n"); + return -1; } + + return 0; } static int -vfio_mp_sync_socket_setup(void) +vfio_mp_secondary(const void *params, int len, int fds[], + int fds_num __rte_unused) { - int ret, 
socket_fd; - struct sockaddr_un addr; - socklen_t sockaddr_len; - - /* set up a socket */ - socket_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0); - if (socket_fd < 0) { - RTE_LOG(ERR, EAL, "Failed to create socket!\n"); - return -1; - } - - get_socket_path(addr.sun_path, sizeof(addr.sun_path)); - addr.sun_family = AF_UNIX; - - sockaddr_len = sizeof(struct sockaddr_un); + const struct vfio_mp_param *p = params; - unlink(addr.sun_path); - - ret = bind(socket_fd, (struct sockaddr *) &addr, sockaddr_len); - if (ret) { - RTE_LOG(ERR, EAL, "Failed to bind socket: %s!\n", strerror(errno)); - close(socket_fd); + if (len != sizeof(*p)) { + RTE_LOG(ERR, EAL, "vfio received invalid message!\n"); return -1; } - ret = listen(socket_fd, 50); - if (ret) { - RTE_LOG(ERR, EAL, "Failed to listen: %s!\n", strerror(errno)); - close(socket_fd); + switch (p->req) { + case SOCKET_REQ_GROUP: + switch (p->result) { + case SOCKET_NO_FD: + return 0; + case SOCKET_OK: + if (fds_num == 1 && fds[0] > 0) + return fds[0]; + /* fall-through on error */ + default: + RTE_LOG(ERR, EAL, " cannot get group fd!\n"); + return -1; + } + case SOCKET_REQ_CONTAINER: + if (fds_num == 1 && fds[0] > 0) + return fds[0]; return -1; + default: + RTE_LOG(ERR, EAL, "Invalid req!\n"); } - - /* save the socket in local configuration */ - mp_socket_fd = socket_fd; - - return 0; + return -1; } -/* - * set up a local socket and tell it to listen for incoming connections - */ int vfio_mp_sync_setup(void) { - int ret; - char thread_name[RTE_MAX_THREAD_NAME_LEN]; + rte_eal_mp_t action; + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + action = vfio_mp_primary; + else + action = vfio_mp_secondary; - if (vfio_mp_sync_socket_setup() < 0) { - RTE_LOG(ERR, EAL, "Failed to set up local socket!\n"); - return -1; - } - - ret = pthread_create(&socket_thread, NULL, - vfio_mp_sync_thread, NULL); - if (ret) { - RTE_LOG(ERR, EAL, - "Failed to create thread for communication with secondary processes!\n"); - close(mp_socket_fd); - return -1; 
- } - - /* Set thread_name for aid in debugging. */ - snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "vfio-sync"); - ret = rte_thread_setname(socket_thread, thread_name); - if (ret) - RTE_LOG(DEBUG, EAL, - "Failed to set thread name for secondary processes!\n"); - - return 0; + return rte_eal_mp_action_register("vfio", action); } #endif -- 2.7.4