From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-oi0-f47.google.com (mail-oi0-f47.google.com [209.85.218.47]) by dpdk.org (Postfix) with ESMTP id E5EDA3208 for ; Fri, 27 Feb 2015 11:33:59 +0100 (CET) Received: by mail-oi0-f47.google.com with SMTP id i138so14836164oig.6 for ; Fri, 27 Feb 2015 02:33:59 -0800 (PST) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20130820; h=x-gm-message-state:mime-version:in-reply-to:references:date :message-id:subject:from:to:cc:content-type; bh=+ff3/1Z0ZoaCe9TVM//MN4A3o/hIzABEDob79hg2hIc=; b=bR5JzLcaPTGYrXSAm89FQTurvSLqSZCekYnbzOPXf97WTzL1V1BD8gLvGCa2HKV8+Y QWzDrQKuHgztaJrdI1oTi5C8s+L92+j0McY6KFodJfj0BTrXixu9C8Q2VbmCXsZeopMO FQjFg2DS21fTNYthfm+XXfcnZoG304wFsfSSvdCXdydptPHKnwJh7n2h8t9+URIqHi0R N3BhO5Z1KNKd2l/RG6AewA9LtoZJ2yW+jW/MLqXpRBofd+1/pIaMz8qmSC3+bodzPPr0 tVtQN+hQpBDicECTHBxFclVRNy1o++NEj0yZUEML5s8wtbSgYonsLv4AhjoIIV4wErg5 6WwQ== X-Gm-Message-State: ALoCoQlRy6pmnwyrwmGxAuYHeYKssrUBs7DoSzZ5eezmr1XKx7s/A06Kn2ZTV9VKV+ZMmoVsDTN6 MIME-Version: 1.0 X-Received: by 10.182.104.42 with SMTP id gb10mr9374097obb.62.1425033239401; Fri, 27 Feb 2015 02:33:59 -0800 (PST) Received: by 10.76.133.162 with HTTP; Fri, 27 Feb 2015 02:33:59 -0800 (PST) In-Reply-To: <1425012976-10173-5-git-send-email-cunming.liang@intel.com> References: <1424710542-14637-1-git-send-email-danny.zhou@intel.com> <1425012976-10173-1-git-send-email-cunming.liang@intel.com> <1425012976-10173-5-git-send-email-cunming.liang@intel.com> Date: Fri, 27 Feb 2015 11:33:59 +0100 Message-ID: From: David Marchand To: Cunming Liang Content-Type: text/plain; charset=UTF-8 X-Content-Filtered-By: Mailman/MimeDel 2.1.15 Cc: "dev@dpdk.org" Subject: Re: [dpdk-dev] [PATCH v6 4/8] eal/linux: add per rx queue interrupt handling based on VFIO X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 27 Feb 2015 10:34:00 -0000 I am not really comfortable with this api. This is just creating something on top of the standard epoll api with limitations. In the end, we could just use an external lib that does this already. So ok, this will work for your limited use case, but this will not be really useful for anything else. Not sure it has its place in eal, this is more an example to me. On Fri, Feb 27, 2015 at 5:56 AM, Cunming Liang wrote: > This patch does below: > - Create multiple VFIO eventfd for rx queues. > - Handle per rx queue interrupt. > - Eliminate unnecessary suspended DPDK polling thread wakeup mechanism > for rx interrupt by allowing polling thread epoll_wait rx queue > interrupt notification. > > Signed-off-by: Danny Zhou > Signed-off-by: Cunming Liang > --- > v6 changes > - split rte_intr_wait_rx_pkt into two function, wait and set. > - rewrite rte_intr_rx_wait/rte_intr_rx_set to remove queue visibility on > eal. > - rte_intr_rx_wait to support multiplexing. > - allow epfd as input to support flexible event fd combination. > > > lib/librte_eal/linuxapp/eal/eal_interrupts.c | 224 > +++++++++++++++++++----- > lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 23 ++- > lib/librte_eal/linuxapp/eal/rte_eal_version.map | 2 + > 3 files changed, 201 insertions(+), 48 deletions(-) > > diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c > b/lib/librte_eal/linuxapp/eal/eal_interrupts.c > index 8c5b834..f90c2b4 100644 > --- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c > +++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c > > [snip] > > +static void > +eal_intr_process_rxtx_interrupts(struct rte_intr_handle *intr_handle, > + struct epoll_event *events, > + uint32_t *vec, int nfds) > +{ > + int i, bytes_read; > + union rte_intr_read_buffer buf; > + int fd; > + > + for (i = 0; i < nfds; i++) { > + /* set the length to be read for different handle type */ > + switch (intr_handle->type) { > + case RTE_INTR_HANDLE_UIO: > + bytes_read = sizeof(buf.uio_intr_count); > + break; > + case RTE_INTR_HANDLE_ALARM: > + bytes_read = sizeof(buf.timerfd_num); > + break; > +#ifdef VFIO_PRESENT > + case RTE_INTR_HANDLE_VFIO_MSIX: > + case RTE_INTR_HANDLE_VFIO_MSI: > + case RTE_INTR_HANDLE_VFIO_LEGACY: > + bytes_read = sizeof(buf.vfio_intr_count); > + break; > +#endif > + default: > + bytes_read = 1; > + break; > + } > + > + /** > + * read out to clear the ready-to-be-read flag > + * for epoll_wait. > + */ > + vec[i] = events[i].data.u32; > + assert(vec[i] < VFIO_MAX_RXTX_INTR_ID); > + > + fd = intr_handle->efds[vec[i]]; > + bytes_read = read(fd, &buf, bytes_read); > + if (bytes_read < 0) > + RTE_LOG(ERR, EAL, "Error reading from file " > + "descriptor %d: %s\n", fd, > strerror(errno)); > + else if (bytes_read == 0) > + RTE_LOG(ERR, EAL, "Read nothing from file " > + "descriptor %d\n", fd); > + } > +} > Why unconditionnally read ? You are absorbing events from the application if the application gave you an external epfd and populated it with its own fds. > + > +static int init_tls_epfd(void) > +{ > + int pfd = epoll_create(1); > + if (pfd < 0) { > + RTE_LOG(ERR, EAL, > + "Cannot create epoll instance\n"); > + return -1; > + } > + return pfd; > +} > + > +int > +rte_intr_rx_wait(struct rte_intr_handle *intr_handle, int epfd, > + uint32_t *vec, uint16_t num) > +{ > In the end, this "rx" does not mean anything to eal. +#define MAX_EVENTS 8 > + struct epoll_event events[MAX_EVENTS]; > + int ret, nfds = 0; > + > + if (!intr_handle || !vec) { > + RTE_LOG(ERR, EAL, "invalid input parameter\n"); > + return -1; > + } > + > + if (intr_handle->type != RTE_INTR_HANDLE_VFIO_MSIX) { > + RTE_LOG(ERR, EAL, "intr type should be VFIO_MSIX\n"); > + return -1; > + } > + > + if (epfd == RTE_EPOLL_FD_ANY) { > + /* using per thread epoll fd */ > + if (unlikely(RTE_PER_LCORE(_epfd) == -1)) > + RTE_PER_LCORE(_epfd) = init_tls_epfd(); > + epfd = RTE_PER_LCORE(_epfd); > + } > Rather than testing every time, this should be set by the caller, i.e. epfd is always valid. If application does not want to create a epfd, then it calls rte_intr_rx_wait with RTE_EPOLL_FD_ANY (this name is not well chosen) that is a macro wrapped to RTE_PER_LCORE(_epfd). init_tls_epfd() should be called only once at init time. No need to check every time. + > + do { > + ret = epoll_wait(epfd, events, > + RTE_MIN(num, MAX_EVENTS), > + EAL_INTR_EPOLL_WAIT_FOREVER); > + if (unlikely(ret < 0)) { > + /* epoll_wait fail */ > + RTE_LOG(ERR, EAL, "epoll_wait returns with > fail\n"); > + return -1; > + } else if (ret > 0) { > + /* epoll_wait has at least one fd ready to read */ > + eal_intr_process_rxtx_interrupts(intr_handle, > events, > + vec, ret); > + num -= ret; > + vec += ret; > + nfds += ret; > + } else if (nfds > 0) > + break; > + } while (num > 0); > + > + return nfds; > +} > You are blocking unless all fds have been set, so you are serialising all events. + > +int > +rte_intr_rx_set(struct rte_intr_handle *intr_handle, int epfd, > + int op, uint32_t vec) > +{ > + struct epoll_event ev; > + > + if (!intr_handle || vec >= VFIO_MAX_RXTX_INTR_ID) { > + RTE_LOG(ERR, EAL, "invalid input parameter\n"); > + return -1; > + } > + > + if (intr_handle->type != RTE_INTR_HANDLE_VFIO_MSIX) { > + RTE_LOG(ERR, EAL, "intr type should be VFIO_MSIX\n"); > + return -1; > + } > + > + switch (op) { > + case RTE_INTR_EVENT_ADD: > + op = EPOLL_CTL_ADD; > + break; > + case RTE_INTR_EVENT_DEL: > + op = EPOLL_CTL_DEL; > + break; > + default: > + RTE_LOG(ERR, EAL, "event op type mismatch\n"); > + return -1; > + } > + > + if (epfd == RTE_EPOLL_FD_ANY) { > + /* using per thread epoll fd */ > + if (RTE_PER_LCORE(_epfd) == -1) > + RTE_PER_LCORE(_epfd) = init_tls_epfd(); > + epfd = RTE_PER_LCORE(_epfd); > + } > + > + ev.data.u32 = vec; > + ev.events = EPOLLIN | EPOLLPRI; > + if (epoll_ctl(epfd, op, intr_handle->efds[vec], &ev) < 0) { > + RTE_LOG(ERR, EAL, "Error op %d fd %d epoll_ctl, %s\n", > + op, intr_handle->efds[vec], strerror(errno)); > + return -1; > + } > + > + return 0; > +} > > diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c > b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c > index ee9660f..d90d23c 100644 > --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c > +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c > @@ -38,6 +38,7 @@ > #include > #include > #include > +#include > > #include > #include > @@ -274,16 +275,18 @@ pci_vfio_setup_interrupts(struct rte_pci_device > *dev, int vfio_dev_fd) > ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_IRQ_INFO, &irq); > if (ret < 0) { > RTE_LOG(ERR, EAL, " cannot get IRQ info, " > - "error %i (%s)\n", errno, > strerror(errno)); > + "error %i (%s)\n", errno, strerror(errno)); > return -1; > } > Garbage, this has nothing to do with the patch. > > /* if this vector cannot be used with eventfd, fail if we > explicitly > * specified interrupt type, otherwise continue */ > if ((irq.flags & VFIO_IRQ_INFO_EVENTFD) == 0) { > - if (internal_config.vfio_intr_mode != > RTE_INTR_MODE_NONE) { > + if (internal_config.vfio_intr_mode != > + RTE_INTR_MODE_NONE) { > RTE_LOG(ERR, EAL, > - " interrupt vector does > not support eventfd!\n"); > + " interrupt vector " > + "does not support eventfd!\n"); > return -1; > } else > continue; > Idem. > @@ -293,17 +296,27 @@ pci_vfio_setup_interrupts(struct rte_pci_device > *dev, int vfio_dev_fd) > fd = eventfd(0, 0); > if (fd < 0) { > RTE_LOG(ERR, EAL, " cannot set up eventfd, " > - "error %i (%s)\n", errno, > strerror(errno)); > + "error %i (%s)\n", errno, strerror(errno)); > Idem. > return -1; > } > > dev->intr_handle.fd = fd; > dev->intr_handle.vfio_dev_fd = vfio_dev_fd; > - > Idem. > switch (i) { > case VFIO_PCI_MSIX_IRQ_INDEX: > internal_config.vfio_intr_mode = > RTE_INTR_MODE_MSIX; > dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX; > + for (i = 0; i < VFIO_MAX_RXTX_INTR_ID; i++) { > + fd = eventfd(0, 0); > + if (fd < 0) { > + RTE_LOG(ERR, EAL, > + "cannot setup eventfd," > + "error %i (%s)\n", > + errno, strerror(errno)); > + return -1; > + } > + dev->intr_handle.efds[i] = fd; > + } > break; > case VFIO_PCI_MSI_IRQ_INDEX: > internal_config.vfio_intr_mode = RTE_INTR_MODE_MSI; > diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map > b/lib/librte_eal/linuxapp/eal/rte_eal_version.map > index 5f1857d..892a452 100644 > --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map > +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map > @@ -64,6 +64,8 @@ DPDK_2.0 { > rte_intr_callback_unregister; > rte_intr_disable; > rte_intr_enable; > + rte_intr_rx_set; > + rte_intr_rx_wait; > rte_log; > rte_log_add_in_history; > rte_log_cur_msg_loglevel; > -- > 1.8.1.4 > -- David Marchand