From: "Zhou, Danny" <danny.zhou@intel.com>
To: Neil Horman <nhorman@tuxdriver.com>
Cc: "dev@dpdk.org" <dev@dpdk.org>
Subject: Re: [dpdk-dev] [PATCH v3 4/5] eal: add per rx queue interrupt handling based on VFIO
Date: Thu, 19 Feb 2015 08:10:47 +0000 [thread overview]
Message-ID: <DFDF335405C17848924A094BC35766CF0AAA28E1@SHSMSX104.ccr.corp.intel.com> (raw)
In-Reply-To: <20150217155852.GD6309@neilslaptop.think-freely.org>
> -----Original Message-----
> From: Neil Horman [mailto:nhorman@tuxdriver.com]
> Sent: Tuesday, February 17, 2015 11:59 PM
> To: Zhou, Danny
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v3 4/5] eal: add per rx queue interrupt handling based on VFIO
>
> On Tue, Feb 17, 2015 at 09:47:18PM +0800, Zhou Danny wrote:
> > v3 changes:
> > - Fix review comments
> >
> > v2 changes:
> > - Fix compilation issue for a missed header file
> > - Bug fix: free unreleased resources on the exception path before return
> > - Consolidate coding style related review comments
> >
> > This patch does below:
> > - Create multiple VFIO eventfd for rx queues.
> > - Handle per rx queue interrupt.
> > - Eliminate unnecessary suspended DPDK polling thread wakeup mechanism
> > for rx interrupt by allowing polling thread epoll_wait rx queue
> > interrupt notification.
> >
> > Signed-off-by: Danny Zhou <danny.zhou@intel.com>
> > Tested-by: Yong Liu <yong.liu@intel.com>
> > ---
> > lib/librte_eal/common/include/rte_eal.h | 12 ++
> > lib/librte_eal/linuxapp/eal/Makefile | 1 +
> > lib/librte_eal/linuxapp/eal/eal_interrupts.c | 190 ++++++++++++++++-----
> > lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 12 +-
> > .../linuxapp/eal/include/exec-env/rte_interrupts.h | 4 +
> > 5 files changed, 175 insertions(+), 44 deletions(-)
> >
> > diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h
> > index f4ecd2e..d81331f 100644
> > --- a/lib/librte_eal/common/include/rte_eal.h
> > +++ b/lib/librte_eal/common/include/rte_eal.h
> > @@ -150,6 +150,18 @@ int rte_eal_iopl_init(void);
> > * - On failure, a negative error value.
> > */
> > int rte_eal_init(int argc, char **argv);
> > +
> > +/**
> > + * @param port_id
> > + * the port id
> > + * @param queue_id
> > + * the queue id
> > + * @return
> > + * - On success, return 0
> > + * - On failure, returns -1.
> > + */
> > +int rte_eal_wait_rx_intr(uint8_t port_id, uint8_t queue_id);
> > +
> > /**
> > * Usage function typedef used by the application usage function.
> > *
> > diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
> > index e117cec..c593dfa 100644
> > --- a/lib/librte_eal/linuxapp/eal/Makefile
> > +++ b/lib/librte_eal/linuxapp/eal/Makefile
> > @@ -43,6 +43,7 @@ CFLAGS += -I$(SRCDIR)/include
> > CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common
> > CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include
> > CFLAGS += -I$(RTE_SDK)/lib/librte_ring
> > +CFLAGS += -I$(RTE_SDK)/lib/librte_mbuf
> > CFLAGS += -I$(RTE_SDK)/lib/librte_mempool
> > CFLAGS += -I$(RTE_SDK)/lib/librte_malloc
> > CFLAGS += -I$(RTE_SDK)/lib/librte_ether
> > diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
> > index dc2668a..97215ad 100644
> > --- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c
> > +++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
> > @@ -64,6 +64,7 @@
> > #include <rte_malloc.h>
> > #include <rte_errno.h>
> > #include <rte_spinlock.h>
> > +#include <rte_ethdev.h>
> >
> > #include "eal_private.h"
> > #include "eal_vfio.h"
> > @@ -127,6 +128,9 @@ static pthread_t intr_thread;
> > #ifdef VFIO_PRESENT
> >
> > #define IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + sizeof(int))
> > +/* irq set buffer length for queue interrupts and LSC interrupt */
> > +#define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
> > + sizeof(int) * (VFIO_MAX_QUEUE_ID + 1))
> >
> > /* enable legacy (INTx) interrupts */
> > static int
> > @@ -218,10 +222,10 @@ vfio_disable_intx(struct rte_intr_handle *intr_handle) {
> > return 0;
> > }
> >
> > -/* enable MSI-X interrupts */
> > +/* enable MSI interrupts */
> > static int
> > vfio_enable_msi(struct rte_intr_handle *intr_handle) {
> > - int len, ret;
> > + int len, ret, max_intr;
> > char irq_set_buf[IRQ_SET_BUF_LEN];
> > struct vfio_irq_set *irq_set;
> > int *fd_ptr;
> > @@ -230,12 +234,19 @@ vfio_enable_msi(struct rte_intr_handle *intr_handle) {
> >
> > irq_set = (struct vfio_irq_set *) irq_set_buf;
> > irq_set->argsz = len;
> > - irq_set->count = 1;
> > + if ((!intr_handle->max_intr) ||
> > + (intr_handle->max_intr > VFIO_MAX_QUEUE_ID))
> > + max_intr = VFIO_MAX_QUEUE_ID + 1;
> > + else
> > + max_intr = intr_handle->max_intr;
> > +
> > + irq_set->count = max_intr;
> > irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
> > irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
> > irq_set->start = 0;
> > fd_ptr = (int *) &irq_set->data;
> > - *fd_ptr = intr_handle->fd;
> > + memcpy(fd_ptr, intr_handle->queue_fd, sizeof(intr_handle->queue_fd));
> > + fd_ptr[max_intr - 1] = intr_handle->fd;
> >
> > ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
> >
> > @@ -244,27 +255,10 @@ vfio_enable_msi(struct rte_intr_handle *intr_handle) {
> > intr_handle->fd);
> > return -1;
> > }
> > -
> > - /* manually trigger interrupt to enable it */
> > - memset(irq_set, 0, len);
> > - len = sizeof(struct vfio_irq_set);
> > - irq_set->argsz = len;
> > - irq_set->count = 1;
> > - irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
> > - irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
> > - irq_set->start = 0;
> > -
> > - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
> > -
> > - if (ret) {
> > - RTE_LOG(ERR, EAL, "Error triggering MSI interrupts for fd %d\n",
> > - intr_handle->fd);
> > - return -1;
> > - }
> > return 0;
> > }
> >
> > -/* disable MSI-X interrupts */
> > +/* disable MSI interrupts */
> > static int
> > vfio_disable_msi(struct rte_intr_handle *intr_handle) {
> > struct vfio_irq_set *irq_set;
> > @@ -292,8 +286,8 @@ vfio_disable_msi(struct rte_intr_handle *intr_handle) {
> > /* enable MSI-X interrupts */
> > static int
> > vfio_enable_msix(struct rte_intr_handle *intr_handle) {
> > - int len, ret;
> > - char irq_set_buf[IRQ_SET_BUF_LEN];
> > + int len, ret, max_intr;
> > + char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
> > struct vfio_irq_set *irq_set;
> > int *fd_ptr;
> >
> > @@ -301,12 +295,19 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) {
> >
> > irq_set = (struct vfio_irq_set *) irq_set_buf;
> > irq_set->argsz = len;
> > - irq_set->count = 1;
> > + if ((!intr_handle->max_intr) ||
> > + (intr_handle->max_intr > VFIO_MAX_QUEUE_ID))
> > + max_intr = VFIO_MAX_QUEUE_ID + 1;
> > + else
> > + max_intr = intr_handle->max_intr;
> > +
> > + irq_set->count = max_intr;
> > irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
> > irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
> > irq_set->start = 0;
> > fd_ptr = (int *) &irq_set->data;
> > - *fd_ptr = intr_handle->fd;
> > + memcpy(fd_ptr, intr_handle->queue_fd, sizeof(intr_handle->queue_fd));
> > + fd_ptr[max_intr - 1] = intr_handle->fd;
> >
> > ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
> >
> > @@ -316,22 +317,6 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) {
> > return -1;
> > }
> >
> > - /* manually trigger interrupt to enable it */
> > - memset(irq_set, 0, len);
> > - len = sizeof(struct vfio_irq_set);
> > - irq_set->argsz = len;
> > - irq_set->count = 1;
> > - irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
> > - irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
> > - irq_set->start = 0;
> > -
> > - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
> > -
> > - if (ret) {
> > - RTE_LOG(ERR, EAL, "Error triggering MSI-X interrupts for fd %d\n",
> > - intr_handle->fd);
> > - return -1;
> > - }
> > return 0;
> > }
> >
> > @@ -339,7 +324,7 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) {
> > static int
> > vfio_disable_msix(struct rte_intr_handle *intr_handle) {
> > struct vfio_irq_set *irq_set;
> > - char irq_set_buf[IRQ_SET_BUF_LEN];
> > + char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
> > int len, ret;
> >
> > len = sizeof(struct vfio_irq_set);
> > @@ -824,3 +809,122 @@ rte_eal_intr_init(void)
> > return -ret;
> > }
> >
> > +static void
> > +eal_intr_process_rx_interrupts(uint8_t port_id,
> > + struct epoll_event *events, int nfds)
> > +{
> > + int n, bytes_read;
> > + union rte_intr_read_buffer buf;
> > + struct rte_intr_handle intr_handle =
> > + rte_eth_devices[port_id].pci_dev->intr_handle;
> > +
> > + for (n = 0; n < nfds; n++) {
> > + /* set the length to be read for different handle type */
> > + switch (intr_handle.type) {
> > + case RTE_INTR_HANDLE_UIO:
> > + bytes_read = sizeof(buf.uio_intr_count);
> > + break;
> > + case RTE_INTR_HANDLE_ALARM:
> > + bytes_read = sizeof(buf.timerfd_num);
> > + break;
> > +#ifdef VFIO_PRESENT
> > + case RTE_INTR_HANDLE_VFIO_MSIX:
> > + case RTE_INTR_HANDLE_VFIO_MSI:
> > + case RTE_INTR_HANDLE_VFIO_LEGACY:
> > + bytes_read = sizeof(buf.vfio_intr_count);
> > + break;
> > +#endif
> > + default:
> > + bytes_read = 1;
> > + break;
> > + }
> > +
> > + /**
> > + * read out to clear the ready-to-be-read flag
> > + * for epoll_wait.
> > + */
> > + bytes_read = read(events[n].data.fd, &buf, bytes_read);
> > + if (bytes_read < 0)
> > + RTE_LOG(ERR, EAL, "Error reading from file "
> > + "descriptor %d: %s\n", events[n].data.fd,
> > + strerror(errno));
> > + else if (bytes_read == 0)
> > + RTE_LOG(ERR, EAL, "Read nothing from file "
> > + "descriptor %d\n", events[n].data.fd);
> > + }
> > +}
> > +
> > +static void
> > +eal_intr_handle_rx_interrupts(uint8_t port_id, int pfd, unsigned totalfds)
> > +{
> > + struct epoll_event events[totalfds];
> > + int nfds = 0;
> > +
> > + do {
> > + nfds = epoll_wait(pfd, events, totalfds,
> > + EAL_INTR_EPOLL_WAIT_FOREVER);
> > + /* epoll_wait fail */
> > + if (nfds < 0) {
> > + RTE_LOG(ERR, EAL,
> > + "epoll_wait returns with fail\n");
> > + return;
> > + }
> > + } while (nfds == 0);
> > +
> > + /* epoll_wait has at least one fd ready to read */
> > + eal_intr_process_rx_interrupts(port_id, events, nfds);
> > +}
> > +
> > +int
> > +rte_eal_wait_rx_intr(uint8_t port_id, uint8_t queue_id)
> > +{
> > + struct rte_intr_handle intr_handle =
> > + rte_eth_devices[port_id].pci_dev->intr_handle;
> > + struct epoll_event ev;
> > + unsigned numfds = 0;
> > +
> > + /* create epoll fd */
> > + int pfd = epoll_create(1);
> > + if (pfd < 0) {
> > + RTE_LOG(ERR, EAL, "Cannot create epoll instance\n");
> > + return -1;
> > + }
> > +
> > + rte_spinlock_lock(&intr_lock);
> > +
> > + ev.events = EPOLLIN | EPOLLPRI;
> > + switch (intr_handle.type) {
> > + case RTE_INTR_HANDLE_UIO:
> > + ev.data.fd = intr_handle.fd;
> > + break;
> > +#ifdef VFIO_PRESENT
> > + case RTE_INTR_HANDLE_VFIO_MSIX:
> > + case RTE_INTR_HANDLE_VFIO_MSI:
> > + case RTE_INTR_HANDLE_VFIO_LEGACY:
> > + ev.data.fd = intr_handle.queue_fd[queue_id];
> > + break;
> > +#endif
> > + default:
> > + rte_spinlock_unlock(&intr_lock);
> > + close(pfd);
> > + return -1;
> > + }
> > +
> > + if (epoll_ctl(pfd, EPOLL_CTL_ADD, ev.data.fd, &ev) < 0) {
> > + RTE_LOG(ERR, EAL, "Error adding fd %d epoll_ctl, %s\n",
> > + intr_handle.queue_fd[queue_id], strerror(errno));
> > + } else
> > + numfds++;
> > +
> > + rte_spinlock_unlock(&intr_lock);
> > + /* serve the interrupt */
> > + eal_intr_handle_rx_interrupts(port_id, pfd, numfds);
> > +
> > + /**
> > + * when we return, we need to rebuild the
> > + * list of fds to monitor.
> > + */
> > + close(pfd);
> > +
> > + return 0;
> > +}
> > diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> > index 20e0977..0e5fa76 100644
> > --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> > +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> > @@ -283,11 +283,21 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd)
> >
> > dev->intr_handle.fd = fd;
> > dev->intr_handle.vfio_dev_fd = vfio_dev_fd;
> > -
> > switch (i) {
> > case VFIO_PCI_MSIX_IRQ_INDEX:
> > internal_config.vfio_intr_mode = RTE_INTR_MODE_MSIX;
> > dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX;
> > + for (i = 0; i < VFIO_MAX_QUEUE_ID; i++) {
> > + fd = eventfd(0, 0);
> > + if (fd < 0) {
> > + RTE_LOG(ERR, EAL,
> > + "cannot setup eventfd,"
> > + "error %i (%s)\n",
> > + errno, strerror(errno));
> > + return -1;
> > + }
> > + dev->intr_handle.queue_fd[i] = fd;
> > + }
> > break;
> > case VFIO_PCI_MSI_IRQ_INDEX:
> > internal_config.vfio_intr_mode = RTE_INTR_MODE_MSI;
> > diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> > index 23eafd9..c6982cf 100644
> > --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> > +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> > @@ -38,6 +38,8 @@
> > #ifndef _RTE_LINUXAPP_INTERRUPTS_H_
> > #define _RTE_LINUXAPP_INTERRUPTS_H_
> >
> > +#define VFIO_MAX_QUEUE_ID 32
> > +
> > enum rte_intr_handle_type {
> > RTE_INTR_HANDLE_UNKNOWN = 0,
> > RTE_INTR_HANDLE_UIO, /**< uio device handle */
> > @@ -52,6 +54,8 @@ enum rte_intr_handle_type {
> > struct rte_intr_handle {
> > int vfio_dev_fd; /**< VFIO device file descriptor */
> > int fd; /**< file descriptor */
> > + int max_intr; /**< max interrupt requested */
> > + int queue_fd[VFIO_MAX_QUEUE_ID]; /**< rx and tx queue interrupt file descriptor */
> This is used outside of this library, you need to move these new fields to the
> end of the structure.
>
> neil
Alright, I will move them to the end of the structure in the V4 patch.
Neil, do you have a simple write-up of guidelines on how to add APIs and new fields to existing
structures so that the new additions do not break the ABI? It might help all developers avoid
making similar mistakes in the future.
>
> > enum rte_intr_handle_type type; /**< handle type */
> > };
> >
> > --
> > 1.8.1.4
> >
> >
next prev parent reply other threads:[~2015-02-19 8:11 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-02-17 13:47 [dpdk-dev] [PATCH v3 0/5] Interrupt mode PMD Zhou Danny
2015-02-17 13:47 ` [dpdk-dev] [PATCH v3 1/5] ethdev: add rx interrupt enable/disable functions Zhou Danny
2015-02-17 15:52 ` Neil Horman
2015-02-19 8:06 ` Zhou, Danny
2015-02-19 8:21 ` Gonzalez Monroy, Sergio
2015-02-19 8:34 ` Zhou, Danny
2015-02-19 13:09 ` Neil Horman
2015-02-19 13:15 ` Zhou, Danny
2015-02-17 15:54 ` Neil Horman
2015-02-19 7:58 ` Zhou, Danny
2015-02-19 13:02 ` Neil Horman
2015-02-17 13:47 ` [dpdk-dev] [PATCH v3 2/5] ixgbe: enable rx queue interrupts for both PF and VF Zhou Danny
2015-02-17 13:47 ` [dpdk-dev] [PATCH v3 3/5] igb: enable rx queue interrupts for PF Zhou Danny
2015-02-17 13:47 ` [dpdk-dev] [PATCH v3 4/5] eal: add per rx queue interrupt handling based on VFIO Zhou Danny
2015-02-17 15:58 ` Neil Horman
2015-02-19 8:10 ` Zhou, Danny [this message]
2015-02-19 13:04 ` Neil Horman
2015-02-17 13:47 ` [dpdk-dev] [PATCH v3 5/5] l3fwd-power: enable one-shot rx interrupt and polling/interrupt mode switch Zhou Danny
2015-02-18 1:51 ` [dpdk-dev] [PATCH v3 0/5] Interrupt mode PMD Liang, Cunming
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=DFDF335405C17848924A094BC35766CF0AAA28E1@SHSMSX104.ccr.corp.intel.com \
--to=danny.zhou@intel.com \
--cc=dev@dpdk.org \
--cc=nhorman@tuxdriver.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).