From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga03.intel.com (mga03.intel.com [134.134.136.65]) by dpdk.org (Postfix) with ESMTP id D47EE5A82 for ; Wed, 28 Jan 2015 10:50:49 +0100 (CET) Received: from fmsmga002.fm.intel.com ([10.253.24.26]) by orsmga103.jf.intel.com with ESMTP; 28 Jan 2015 01:46:28 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.09,480,1418112000"; d="scan'208";a="668892561" Received: from shvmail01.sh.intel.com ([10.239.29.42]) by fmsmga002.fm.intel.com with ESMTP; 28 Jan 2015 01:50:46 -0800 Received: from shecgisg004.sh.intel.com (shecgisg004.sh.intel.com [10.239.29.89]) by shvmail01.sh.intel.com with ESMTP id t0S9oiE0011573; Wed, 28 Jan 2015 17:50:44 +0800 Received: from shecgisg004.sh.intel.com (localhost [127.0.0.1]) by shecgisg004.sh.intel.com (8.13.6/8.13.6/SuSE Linux 0.8) with ESMTP id t0S9oew7007909; Wed, 28 Jan 2015 17:50:43 +0800 Received: (from dyzhou@localhost) by shecgisg004.sh.intel.com (8.13.6/8.13.6/Submit) id t0S9oeta007905; Wed, 28 Jan 2015 17:50:40 +0800 From: Danny Zhou To: dev@dpdk.org Date: Wed, 28 Jan 2015 17:50:29 +0800 Message-Id: <1422438631-7853-4-git-send-email-danny.zhou@intel.com> X-Mailer: git-send-email 1.7.4.1 In-Reply-To: <1422438631-7853-1-git-send-email-danny.zhou@intel.com> References: <1422438631-7853-1-git-send-email-danny.zhou@intel.com> Subject: [dpdk-dev] [PATCH v1 3/5] igb: enable rx queue interrupts for PF X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 28 Jan 2015 09:50:50 -0000 Signed-off-by: Danny Zhou --- lib/librte_pmd_e1000/e1000/e1000_hw.h | 3 + lib/librte_pmd_e1000/e1000_ethdev.h | 6 + lib/librte_pmd_e1000/igb_ethdev.c | 265 ++++++++++++++++++++++++++++++---- 3 files changed, 249 insertions(+), 25 deletions(-) diff --git a/lib/librte_pmd_e1000/e1000/e1000_hw.h b/lib/librte_pmd_e1000/e1000/e1000_hw.h index 4dd92a3..9b999ec 100644 --- a/lib/librte_pmd_e1000/e1000/e1000_hw.h +++ b/lib/librte_pmd_e1000/e1000/e1000_hw.h @@ -780,6 +780,9 @@ struct e1000_mac_info { u16 mta_reg_count; u16 uta_reg_count; + u32 max_rx_queues; + u32 max_tx_queues; + /* Maximum size of the MTA register table in all supported adapters */ #define MAX_MTA_REG 128 u32 mta_shadow[MAX_MTA_REG]; diff --git a/lib/librte_pmd_e1000/e1000_ethdev.h b/lib/librte_pmd_e1000/e1000_ethdev.h index d155e77..713ca11 100644 --- a/lib/librte_pmd_e1000/e1000_ethdev.h +++ b/lib/librte_pmd_e1000/e1000_ethdev.h @@ -34,6 +34,8 @@ #ifndef _E1000_ETHDEV_H_ #define _E1000_ETHDEV_H_ +#include + /* need update link, bit flag */ #define E1000_FLAG_NEED_LINK_UPDATE (uint32_t)(1 << 0) #define E1000_FLAG_MAILBOX (uint32_t)(1 << 1) @@ -105,10 +107,14 @@ #define E1000_FTQF_QUEUE_SHIFT 16 #define E1000_FTQF_QUEUE_ENABLE 0x00000100 +/* maximum number of other interrupts besides Rx & Tx interrupts */ +#define E1000_MAX_OTHER_INTR 1 + /* structure for interrupt relative data */ struct e1000_interrupt { uint32_t flags; uint32_t mask; + rte_spinlock_t lock; }; /* local vfta copy */ diff --git a/lib/librte_pmd_e1000/igb_ethdev.c b/lib/librte_pmd_e1000/igb_ethdev.c index 2a268b8..2a9bf00 100644 --- a/lib/librte_pmd_e1000/igb_ethdev.c +++ b/lib/librte_pmd_e1000/igb_ethdev.c @@ -97,6 +97,7 @@ static int eth_igb_flow_ctrl_get(struct rte_eth_dev *dev, static int eth_igb_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf); static int eth_igb_lsc_interrupt_setup(struct rte_eth_dev *dev); +static int eth_igb_rxq_interrupt_setup(struct rte_eth_dev *dev); static int eth_igb_interrupt_get_status(struct rte_eth_dev *dev); static int eth_igb_interrupt_action(struct rte_eth_dev *dev); static void eth_igb_interrupt_handler(struct rte_intr_handle *handle, @@ -191,6 +192,12 @@ static int eth_igb_filter_ctrl(struct rte_eth_dev *dev, enum rte_filter_op filter_op, void *arg); +static int eth_igb_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id); +static int eth_igb_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id); +static void eth_igb_assign_vector(struct e1000_hw *hw, s8 direction, u8 queue, u8 msix_vector); +static void eth_igb_configure_msix(struct e1000_hw *hw); +static void eth_igb_write_ivar(struct e1000_hw *hw, u8 msix_vector, u8 index, u8 offset); + /* * Define VF Stats MACRO for Non "cleared on read" register */ @@ -250,6 +257,8 @@ static struct eth_dev_ops eth_igb_ops = { .vlan_tpid_set = eth_igb_vlan_tpid_set, .vlan_offload_set = eth_igb_vlan_offload_set, .rx_queue_setup = eth_igb_rx_queue_setup, + .rx_queue_intr_enable = eth_igb_rx_queue_intr_enable, + .rx_queue_intr_disable = eth_igb_rx_queue_intr_disable, .rx_queue_release = eth_igb_rx_queue_release, .rx_queue_count = eth_igb_rx_queue_count, .rx_descriptor_done = eth_igb_rx_descriptor_done, @@ -592,6 +601,16 @@ eth_igb_dev_init(__attribute__((unused)) struct eth_driver *eth_drv, eth_dev->data->port_id, pci_dev->id.vendor_id, pci_dev->id.device_id); + /* set max interrupt vfio request */ + struct rte_eth_dev_info dev_info; + + memset(&dev_info, 0, sizeof(dev_info)); + eth_igb_infos_get(eth_dev, &dev_info); + + hw->mac.max_rx_queues = dev_info.max_rx_queues; + + pci_dev->intr_handle.max_intr = hw->mac.max_rx_queues + E1000_MAX_OTHER_INTR; + rte_intr_callback_register(&(pci_dev->intr_handle), eth_igb_interrupt_handler, (void *)eth_dev); @@ -754,7 +773,7 @@ eth_igb_start(struct rte_eth_dev *dev) { struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); - int ret, i, mask; + int ret, mask; uint32_t ctrl_ext; PMD_INIT_FUNC_TRACE(); @@ -794,6 +813,9 @@ eth_igb_start(struct rte_eth_dev *dev) /* configure PF module if SRIOV enabled */ igb_pf_host_configure(dev); + /* confiugre msix for sleep until rx interrupt */ + eth_igb_configure_msix(hw); + /* Configure for OS presence */ igb_init_manageability(hw); @@ -821,33 +843,9 @@ eth_igb_start(struct rte_eth_dev *dev) igb_vmdq_vlan_hw_filter_enable(dev); } - /* - * Configure the Interrupt Moderation register (EITR) with the maximum - * possible value (0xFFFF) to minimize "System Partial Write" issued by - * spurious [DMA] memory updates of RX and TX ring descriptors. - * - * With a EITR granularity of 2 microseconds in the 82576, only 7/8 - * spurious memory updates per second should be expected. - * ((65535 * 2) / 1000.1000 ~= 0.131 second). - * - * Because interrupts are not used at all, the MSI-X is not activated - * and interrupt moderation is controlled by EITR[0]. - * - * Note that having [almost] disabled memory updates of RX and TX ring - * descriptors through the Interrupt Moderation mechanism, memory - * updates of ring descriptors are now moderated by the configurable - * value of Write-Back Threshold registers. - */ if ((hw->mac.type == e1000_82576) || (hw->mac.type == e1000_82580) || (hw->mac.type == e1000_i350) || (hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) { - uint32_t ivar; - - /* Enable all RX & TX queues in the IVAR registers */ - ivar = (uint32_t) ((E1000_IVAR_VALID << 16) | E1000_IVAR_VALID); - for (i = 0; i < 8; i++) - E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, i, ivar); - /* Configure EITR with the maximum possible value (0xFFFF) */ E1000_WRITE_REG(hw, E1000_EITR(0), 0xFFFF); } @@ -901,6 +899,10 @@ eth_igb_start(struct rte_eth_dev *dev) if (dev->data->dev_conf.intr_conf.lsc != 0) ret = eth_igb_lsc_interrupt_setup(dev); + /* check if rxq interrupt is enabled */ + if (dev->data->dev_conf.intr_conf.rxq != 0) + eth_igb_rxq_interrupt_setup(dev); + /* resume enabled intr since hw reset */ igb_intr_enable(dev); @@ -1791,6 +1793,35 @@ eth_igb_lsc_interrupt_setup(struct rte_eth_dev *dev) E1000_DEV_PRIVATE_TO_INTR(dev->data->dev_private); intr->mask |= E1000_ICR_LSC; + rte_spinlock_init(&(intr->lock)); + + return 0; +} + +/* + * It clears the interrupt causes and enables the interrupt. + * It will be called once only during nic initialized. + * + * @param dev + * Pointer to struct rte_eth_dev. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +static int eth_igb_rxq_interrupt_setup(struct rte_eth_dev *dev) +{ + uint32_t mask, regval; + struct e1000_hw *hw = + E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct rte_eth_dev_info dev_info; + + memset(&dev_info, 0, sizeof(dev_info)); + eth_igb_infos_get(dev, &dev_info); + + mask = 0xFFFFFFFF >> (32 - dev_info.max_rx_queues); + regval = E1000_READ_REG(hw, E1000_EIMS); + E1000_WRITE_REG(hw, E1000_EIMS, regval | mask); return 0; } @@ -3256,5 +3287,189 @@ static struct rte_driver pmd_igbvf_drv = { .init = rte_igbvf_pmd_init, }; +static int +eth_igb_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id) +{ + struct e1000_hw *hw = + E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct e1000_interrupt *intr = + E1000_DEV_PRIVATE_TO_INTR(dev->data->dev_private); + uint32_t mask = 1 << queue_id; + + rte_spinlock_lock(&(intr->lock)); + E1000_WRITE_REG(hw, E1000_EIMC, mask); + E1000_WRITE_FLUSH(hw); + rte_spinlock_unlock(&(intr->lock)); + + return 0; +} + +static int +eth_igb_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) +{ + struct e1000_hw *hw = + E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct e1000_interrupt *intr = + E1000_DEV_PRIVATE_TO_INTR(dev->data->dev_private); + uint32_t mask = 1 << queue_id; + uint32_t regval; + + rte_spinlock_lock(&(intr->lock)); + regval = E1000_READ_REG(hw, E1000_EIMS); + E1000_WRITE_REG(hw, E1000_EIMS, regval | mask); + E1000_WRITE_FLUSH(hw); + rte_spinlock_unlock(&(intr->lock)); + + return 0; +} + +static void +eth_igb_write_ivar(struct e1000_hw *hw, u8 msix_vector, u8 index, u8 offset) +{ + uint32_t ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); + + /* clear any bits that are currently set */ + ivar &= ~((uint32_t)0xFF << offset); + + /* write vector and valid bit */ + ivar |= (msix_vector | E1000_IVAR_VALID) << offset; + + E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); +} + +static void +eth_igb_assign_vector(struct e1000_hw *hw, s8 direction, u8 queue, u8 msix_vector) +{ + uint32_t msixbm = 0; + switch (hw->mac.type) { + case e1000_82575: + /* The 82575 assigns vectors using a bitmask, which matches the + * bitmask for the EICR/EIMS/EIMC registers. To assign one + * or more queues to a vector, we write the appropriate bits + * into the MSIXBM register for that vector. + */ + if (direction == 0) + msixbm = E1000_EICR_RX_QUEUE0 << queue; + else if (direction == 1) + msixbm = E1000_EICR_TX_QUEUE0 << queue; + E1000_WRITE_REG(hw, E1000_MSIXBM(msix_vector), msixbm); + break; + case e1000_82576: + /* 82576 uses a table that essentially consists of 2 columns + * with 8 rows. The ordering is column-major so we use the + * lower 3 bits as the row index, and the 4th bit as the + * column offset. + */ + if (direction == 0) + eth_igb_write_ivar(hw, msix_vector, queue & 0x7, + (queue & 0x8) << 1); + else if (direction == 1) + eth_igb_write_ivar(hw, msix_vector, queue & 0x7, + ((queue & 0x8) << 1) + 8); + break; + case e1000_82580: + case e1000_i350: + case e1000_i354: + case e1000_i210: + case e1000_i211: + /* On 82580 and newer adapters the scheme is similar to 82576 + * however instead of ordering column-major we have things + * ordered row-major. So we traverse the table by using + * bit 0 as the column offset, and the remaining bits as the + * row index. + */ + if (direction == 0) + eth_igb_write_ivar(hw, msix_vector, + queue >> 1, + (queue & 0x1) << 4); + else if (direction == 1) + eth_igb_write_ivar(hw, msix_vector, + queue >> 1, + ((queue & 0x1) << 4) + 8); + break; + default: + break; + } +} + +/* + * eth_igb_configure_msix - Configure MSI-X hardware + * @hw: board private structure + * eth_igb_configure_msix sets up the hardware to properly generate MSI-X + * interrupts. + */ +static void +eth_igb_configure_msix(struct e1000_hw *hw) +{ + int queue_id; + uint32_t tmp, regval, mask; + uint32_t max_rx_queues = hw->mac.max_rx_queues; + + /* set vector for other causes, i.e. link changes */ + switch (hw->mac.type) { + case e1000_82575: + tmp = E1000_READ_REG(hw, E1000_CTRL_EXT); + /* enable MSI-X PBA support */ + tmp |= E1000_CTRL_EXT_PBA_CLR; + + /* Auto-Mask interrupts upon ICR read */ + tmp |= E1000_CTRL_EXT_EIAME; + tmp |= E1000_CTRL_EXT_IRCA; + + E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp); + + /* enable msix_other interrupt */ + E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), 0, E1000_EIMS_OTHER); + regval = E1000_READ_REG(hw, E1000_EIAC); + E1000_WRITE_REG(hw, E1000_EIAC, regval | E1000_EIMS_OTHER); + regval = E1000_READ_REG(hw, E1000_EIAM); + E1000_WRITE_REG(hw, E1000_EIMS, regval | E1000_EIMS_OTHER); + + break; + case e1000_82576: + case e1000_82580: + case e1000_i350: + case e1000_i354: + case e1000_i210: + case e1000_i211: + /* Turn on MSI-X capability first, or our settings won't stick. + * And it will take days to debug. + */ + E1000_WRITE_REG(hw, E1000_GPIE, E1000_GPIE_MSIX_MODE | + E1000_GPIE_PBA | E1000_GPIE_EIAME | + E1000_GPIE_NSICR); + + /* enable msix_other interrupt */ + mask = 1 << max_rx_queues; + regval = E1000_READ_REG(hw, E1000_EIAC); + E1000_WRITE_REG(hw, E1000_EIAC, regval | mask); + regval = E1000_READ_REG(hw, E1000_EIMS); + E1000_WRITE_REG(hw, E1000_EIMS, regval | mask); + tmp = (max_rx_queues | E1000_IVAR_VALID) << 8; + + E1000_WRITE_REG(hw, E1000_IVAR_MISC, tmp); + break; + default: + /* do nothing, since nothing else supports MSI-X */ + break; + } + + /* + * use EIAM and EIAC to auto-mask and auto-clear when MSI-X interrupt is asserted + * this saves a register write for every interrupt + */ + mask = 0xFFFFFFFF >> (32 - max_rx_queues); + regval = E1000_READ_REG(hw, E1000_EIAC); + E1000_WRITE_REG(hw, E1000_EIAC, regval | mask); + regval = E1000_READ_REG(hw, E1000_EIAM); + E1000_WRITE_REG(hw, E1000_EIAM, regval | mask); + + for (queue_id = 0; queue_id < VFIO_MAX_QUEUE_ID; queue_id++) + eth_igb_assign_vector(hw, 0, queue_id, queue_id); + + E1000_WRITE_FLUSH(hw); +} + + PMD_REGISTER_DRIVER(pmd_igb_drv); PMD_REGISTER_DRIVER(pmd_igbvf_drv); -- 1.8.1.4