* [dpdk-dev] [PATCH v4 1/5] ethdev: add rx interrupt enable/disable functions
2015-02-19 13:48 [dpdk-dev] [PATCH v4 0/5] Interrupt mode PMD Zhou Danny
@ 2015-02-19 13:48 ` Zhou Danny
2015-02-19 13:48 ` [dpdk-dev] [PATCH v4 2/5] ixgbe: enable rx queue interrupts for both PF and VF Zhou Danny
` (4 subsequent siblings)
5 siblings, 0 replies; 19+ messages in thread
From: Zhou Danny @ 2015-02-19 13:48 UTC (permalink / raw)
To: dev
v4 changes
- Export interrupt enable/disable functions for shared libraries
- Put new functions at the end of eth_dev_ops to avoid breaking ABI
v3 changes
- Add return value for interrupt enable/disable functions
Add two dev_ops functions to enable and disable rx queue interrupts
Signed-off-by: Danny Zhou <danny.zhou@intel.com>
Tested-by: Yong Liu <yong.liu@intel.com>
---
lib/librte_ether/rte_ethdev.c | 43 +++++++++++++++++++++++++
lib/librte_ether/rte_ethdev.h | 59 ++++++++++++++++++++++++++++++++++
lib/librte_ether/rte_ether_version.map | 2 ++
3 files changed, 104 insertions(+)
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index ea3a1fb..d27469a 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -2825,6 +2825,49 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
}
rte_spinlock_unlock(&rte_eth_dev_cb_lock);
}
+
+int
+rte_eth_dev_rx_queue_intr_enable(uint8_t port_id,
+ uint16_t queue_id)
+{
+ struct rte_eth_dev *dev;
+
+ if (port_id >= nb_ports) {
+ PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+ return (-ENODEV);
+ }
+
+ dev = &rte_eth_devices[port_id];
+ if (dev == NULL) {
+ PMD_DEBUG_TRACE("Invalid port device\n");
+ return (-ENODEV);
+ }
+
+ FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_enable, -ENOTSUP);
+ return (*dev->dev_ops->rx_queue_intr_enable)(dev, queue_id);
+}
+
+int
+rte_eth_dev_rx_queue_intr_disable(uint8_t port_id,
+ uint16_t queue_id)
+{
+ struct rte_eth_dev *dev;
+
+ if (port_id >= nb_ports) {
+ PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+ return (-ENODEV);
+ }
+
+ dev = &rte_eth_devices[port_id];
+ if (dev == NULL) {
+ PMD_DEBUG_TRACE("Invalid port device\n");
+ return (-ENODEV);
+ }
+
+ FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_disable, -ENOTSUP);
+ return (*dev->dev_ops->rx_queue_intr_disable)(dev, queue_id);
+}
+
#ifdef RTE_NIC_BYPASS
int rte_eth_dev_bypass_init(uint8_t port_id)
{
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 84160c3..43035c2 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -848,6 +848,8 @@ struct rte_eth_fdir {
struct rte_intr_conf {
/** enable/disable lsc interrupt. 0 (default) - disable, 1 enable */
uint16_t lsc;
+ /** enable/disable rxq interrupt. 0 (default) - disable, 1 enable */
+ uint16_t rxq;
};
/**
@@ -1109,6 +1111,14 @@ typedef int (*eth_tx_queue_setup_t)(struct rte_eth_dev *dev,
const struct rte_eth_txconf *tx_conf);
/**< @internal Setup a transmit queue of an Ethernet device. */
+typedef int (*eth_rx_enable_intr_t)(struct rte_eth_dev *dev,
+ uint16_t rx_queue_id);
+/**< @internal Enable interrupt of a receive queue of an Ethernet device. */
+
+typedef int (*eth_rx_disable_intr_t)(struct rte_eth_dev *dev,
+ uint16_t rx_queue_id);
+/**< @internal Disable interrupt of a receive queue of an Ethernet device. */
+
typedef void (*eth_queue_release_t)(void *queue);
/**< @internal Release memory resources allocated by given RX/TX queue. */
@@ -1520,6 +1530,10 @@ struct eth_dev_ops {
eth_remove_flex_filter_t remove_flex_filter; /**< remove flex filter. */
eth_get_flex_filter_t get_flex_filter; /**< get flex filter. */
eth_filter_ctrl_t filter_ctrl; /**< common filter control*/
+
+ /** Enable/disable Rx queue interrupt. */
+ eth_rx_enable_intr_t rx_queue_intr_enable; /**< Enable Rx queue interrupt. */
+ eth_rx_disable_intr_t rx_queue_intr_disable; /**< Disable Rx queue interrupt.*/
};
/**
@@ -2811,6 +2825,51 @@ void _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
enum rte_eth_event_type event);
/**
+ * When there is no rx packet coming in Rx Queue for a long time, we can
+ * sleep lcore related to RX Queue for power saving, and enable rx interrupt
+ * to be triggered when rx packect arrives.
+ *
+ * The rte_eth_dev_rx_queue_intr_enable() function enables rx queue
+ * interrupt on specific rx queue of a port.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The index of the receive queue from which to retrieve input packets.
+ * The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ * to rte_eth_dev_configure().
+ * @return
+ * - (0) if successful.
+ * - (-ENOTSUP) if underlying hardware OR driver doesn't support
+ * that operation.
+ * - (-ENODEV) if *port_id* invalid.
+ */
+int rte_eth_dev_rx_queue_intr_enable(uint8_t port_id,
+ uint16_t queue_id);
+
+/**
+ * When lcore wakes up from rx interrupt indicating packet coming, disable rx
+ * interrupt and returns to polling mode.
+ *
+ * The rte_eth_dev_rx_queue_intr_disable() function disables rx queue
+ * interrupt on specific rx queue of a port.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The index of the receive queue from which to retrieve input packets.
+ * The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ * to rte_eth_dev_configure().
+ * @return
+ * - (0) if successful.
+ * - (-ENOTSUP) if underlying hardware OR driver doesn't support
+ * that operation.
+ * - (-ENODEV) if *port_id* invalid.
+ */
+int rte_eth_dev_rx_queue_intr_disable(uint8_t port_id,
+ uint16_t queue_id);
+
+/**
* Turn on the LED on the Ethernet device.
* This function turns on the LED on the Ethernet device.
*
diff --git a/lib/librte_ether/rte_ether_version.map b/lib/librte_ether/rte_ether_version.map
index 7316530..1e7af6e 100644
--- a/lib/librte_ether/rte_ether_version.map
+++ b/lib/librte_ether/rte_ether_version.map
@@ -57,6 +57,8 @@ DPDK_2.0 {
rte_eth_dev_rss_hash_update;
rte_eth_dev_rss_reta_query;
rte_eth_dev_rss_reta_update;
+ rte_eth_dev_rx_queue_intr_disable;
+ rte_eth_dev_rx_queue_intr_enable;
rte_eth_dev_rx_queue_start;
rte_eth_dev_rx_queue_stop;
rte_eth_dev_set_link_down;
--
1.8.1.4
^ permalink raw reply [flat|nested] 19+ messages in thread
* [dpdk-dev] [PATCH v4 2/5] ixgbe: enable rx queue interrupts for both PF and VF
2015-02-19 13:48 [dpdk-dev] [PATCH v4 0/5] Interrupt mode PMD Zhou Danny
2015-02-19 13:48 ` [dpdk-dev] [PATCH v4 1/5] ethdev: add rx interrupt enable/disable functions Zhou Danny
@ 2015-02-19 13:48 ` Zhou Danny
2015-02-23 11:19 ` Thomas Monjalon
2015-02-19 13:48 ` [dpdk-dev] [PATCH v4 3/5] igb: enable rx queue interrupts for PF Zhou Danny
` (3 subsequent siblings)
5 siblings, 1 reply; 19+ messages in thread
From: Zhou Danny @ 2015-02-19 13:48 UTC (permalink / raw)
To: dev
v3 changes
- Remove spinlok from PMD
v2 changes
- Consolidate review comments related to coding style
The patch does below things for ixgbe PF and VF:
- Setup NIC to generate MSI-X interrupts
- Set the IVAR register to map interrupt causes to vectors
- Implement interrupt enable/disable functions
Signed-off-by: Danny Zhou <danny.zhou@intel.com>
Signed-off-by: Yong Liu <yong.liu@intel.com>
Tested-by: Yong Liu <yong.liu@intel.com>
---
lib/librte_pmd_ixgbe/ixgbe_ethdev.c | 365 +++++++++++++++++++++++++++++++++++-
lib/librte_pmd_ixgbe/ixgbe_ethdev.h | 6 +
2 files changed, 367 insertions(+), 4 deletions(-)
diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
index d6d408e..7e72808 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
@@ -83,6 +83,9 @@
*/
#define IXGBE_FC_LO 0x40
+/* Default minimum inter-interrupt interval for EITR configuration */
+#define IXGBE_MIN_INTER_INTERRUPT_INTERVAL_DEFAULT 0x79E
+
/* Timer value included in XOFF frames. */
#define IXGBE_FC_PAUSE 0x680
@@ -173,6 +176,7 @@ static int ixgbe_dev_rss_reta_query(struct rte_eth_dev *dev,
uint16_t reta_size);
static void ixgbe_dev_link_status_print(struct rte_eth_dev *dev);
static int ixgbe_dev_lsc_interrupt_setup(struct rte_eth_dev *dev);
+static int ixgbe_dev_rxq_interrupt_setup(struct rte_eth_dev *dev);
static int ixgbe_dev_interrupt_get_status(struct rte_eth_dev *dev);
static int ixgbe_dev_interrupt_action(struct rte_eth_dev *dev);
static void ixgbe_dev_interrupt_handler(struct rte_intr_handle *handle,
@@ -186,11 +190,14 @@ static void ixgbe_dcb_init(struct ixgbe_hw *hw,struct ixgbe_dcb_config *dcb_conf
/* For Virtual Function support */
static int eth_ixgbevf_dev_init(struct eth_driver *eth_drv,
struct rte_eth_dev *eth_dev);
+static int ixgbevf_dev_interrupt_get_status(struct rte_eth_dev *dev);
+static int ixgbevf_dev_interrupt_action(struct rte_eth_dev *dev);
static int ixgbevf_dev_configure(struct rte_eth_dev *dev);
static int ixgbevf_dev_start(struct rte_eth_dev *dev);
static void ixgbevf_dev_stop(struct rte_eth_dev *dev);
static void ixgbevf_dev_close(struct rte_eth_dev *dev);
static void ixgbevf_intr_disable(struct ixgbe_hw *hw);
+static void ixgbevf_intr_enable(struct ixgbe_hw *hw);
static void ixgbevf_dev_stats_get(struct rte_eth_dev *dev,
struct rte_eth_stats *stats);
static void ixgbevf_dev_stats_reset(struct rte_eth_dev *dev);
@@ -200,6 +207,15 @@ static void ixgbevf_vlan_strip_queue_set(struct rte_eth_dev *dev,
uint16_t queue, int on);
static void ixgbevf_vlan_offload_set(struct rte_eth_dev *dev, int mask);
static void ixgbevf_set_vfta_all(struct rte_eth_dev *dev, bool on);
+static void ixgbevf_dev_interrupt_handler(struct rte_intr_handle *handle,
+ void *param);
+static int ixgbevf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev,
+ uint16_t queue_id);
+static int ixgbevf_dev_rx_queue_intr_disable(struct rte_eth_dev *dev,
+ uint16_t queue_id);
+static void ixgbevf_set_ivar_map(struct ixgbe_hw *hw, int8_t direction,
+ uint8_t queue, uint8_t msix_vector);
+static void ixgbevf_configure_msix(struct ixgbe_hw *hw);
/* For Eth VMDQ APIs support */
static int ixgbe_uc_hash_table_set(struct rte_eth_dev *dev, struct
@@ -217,6 +233,14 @@ static int ixgbe_mirror_rule_set(struct rte_eth_dev *dev,
static int ixgbe_mirror_rule_reset(struct rte_eth_dev *dev,
uint8_t rule_id);
+static int ixgbe_dev_rx_queue_intr_enable(struct rte_eth_dev *dev,
+ uint16_t queue_id);
+static int ixgbe_dev_rx_queue_intr_disable(struct rte_eth_dev *dev,
+ uint16_t queue_id);
+static void ixgbe_set_ivar_map(struct ixgbe_hw *hw, int8_t direction,
+ uint8_t queue, uint8_t msix_vector);
+static void ixgbe_configure_msix(struct ixgbe_hw *hw);
+
static int ixgbe_set_queue_rate_limit(struct rte_eth_dev *dev,
uint16_t queue_idx, uint16_t tx_rate);
static int ixgbe_set_vf_rate_limit(struct rte_eth_dev *dev, uint16_t vf,
@@ -257,7 +281,7 @@ static int ixgbe_dev_filter_ctrl(struct rte_eth_dev *dev,
*/
#define UPDATE_VF_STAT(reg, last, cur) \
{ \
- u32 latest = IXGBE_READ_REG(hw, reg); \
+ uint32_t latest = IXGBE_READ_REG(hw, reg); \
cur += latest - last; \
last = latest; \
}
@@ -338,6 +362,8 @@ static struct eth_dev_ops ixgbe_eth_dev_ops = {
.tx_queue_start = ixgbe_dev_tx_queue_start,
.tx_queue_stop = ixgbe_dev_tx_queue_stop,
.rx_queue_setup = ixgbe_dev_rx_queue_setup,
+ .rx_queue_intr_enable = ixgbe_dev_rx_queue_intr_enable,
+ .rx_queue_intr_disable = ixgbe_dev_rx_queue_intr_disable,
.rx_queue_release = ixgbe_dev_rx_queue_release,
.rx_queue_count = ixgbe_dev_rx_queue_count,
.rx_descriptor_done = ixgbe_dev_rx_descriptor_done,
@@ -412,8 +438,11 @@ static struct eth_dev_ops ixgbevf_eth_dev_ops = {
.vlan_offload_set = ixgbevf_vlan_offload_set,
.rx_queue_setup = ixgbe_dev_rx_queue_setup,
.rx_queue_release = ixgbe_dev_rx_queue_release,
+ .rx_descriptor_done = ixgbe_dev_rx_descriptor_done,
.tx_queue_setup = ixgbe_dev_tx_queue_setup,
.tx_queue_release = ixgbe_dev_tx_queue_release,
+ .rx_queue_intr_enable = ixgbevf_dev_rx_queue_intr_enable,
+ .rx_queue_intr_disable = ixgbevf_dev_rx_queue_intr_disable,
.mac_addr_add = ixgbevf_add_mac_addr,
.mac_addr_remove = ixgbevf_remove_mac_addr,
};
@@ -908,6 +937,10 @@ eth_ixgbe_dev_init(__attribute__((unused)) struct eth_driver *eth_drv,
eth_dev->data->port_id, pci_dev->id.vendor_id,
pci_dev->id.device_id);
+ /* set max interrupt vfio request */
+ pci_dev->intr_handle.max_intr = hw->mac.max_rx_queues +
+ IXGBE_MAX_OTHER_INTR;
+
rte_intr_callback_register(&(pci_dev->intr_handle),
ixgbe_dev_interrupt_handler, (void *)eth_dev);
@@ -1084,6 +1117,15 @@ eth_ixgbevf_dev_init(__attribute__((unused)) struct eth_driver *eth_drv,
return (-EIO);
}
+ /* set max interrupt vfio request */
+ pci_dev->intr_handle.max_intr = hw->mac.max_rx_queues +
+ IXGBEVF_MAX_OTHER_INTR;
+
+ rte_intr_callback_register(&(pci_dev->intr_handle),
+ ixgbevf_dev_interrupt_handler, (void *)eth_dev);
+
+ rte_intr_enable(&(pci_dev->intr_handle));
+
PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x mac.type=%s",
eth_dev->data->port_id, pci_dev->id.vendor_id,
pci_dev->id.device_id, "ixgbe_mac_82599_vf");
@@ -1485,6 +1527,9 @@ ixgbe_dev_start(struct rte_eth_dev *dev)
/* configure PF module if SRIOV enabled */
ixgbe_pf_host_configure(dev);
+ /* confiugre msix for sleep until rx interrupt */
+ ixgbe_configure_msix(hw);
+
/* initialize transmission unit */
ixgbe_dev_tx_init(dev);
@@ -1560,6 +1605,10 @@ skip_link_setup:
if (dev->data->dev_conf.intr_conf.lsc != 0)
ixgbe_dev_lsc_interrupt_setup(dev);
+ /* check if rxq interrupt is enabled */
+ if (dev->data->dev_conf.intr_conf.rxq != 0)
+ ixgbe_dev_rxq_interrupt_setup(dev);
+
/* resume enabled intr since hw reset */
ixgbe_enable_intr(dev);
@@ -2221,6 +2270,28 @@ ixgbe_dev_lsc_interrupt_setup(struct rte_eth_dev *dev)
return 0;
}
+/**
+ * It clears the interrupt causes and enables the interrupt.
+ * It will be called once only during nic initialized.
+ *
+ * @param dev
+ * Pointer to struct rte_eth_dev.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+static int
+ixgbe_dev_rxq_interrupt_setup(struct rte_eth_dev *dev)
+{
+ struct ixgbe_interrupt *intr =
+ IXGBE_DEV_PRIVATE_TO_INTR(dev->data->dev_private);
+
+ intr->mask |= IXGBE_EICR_RTX_QUEUE;
+
+ return 0;
+}
+
/*
* It reads ICR and sets flag (IXGBE_EICR_LSC) for the link_update.
*
@@ -2247,10 +2318,10 @@ ixgbe_dev_interrupt_get_status(struct rte_eth_dev *dev)
PMD_DRV_LOG(INFO, "eicr %x", eicr);
intr->flags = 0;
- if (eicr & IXGBE_EICR_LSC) {
- /* set flag for async link update */
+
+ /* set flag for async link update */
+ if (eicr & IXGBE_EICR_LSC)
intr->flags |= IXGBE_FLAG_NEED_LINK_UPDATE;
- }
if (eicr & IXGBE_EICR_MAILBOX)
intr->flags |= IXGBE_FLAG_MAILBOX;
@@ -2258,6 +2329,30 @@ ixgbe_dev_interrupt_get_status(struct rte_eth_dev *dev)
return 0;
}
+static int
+ixgbevf_dev_interrupt_get_status(struct rte_eth_dev *dev)
+{
+ uint32_t eicr;
+ struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ struct ixgbe_interrupt *intr =
+ IXGBE_DEV_PRIVATE_TO_INTR(dev->data->dev_private);
+
+ /* clear all cause mask */
+ ixgbevf_intr_disable(hw);
+
+ /* read-on-clear nic registers here */
+ eicr = IXGBE_READ_REG(hw, IXGBE_VTEICR);
+ PMD_DRV_LOG(INFO, "eicr %x", eicr);
+
+ intr->flags = 0;
+
+ /* set flag for async link update */
+ if (eicr & IXGBE_EICR_LSC)
+ intr->flags |= IXGBE_FLAG_NEED_LINK_UPDATE;
+
+ return 0;
+}
+
/**
* It gets and then prints the link status.
*
@@ -2353,6 +2448,18 @@ ixgbe_dev_interrupt_action(struct rte_eth_dev *dev)
return 0;
}
+static int
+ixgbevf_dev_interrupt_action(struct rte_eth_dev *dev)
+{
+ struct ixgbe_hw *hw =
+ IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+ PMD_DRV_LOG(DEBUG, "enable intr immediately");
+ ixgbevf_intr_enable(hw);
+ rte_intr_enable(&(dev->pci_dev->intr_handle));
+ return 0;
+}
+
/**
* Interrupt handler which shall be registered for alarm callback for delayed
* handling specific interrupt to wait for the stable nic state. As the
@@ -2414,6 +2521,15 @@ ixgbe_dev_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
ixgbe_dev_interrupt_action(dev);
}
+static void
+ixgbevf_dev_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
+ void *param)
+{
+ struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
+ ixgbevf_dev_interrupt_get_status(dev);
+ ixgbevf_dev_interrupt_action(dev);
+}
+
static int
ixgbe_dev_led_on(struct rte_eth_dev *dev)
{
@@ -2912,6 +3028,19 @@ ixgbevf_intr_disable(struct ixgbe_hw *hw)
IXGBE_WRITE_FLUSH(hw);
}
+static void
+ixgbevf_intr_enable(struct ixgbe_hw *hw)
+{
+ PMD_INIT_FUNC_TRACE();
+
+ /* VF enable interrupt autoclean */
+ IXGBE_WRITE_REG(hw, IXGBE_VTEIAM, IXGBE_VF_IRQ_ENABLE_MASK);
+ IXGBE_WRITE_REG(hw, IXGBE_VTEIAC, IXGBE_VF_IRQ_ENABLE_MASK);
+ IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, IXGBE_VF_IRQ_ENABLE_MASK);
+
+ IXGBE_WRITE_FLUSH(hw);
+}
+
static int
ixgbevf_dev_configure(struct rte_eth_dev *dev)
{
@@ -2974,6 +3103,11 @@ ixgbevf_dev_start(struct rte_eth_dev *dev)
ixgbevf_dev_rxtx_start(dev);
+ ixgbevf_configure_msix(hw);
+
+ /* Re-enable interrupt for VF */
+ ixgbevf_intr_enable(hw);
+
return 0;
}
@@ -3511,6 +3645,229 @@ ixgbe_mirror_rule_reset(struct rte_eth_dev *dev, uint8_t rule_id)
return 0;
}
+
+static int
+ixgbevf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
+{
+ uint32_t mask;
+ struct ixgbe_hw *hw =
+ IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+ mask = IXGBE_READ_REG(hw, IXGBE_VTEIMS);
+ mask |= (1 << queue_id);
+ IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, mask);
+
+ return 0;
+}
+
+static int
+ixgbevf_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
+{
+ uint32_t mask;
+ struct ixgbe_hw *hw =
+ IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+ mask = IXGBE_READ_REG(hw, IXGBE_VTEIMS);
+ mask &= ~(1 << queue_id);
+ IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, mask);
+
+ return 0;
+}
+
+static int
+ixgbe_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
+{
+ uint32_t mask;
+ struct ixgbe_hw *hw =
+ IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ struct ixgbe_interrupt *intr =
+ IXGBE_DEV_PRIVATE_TO_INTR(dev->data->dev_private);
+
+ if (queue_id < 16) {
+ ixgbe_disable_intr(hw);
+ intr->mask |= (1 << queue_id);
+ ixgbe_enable_intr(dev);
+ } else if (queue_id < 32) {
+ mask = IXGBE_READ_REG(hw, IXGBE_EIMS_EX(0));
+ mask &= (1 << queue_id);
+ IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
+ } else if (queue_id < 64) {
+ mask = IXGBE_READ_REG(hw, IXGBE_EIMS_EX(1));
+ mask &= (1 << (queue_id - 32));
+ IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
+ }
+
+ return 0;
+}
+
+static int
+ixgbe_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
+{
+ uint32_t mask;
+ struct ixgbe_hw *hw =
+ IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ struct ixgbe_interrupt *intr =
+ IXGBE_DEV_PRIVATE_TO_INTR(dev->data->dev_private);
+
+ if (queue_id < 16) {
+ ixgbe_disable_intr(hw);
+ intr->mask &= ~(1 << queue_id);
+ ixgbe_enable_intr(dev);
+ } else if (queue_id < 32) {
+ mask = IXGBE_READ_REG(hw, IXGBE_EIMS_EX(0));
+ mask &= ~(1 << queue_id);
+ IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
+ } else if (queue_id < 64) {
+ mask = IXGBE_READ_REG(hw, IXGBE_EIMS_EX(1));
+ mask &= ~(1 << (queue_id - 32));
+ IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
+ }
+
+ return 0;
+}
+
+static void
+ixgbevf_set_ivar_map(struct ixgbe_hw *hw, int8_t direction,
+ uint8_t queue, uint8_t msix_vector)
+{
+ uint32_t tmp, idx;
+ if (direction == -1) {
+ /* other causes */
+ msix_vector |= IXGBE_IVAR_ALLOC_VAL;
+ tmp = IXGBE_READ_REG(hw, IXGBE_VTIVAR_MISC);
+ tmp &= ~0xFF;
+ tmp |= msix_vector;
+ IXGBE_WRITE_REG(hw, IXGBE_VTIVAR_MISC, tmp);
+ } else {
+ /* rx or tx cause */
+ msix_vector |= IXGBE_IVAR_ALLOC_VAL;
+ idx = ((16 * (queue & 1)) + (8 * direction));
+ tmp = IXGBE_READ_REG(hw, IXGBE_VTIVAR(queue >> 1));
+ tmp &= ~(0xFF << idx);
+ tmp |= (msix_vector << idx);
+ IXGBE_WRITE_REG(hw, IXGBE_VTIVAR(queue >> 1), tmp);
+ }
+}
+
+/**
+ * set the IVAR registers, mapping interrupt causes to vectors
+ * @param hw
+ * pointer to ixgbe_hw struct
+ * @direction
+ * 0 for Rx, 1 for Tx, -1 for other causes
+ * @queue
+ * queue to map the corresponding interrupt to
+ * @msix_vector
+ * the vector to map to the corresponding queue
+ */
+static void
+ixgbe_set_ivar_map(struct ixgbe_hw *hw, int8_t direction,
+ uint8_t queue, uint8_t msix_vector)
+{
+ uint32_t tmp, idx;
+
+ msix_vector |= IXGBE_IVAR_ALLOC_VAL;
+ if (hw->mac.type == ixgbe_mac_82598EB) {
+ if (direction == -1)
+ direction = 0;
+ idx = (((direction * 64) + queue) >> 2) & 0x1F;
+ tmp = IXGBE_READ_REG(hw, IXGBE_IVAR(idx));
+ tmp &= ~(0xFF << (8 * (queue & 0x3)));
+ tmp |= (msix_vector << (8 * (queue & 0x3)));
+ IXGBE_WRITE_REG(hw, IXGBE_IVAR(idx), tmp);
+ } else if ((hw->mac.type == ixgbe_mac_82599EB) ||
+ (hw->mac.type == ixgbe_mac_X540)) {
+ if (direction == -1) {
+ /* other causes */
+ idx = ((queue & 1) * 8);
+ tmp = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
+ tmp &= ~(0xFF << idx);
+ tmp |= (msix_vector << idx);
+ IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, tmp);
+ } else {
+ /* rx or tx causes */
+ idx = ((16 * (queue & 1)) + (8 * direction));
+ tmp = IXGBE_READ_REG(hw, IXGBE_IVAR(queue >> 1));
+ tmp &= ~(0xFF << idx);
+ tmp |= (msix_vector << idx);
+ IXGBE_WRITE_REG(hw, IXGBE_IVAR(queue >> 1), tmp);
+ }
+ }
+}
+
+static void
+ixgbevf_configure_msix(struct ixgbe_hw *hw)
+{
+ uint32_t q_idx, vector_idx;
+ /* Configure all RX queues of VF */
+ for (vector_idx = 0; vector_idx < IXGBE_VF_MAXMSIVECTOR; vector_idx++) {
+ for (q_idx = 0; q_idx < (hw->mac.max_rx_queues - 1); q_idx++)
+ ixgbevf_set_ivar_map(hw, 0, q_idx, vector_idx);
+ }
+
+ /* Configure VF Rx queue ivar */
+ ixgbevf_set_ivar_map(hw, -1, 1, vector_idx);
+}
+
+/**
+ * Sets up the hardware to properly generate MSI-X interrupts
+ * @hw
+ * board private structure
+ */
+static void
+ixgbe_configure_msix(struct ixgbe_hw *hw)
+{
+ int queue_id;
+ uint32_t mask;
+ uint32_t gpie;
+
+ /* setup GPIE for MSI-x mode */
+ gpie = IXGBE_READ_REG(hw, IXGBE_GPIE);
+ gpie |= IXGBE_GPIE_MSIX_MODE | IXGBE_GPIE_PBA_SUPPORT |
+ IXGBE_GPIE_OCD | IXGBE_GPIE_EIAME;
+ /*
+ * auto clearing and auto setting corresponding bits in EIMS
+ * when MSI-X interrupt is triggered
+ */
+ if (hw->mac.type == ixgbe_mac_82598EB)
+ IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
+ else {
+ IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
+ IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
+ }
+ IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
+
+ /*
+ * Populate the IVAR table and set the ITR values to the
+ * corresponding register.
+ */
+ for (queue_id = 0; queue_id < VFIO_MAX_QUEUE_ID; queue_id++)
+ ixgbe_set_ivar_map(hw, 0, queue_id, queue_id);
+
+ switch (hw->mac.type) {
+ case ixgbe_mac_82598EB:
+ ixgbe_set_ivar_map(hw, -1, IXGBE_IVAR_OTHER_CAUSES_INDEX,
+ VFIO_MAX_QUEUE_ID);
+ break;
+ case ixgbe_mac_82599EB:
+ case ixgbe_mac_X540:
+ ixgbe_set_ivar_map(hw, -1, 1, VFIO_MAX_QUEUE_ID);
+ break;
+ default:
+ break;
+ }
+ IXGBE_WRITE_REG(hw, IXGBE_EITR(queue_id),
+ IXGBE_MIN_INTER_INTERRUPT_INTERVAL_DEFAULT & 0xFFF);
+
+ /* set up to autoclear timer, and the vectors */
+ mask = IXGBE_EIMS_ENABLE_MASK;
+ mask &= ~(IXGBE_EIMS_OTHER |
+ IXGBE_EIMS_MAILBOX |
+ IXGBE_EIMS_LSC);
+
+ IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask);
+}
+
static int ixgbe_set_queue_rate_limit(struct rte_eth_dev *dev,
uint16_t queue_idx, uint16_t tx_rate)
{
diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.h b/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
index 1383194..d7aab2a 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
+++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
@@ -98,6 +98,11 @@
#define IXGBE_5TUPLE_MAX_PRI 7
#define IXGBE_5TUPLE_MIN_PRI 1
+#define IXGBE_VF_IRQ_ENABLE_MASK 3 /* vf irq enable mask */
+#define IXGBE_VF_MAXMSIVECTOR 1
+/* maximum other interrupts besides rx&tx*/
+#define IXGBE_MAX_OTHER_INTR 1
+#define IXGBEVF_MAX_OTHER_INTR 1
/*
* Information about the fdir mode.
*/
@@ -260,6 +265,7 @@ uint32_t ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev,
uint16_t rx_queue_id);
int ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset);
+int ixgbevf_dev_rx_descriptor_done(void *rx_queue, uint16_t offset);
int ixgbe_dev_rx_init(struct rte_eth_dev *dev);
--
1.8.1.4
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [dpdk-dev] [PATCH v4 2/5] ixgbe: enable rx queue interrupts for both PF and VF
2015-02-19 13:48 ` [dpdk-dev] [PATCH v4 2/5] ixgbe: enable rx queue interrupts for both PF and VF Zhou Danny
@ 2015-02-23 11:19 ` Thomas Monjalon
2015-02-23 11:23 ` Zhou, Danny
0 siblings, 1 reply; 19+ messages in thread
From: Thomas Monjalon @ 2015-02-23 11:19 UTC (permalink / raw)
To: Zhou Danny; +Cc: dev
2015-02-19 21:48, Zhou Danny:
> + /* set max interrupt vfio request */
> + pci_dev->intr_handle.max_intr = hw->mac.max_rx_queues +
> + IXGBE_MAX_OTHER_INTR;
> +
Compilation is broken here.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [dpdk-dev] [PATCH v4 2/5] ixgbe: enable rx queue interrupts for both PF and VF
2015-02-23 11:19 ` Thomas Monjalon
@ 2015-02-23 11:23 ` Zhou, Danny
2015-02-23 13:20 ` Thomas Monjalon
0 siblings, 1 reply; 19+ messages in thread
From: Zhou, Danny @ 2015-02-23 11:23 UTC (permalink / raw)
To: Thomas Monjalon; +Cc: dev
I noticed the V4 patch conflicts with the latest code on the main branch due to lots of code merged, and it
is mentioned by Jun Xu in previous email, and I will have to rebase the patch and send out V5 version.
> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> Sent: Monday, February 23, 2015 7:20 PM
> To: Zhou, Danny
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v4 2/5] ixgbe: enable rx queue interrupts for both PF and VF
>
> 2015-02-19 21:48, Zhou Danny:
> > + /* set max interrupt vfio request */
> > + pci_dev->intr_handle.max_intr = hw->mac.max_rx_queues +
> > + IXGBE_MAX_OTHER_INTR;
> > +
>
> Compilation is broken here.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [dpdk-dev] [PATCH v4 2/5] ixgbe: enable rx queue interrupts for both PF and VF
2015-02-23 11:23 ` Zhou, Danny
@ 2015-02-23 13:20 ` Thomas Monjalon
2015-02-23 15:11 ` Zhou, Danny
0 siblings, 1 reply; 19+ messages in thread
From: Thomas Monjalon @ 2015-02-23 13:20 UTC (permalink / raw)
To: Zhou, Danny; +Cc: dev
2015-02-23 11:23, Zhou, Danny:
> I noticed the V4 patch conflicts with the latest code on the main branch due to lots of code merged, and it
> is mentioned by Jun Xu in previous email, and I will have to rebase the patch and send out V5 version.
Maybe you misunderstood my comment.
I'm quoting an usage in patch 2 of max_intr which is defined in patch 4.
You have to better split and order your patches to make them compilable
and not break git bisect.
Thanks
> > -----Original Message-----
> > From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> > Sent: Monday, February 23, 2015 7:20 PM
> > To: Zhou, Danny
> > Cc: dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH v4 2/5] ixgbe: enable rx queue interrupts for both PF and VF
> >
> > 2015-02-19 21:48, Zhou Danny:
> > > + /* set max interrupt vfio request */
> > > + pci_dev->intr_handle.max_intr = hw->mac.max_rx_queues +
> > > + IXGBE_MAX_OTHER_INTR;
> > > +
> >
> > Compilation is broken here.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [dpdk-dev] [PATCH v4 2/5] ixgbe: enable rx queue interrupts for both PF and VF
2015-02-23 13:20 ` Thomas Monjalon
@ 2015-02-23 15:11 ` Zhou, Danny
0 siblings, 0 replies; 19+ messages in thread
From: Zhou, Danny @ 2015-02-23 15:11 UTC (permalink / raw)
To: Thomas Monjalon; +Cc: dev
> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> Sent: Monday, February 23, 2015 9:20 PM
> To: Zhou, Danny
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v4 2/5] ixgbe: enable rx queue interrupts for both PF and VF
>
> 2015-02-23 11:23, Zhou, Danny:
> > I noticed the V4 patch conflicts with the latest code on the main branch due to lots of code merged, and it
> > is mentioned by Jun Xu in previous email, and I will have to rebase the patch and send out V5 version.
>
> Maybe you misunderstood my comment.
> I'm quoting an usage in patch 2 of max_intr which is defined in patch 4.
> You have to better split and order your patches to make them compilable
> and not break git bisect.
>
> Thanks
>
Will split-off a new patch file for rte_interrupts.h which both patch 2 and patch 4 depends on.
> > > -----Original Message-----
> > > From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> > > Sent: Monday, February 23, 2015 7:20 PM
> > > To: Zhou, Danny
> > > Cc: dev@dpdk.org
> > > Subject: Re: [dpdk-dev] [PATCH v4 2/5] ixgbe: enable rx queue interrupts for both PF and VF
> > >
> > > 2015-02-19 21:48, Zhou Danny:
> > > > + /* set max interrupt vfio request */
> > > > + pci_dev->intr_handle.max_intr = hw->mac.max_rx_queues +
> > > > + IXGBE_MAX_OTHER_INTR;
> > > > +
> > >
> > > Compilation is broken here.
>
^ permalink raw reply [flat|nested] 19+ messages in thread
* [dpdk-dev] [PATCH v4 3/5] igb: enable rx queue interrupts for PF
2015-02-19 13:48 [dpdk-dev] [PATCH v4 0/5] Interrupt mode PMD Zhou Danny
2015-02-19 13:48 ` [dpdk-dev] [PATCH v4 1/5] ethdev: add rx interrupt enable/disable functions Zhou Danny
2015-02-19 13:48 ` [dpdk-dev] [PATCH v4 2/5] ixgbe: enable rx queue interrupts for both PF and VF Zhou Danny
@ 2015-02-19 13:48 ` Zhou Danny
2015-02-19 13:48 ` [dpdk-dev] [PATCH v4 4/5] eal: add per rx queue interrupt handling based on VFIO Zhou Danny
` (2 subsequent siblings)
5 siblings, 0 replies; 19+ messages in thread
From: Zhou Danny @ 2015-02-19 13:48 UTC (permalink / raw)
To: dev
v3 changes
- Remove unnecessary variables in e1000_mac_info
- Remove spinlok from PMD
v2 changes
- Consolidate review comments related to coding style
The patch does below for igb PF:
- Setup NIC to generate MSI-X interrupts
- Set the IVAR register to map interrupt causes to vectors
- Implement interrupt enable/disable functions
Signed-off-by: Danny Zhou <danny.zhou@intel.com>
Tested-by: Yong Liu <yong.liu@intel.com>
---
lib/librte_pmd_e1000/e1000_ethdev.h | 3 +
lib/librte_pmd_e1000/igb_ethdev.c | 228 ++++++++++++++++++++++++++++++++----
2 files changed, 206 insertions(+), 25 deletions(-)
diff --git a/lib/librte_pmd_e1000/e1000_ethdev.h b/lib/librte_pmd_e1000/e1000_ethdev.h
index d155e77..452c6bf 100644
--- a/lib/librte_pmd_e1000/e1000_ethdev.h
+++ b/lib/librte_pmd_e1000/e1000_ethdev.h
@@ -105,6 +105,9 @@
#define E1000_FTQF_QUEUE_SHIFT 16
#define E1000_FTQF_QUEUE_ENABLE 0x00000100
+/* maximum number of other interrupts besides Rx & Tx interrupts */
+#define E1000_MAX_OTHER_INTR 1
+
/* structure for interrupt relative data */
struct e1000_interrupt {
uint32_t flags;
diff --git a/lib/librte_pmd_e1000/igb_ethdev.c b/lib/librte_pmd_e1000/igb_ethdev.c
index 2a268b8..bafa332 100644
--- a/lib/librte_pmd_e1000/igb_ethdev.c
+++ b/lib/librte_pmd_e1000/igb_ethdev.c
@@ -97,6 +97,7 @@ static int eth_igb_flow_ctrl_get(struct rte_eth_dev *dev,
static int eth_igb_flow_ctrl_set(struct rte_eth_dev *dev,
struct rte_eth_fc_conf *fc_conf);
static int eth_igb_lsc_interrupt_setup(struct rte_eth_dev *dev);
+static int eth_igb_rxq_interrupt_setup(struct rte_eth_dev *dev);
static int eth_igb_interrupt_get_status(struct rte_eth_dev *dev);
static int eth_igb_interrupt_action(struct rte_eth_dev *dev);
static void eth_igb_interrupt_handler(struct rte_intr_handle *handle,
@@ -191,6 +192,16 @@ static int eth_igb_filter_ctrl(struct rte_eth_dev *dev,
enum rte_filter_op filter_op,
void *arg);
+static int eth_igb_rx_queue_intr_enable(struct rte_eth_dev *dev,
+ uint16_t queue_id);
+static int eth_igb_rx_queue_intr_disable(struct rte_eth_dev *dev,
+ uint16_t queue_id);
+static void eth_igb_assign_msix_vector(struct e1000_hw *hw, int8_t direction,
+ uint8_t queue, uint8_t msix_vector);
+static void eth_igb_configure_msix_intr(struct rte_eth_dev *dev);
+static void eth_igb_write_ivar(struct e1000_hw *hw, uint8_t msix_vector,
+ uint8_t index, uint8_t offset);
+
/*
* Define VF Stats MACRO for Non "cleared on read" register
*/
@@ -250,6 +261,8 @@ static struct eth_dev_ops eth_igb_ops = {
.vlan_tpid_set = eth_igb_vlan_tpid_set,
.vlan_offload_set = eth_igb_vlan_offload_set,
.rx_queue_setup = eth_igb_rx_queue_setup,
+ .rx_queue_intr_enable = eth_igb_rx_queue_intr_enable,
+ .rx_queue_intr_disable = eth_igb_rx_queue_intr_disable,
.rx_queue_release = eth_igb_rx_queue_release,
.rx_queue_count = eth_igb_rx_queue_count,
.rx_descriptor_done = eth_igb_rx_descriptor_done,
@@ -471,6 +484,7 @@ eth_igb_dev_init(__attribute__((unused)) struct eth_driver *eth_drv,
struct e1000_vfta * shadow_vfta =
E1000_DEV_PRIVATE_TO_VFTA(eth_dev->data->dev_private);
uint32_t ctrl_ext;
+ struct rte_eth_dev_info dev_info;
pci_dev = eth_dev->pci_dev;
eth_dev->dev_ops = ð_igb_ops;
@@ -592,6 +606,13 @@ eth_igb_dev_init(__attribute__((unused)) struct eth_driver *eth_drv,
eth_dev->data->port_id, pci_dev->id.vendor_id,
pci_dev->id.device_id);
+ /* set max interrupt vfio request */
+ memset(&dev_info, 0, sizeof(dev_info));
+ eth_igb_infos_get(eth_dev, &dev_info);
+
+ pci_dev->intr_handle.max_intr = dev_info.max_rx_queues +
+ E1000_MAX_OTHER_INTR;
+
rte_intr_callback_register(&(pci_dev->intr_handle),
eth_igb_interrupt_handler, (void *)eth_dev);
@@ -754,7 +775,7 @@ eth_igb_start(struct rte_eth_dev *dev)
{
struct e1000_hw *hw =
E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
- int ret, i, mask;
+ int ret, mask;
uint32_t ctrl_ext;
PMD_INIT_FUNC_TRACE();
@@ -794,6 +815,9 @@ eth_igb_start(struct rte_eth_dev *dev)
/* configure PF module if SRIOV enabled */
igb_pf_host_configure(dev);
+ /* confiugre msix for rx interrupt */
+ eth_igb_configure_msix_intr(dev);
+
/* Configure for OS presence */
igb_init_manageability(hw);
@@ -821,33 +845,9 @@ eth_igb_start(struct rte_eth_dev *dev)
igb_vmdq_vlan_hw_filter_enable(dev);
}
- /*
- * Configure the Interrupt Moderation register (EITR) with the maximum
- * possible value (0xFFFF) to minimize "System Partial Write" issued by
- * spurious [DMA] memory updates of RX and TX ring descriptors.
- *
- * With a EITR granularity of 2 microseconds in the 82576, only 7/8
- * spurious memory updates per second should be expected.
- * ((65535 * 2) / 1000.1000 ~= 0.131 second).
- *
- * Because interrupts are not used at all, the MSI-X is not activated
- * and interrupt moderation is controlled by EITR[0].
- *
- * Note that having [almost] disabled memory updates of RX and TX ring
- * descriptors through the Interrupt Moderation mechanism, memory
- * updates of ring descriptors are now moderated by the configurable
- * value of Write-Back Threshold registers.
- */
if ((hw->mac.type == e1000_82576) || (hw->mac.type == e1000_82580) ||
(hw->mac.type == e1000_i350) || (hw->mac.type == e1000_i210) ||
(hw->mac.type == e1000_i211)) {
- uint32_t ivar;
-
- /* Enable all RX & TX queues in the IVAR registers */
- ivar = (uint32_t) ((E1000_IVAR_VALID << 16) | E1000_IVAR_VALID);
- for (i = 0; i < 8; i++)
- E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, i, ivar);
-
/* Configure EITR with the maximum possible value (0xFFFF) */
E1000_WRITE_REG(hw, E1000_EITR(0), 0xFFFF);
}
@@ -901,6 +901,10 @@ eth_igb_start(struct rte_eth_dev *dev)
if (dev->data->dev_conf.intr_conf.lsc != 0)
ret = eth_igb_lsc_interrupt_setup(dev);
+ /* check if rxq interrupt is enabled */
+ if (dev->data->dev_conf.intr_conf.rxq != 0)
+ eth_igb_rxq_interrupt_setup(dev);
+
/* resume enabled intr since hw reset */
igb_intr_enable(dev);
@@ -1796,6 +1800,34 @@ eth_igb_lsc_interrupt_setup(struct rte_eth_dev *dev)
}
/*
+ * It clears the interrupt causes and enables the interrupt.
+ * It will be called once only during nic initialized.
+ *
+ * @param dev
+ * Pointer to struct rte_eth_dev.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+static int eth_igb_rxq_interrupt_setup(struct rte_eth_dev *dev)
+{
+ uint32_t mask, regval;
+ struct e1000_hw *hw =
+ E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ struct rte_eth_dev_info dev_info;
+
+ memset(&dev_info, 0, sizeof(dev_info));
+ eth_igb_infos_get(dev, &dev_info);
+
+ mask = 0xFFFFFFFF >> (32 - dev_info.max_rx_queues);
+ regval = E1000_READ_REG(hw, E1000_EIMS);
+ E1000_WRITE_REG(hw, E1000_EIMS, regval | mask);
+
+ return 0;
+}
+
+/*
* It reads ICR and gets interrupt causes, check it and set a bit flag
* to update link status.
*
@@ -3256,5 +3288,151 @@ static struct rte_driver pmd_igbvf_drv = {
.init = rte_igbvf_pmd_init,
};
+static int
+eth_igb_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
+{
+ struct e1000_hw *hw =
+ E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ uint32_t mask = 1 << queue_id;
+
+ E1000_WRITE_REG(hw, E1000_EIMC, mask);
+ E1000_WRITE_FLUSH(hw);
+
+ return 0;
+}
+
+static int
+eth_igb_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
+{
+ struct e1000_hw *hw =
+ E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ uint32_t mask = 1 << queue_id;
+ uint32_t regval;
+
+ regval = E1000_READ_REG(hw, E1000_EIMS);
+ E1000_WRITE_REG(hw, E1000_EIMS, regval | mask);
+ E1000_WRITE_FLUSH(hw);
+
+ return 0;
+}
+
+static void
+eth_igb_write_ivar(struct e1000_hw *hw, uint8_t msix_vector,
+ uint8_t index, uint8_t offset)
+{
+ uint32_t val = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
+
+ /* clear bits */
+ val &= ~((uint32_t)0xFF << offset);
+
+ /* write vector and valid bit */
+ val |= (msix_vector | E1000_IVAR_VALID) << offset;
+
+ E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, val);
+}
+
+static void
+eth_igb_assign_msix_vector(struct e1000_hw *hw, int8_t direction,
+ uint8_t queue, uint8_t msix_vector)
+{
+ uint32_t tmp = 0;
+ if (hw->mac.type == e1000_82575) {
+ if (direction == 0)
+ tmp = E1000_EICR_RX_QUEUE0 << queue;
+ else if (direction == 1)
+ tmp = E1000_EICR_TX_QUEUE0 << queue;
+ E1000_WRITE_REG(hw, E1000_MSIXBM(msix_vector), tmp);
+ } else if (hw->mac.type == e1000_82576) {
+ if ((direction == 0) || (direction == 1))
+ eth_igb_write_ivar(hw, msix_vector, queue & 0x7,
+ ((queue & 0x8) << 1) + 8 * direction);
+ } else if ((hw->mac.type == e1000_82580) ||
+ (hw->mac.type == e1000_i350) ||
+ (hw->mac.type == e1000_i354) ||
+ (hw->mac.type == e1000_i210) ||
+ (hw->mac.type == e1000_i211)) {
+ if ((direction == 0) || (direction == 1))
+ eth_igb_write_ivar(hw, msix_vector,
+ queue >> 1,
+ ((queue & 0x1) << 4) + 8 * direction);
+ }
+}
+
+/*
+ * Sets up the hardware to generate MSI-X interrupts properly
+ * @hw
+ * board private structure
+ */
+static void
+eth_igb_configure_msix_intr(struct rte_eth_dev *dev)
+{
+ int queue_id;
+ uint32_t tmpval, regval, intr_mask;
+ uint32_t max_rx_queues;
+ struct rte_eth_dev_info dev_info;
+ struct e1000_hw *hw =
+ E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+ memset(&dev_info, 0, sizeof(dev_info));
+ eth_igb_infos_get(dev, &dev_info);
+ max_rx_queues = dev_info.max_rx_queues;
+
+ /* set interrupt vector for other causes */
+ if (hw->mac.type == e1000_82575) {
+ tmpval = E1000_READ_REG(hw, E1000_CTRL_EXT);
+ /* enable MSI-X PBA support */
+ tmpval |= E1000_CTRL_EXT_PBA_CLR;
+
+ /* Auto-Mask interrupts upon ICR read */
+ tmpval |= E1000_CTRL_EXT_EIAME;
+ tmpval |= E1000_CTRL_EXT_IRCA;
+
+ E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmpval);
+
+ /* enable msix_other interrupt */
+ E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), 0, E1000_EIMS_OTHER);
+ regval = E1000_READ_REG(hw, E1000_EIAC);
+ E1000_WRITE_REG(hw, E1000_EIAC, regval | E1000_EIMS_OTHER);
+ regval = E1000_READ_REG(hw, E1000_EIAM);
+ E1000_WRITE_REG(hw, E1000_EIMS, regval | E1000_EIMS_OTHER);
+ } else if ((hw->mac.type == e1000_82576) ||
+ (hw->mac.type == e1000_82580) ||
+ (hw->mac.type == e1000_i350) ||
+ (hw->mac.type == e1000_i354) ||
+ (hw->mac.type == e1000_i210) ||
+ (hw->mac.type == e1000_i211)) {
+ /* turn on MSI-X capability first */
+ E1000_WRITE_REG(hw, E1000_GPIE, E1000_GPIE_MSIX_MODE |
+ E1000_GPIE_PBA | E1000_GPIE_EIAME |
+ E1000_GPIE_NSICR);
+
+ /* enable msix_other interrupt */
+ intr_mask = 1 << max_rx_queues;
+ regval = E1000_READ_REG(hw, E1000_EIAC);
+ E1000_WRITE_REG(hw, E1000_EIAC, regval | intr_mask);
+ regval = E1000_READ_REG(hw, E1000_EIMS);
+ E1000_WRITE_REG(hw, E1000_EIMS, regval | intr_mask);
+ tmpval = (max_rx_queues | E1000_IVAR_VALID) << 8;
+
+ E1000_WRITE_REG(hw, E1000_IVAR_MISC, tmpval);
+ }
+
+ /*
+ * use EIAM and EIAC to auto-mask and auto-clear when MSI-X interrupt
+ * is asserted, this saves a register write for every interrupt
+ */
+ intr_mask = 0xFFFFFFFF >> (32 - max_rx_queues);
+ regval = E1000_READ_REG(hw, E1000_EIAC);
+ E1000_WRITE_REG(hw, E1000_EIAC, regval | intr_mask);
+ regval = E1000_READ_REG(hw, E1000_EIAM);
+ E1000_WRITE_REG(hw, E1000_EIAM, regval | intr_mask);
+
+ for (queue_id = 0; queue_id < VFIO_MAX_QUEUE_ID; queue_id++)
+ eth_igb_assign_msix_vector(hw, 0, queue_id, queue_id);
+
+ E1000_WRITE_FLUSH(hw);
+}
+
+
PMD_REGISTER_DRIVER(pmd_igb_drv);
PMD_REGISTER_DRIVER(pmd_igbvf_drv);
--
1.8.1.4
^ permalink raw reply [flat|nested] 19+ messages in thread
* [dpdk-dev] [PATCH v4 4/5] eal: add per rx queue interrupt handling based on VFIO
2015-02-19 13:48 [dpdk-dev] [PATCH v4 0/5] Interrupt mode PMD Zhou Danny
` (2 preceding siblings ...)
2015-02-19 13:48 ` [dpdk-dev] [PATCH v4 3/5] igb: enable rx queue interrupts for PF Zhou Danny
@ 2015-02-19 13:48 ` Zhou Danny
2015-02-20 22:43 ` Thomas Monjalon
2015-02-21 2:56 ` Jun Xiao
2015-02-19 13:48 ` [dpdk-dev] [PATCH v4 5/5] l3fwd-power: enable one-shot rx interrupt and polling/interrupt mode switch Zhou Danny
2015-02-20 8:50 ` [dpdk-dev] [PATCH v4 0/5] Interrupt mode PMD Gonzalez Monroy, Sergio
5 siblings, 2 replies; 19+ messages in thread
From: Zhou Danny @ 2015-02-19 13:48 UTC (permalink / raw)
To: dev
v4 changes:
- Adjust position of new-added structure fields
v3 changes:
- Fix review comments
v2 changes:
- Fix compilation issue for a missed header file
- Bug fix: free unreleased resources on the exception path before return
- Consolidate coding style related review comments
This patch does below:
- Create multiple VFIO eventfd for rx queues.
- Handle per rx queue interrupt.
- Eliminate unnecessary suspended DPDK polling thread wakeup mechanism
for rx interrupt by allowing polling thread epoll_wait rx queue
interrupt notification.
Signed-off-by: Danny Zhou <danny.zhou@intel.com>
Tested-by: Yong Liu <yong.liu@intel.com>
---
lib/librte_eal/common/include/rte_eal.h | 12 ++
lib/librte_eal/linuxapp/eal/Makefile | 1 +
lib/librte_eal/linuxapp/eal/eal_interrupts.c | 190 ++++++++++++++++-----
lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 12 +-
.../linuxapp/eal/include/exec-env/rte_interrupts.h | 4 +
5 files changed, 175 insertions(+), 44 deletions(-)
diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h
index f4ecd2e..d81331f 100644
--- a/lib/librte_eal/common/include/rte_eal.h
+++ b/lib/librte_eal/common/include/rte_eal.h
@@ -150,6 +150,18 @@ int rte_eal_iopl_init(void);
* - On failure, a negative error value.
*/
int rte_eal_init(int argc, char **argv);
+
+/**
+ * @param port_id
+ * the port id
+ * @param queue_id
+ * the queue id
+ * @return
+ * - On success, return 0
+ * - On failure, returns -1.
+ */
+int rte_eal_wait_rx_intr(uint8_t port_id, uint8_t queue_id);
+
/**
* Usage function typedef used by the application usage function.
*
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index e117cec..c593dfa 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -43,6 +43,7 @@ CFLAGS += -I$(SRCDIR)/include
CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common
CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include
CFLAGS += -I$(RTE_SDK)/lib/librte_ring
+CFLAGS += -I$(RTE_SDK)/lib/librte_mbuf
CFLAGS += -I$(RTE_SDK)/lib/librte_mempool
CFLAGS += -I$(RTE_SDK)/lib/librte_malloc
CFLAGS += -I$(RTE_SDK)/lib/librte_ether
diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
index dc2668a..97215ad 100644
--- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c
+++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
@@ -64,6 +64,7 @@
#include <rte_malloc.h>
#include <rte_errno.h>
#include <rte_spinlock.h>
+#include <rte_ethdev.h>
#include "eal_private.h"
#include "eal_vfio.h"
@@ -127,6 +128,9 @@ static pthread_t intr_thread;
#ifdef VFIO_PRESENT
#define IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + sizeof(int))
+/* irq set buffer length for queue interrupts and LSC interrupt */
+#define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
+ sizeof(int) * (VFIO_MAX_QUEUE_ID + 1))
/* enable legacy (INTx) interrupts */
static int
@@ -218,10 +222,10 @@ vfio_disable_intx(struct rte_intr_handle *intr_handle) {
return 0;
}
-/* enable MSI-X interrupts */
+/* enable MSI interrupts */
static int
vfio_enable_msi(struct rte_intr_handle *intr_handle) {
- int len, ret;
+ int len, ret, max_intr;
char irq_set_buf[IRQ_SET_BUF_LEN];
struct vfio_irq_set *irq_set;
int *fd_ptr;
@@ -230,12 +234,19 @@ vfio_enable_msi(struct rte_intr_handle *intr_handle) {
irq_set = (struct vfio_irq_set *) irq_set_buf;
irq_set->argsz = len;
- irq_set->count = 1;
+ if ((!intr_handle->max_intr) ||
+ (intr_handle->max_intr > VFIO_MAX_QUEUE_ID))
+ max_intr = VFIO_MAX_QUEUE_ID + 1;
+ else
+ max_intr = intr_handle->max_intr;
+
+ irq_set->count = max_intr;
irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
irq_set->start = 0;
fd_ptr = (int *) &irq_set->data;
- *fd_ptr = intr_handle->fd;
+ memcpy(fd_ptr, intr_handle->queue_fd, sizeof(intr_handle->queue_fd));
+ fd_ptr[max_intr - 1] = intr_handle->fd;
ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
@@ -244,27 +255,10 @@ vfio_enable_msi(struct rte_intr_handle *intr_handle) {
intr_handle->fd);
return -1;
}
-
- /* manually trigger interrupt to enable it */
- memset(irq_set, 0, len);
- len = sizeof(struct vfio_irq_set);
- irq_set->argsz = len;
- irq_set->count = 1;
- irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
- irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
- irq_set->start = 0;
-
- ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
-
- if (ret) {
- RTE_LOG(ERR, EAL, "Error triggering MSI interrupts for fd %d\n",
- intr_handle->fd);
- return -1;
- }
return 0;
}
-/* disable MSI-X interrupts */
+/* disable MSI interrupts */
static int
vfio_disable_msi(struct rte_intr_handle *intr_handle) {
struct vfio_irq_set *irq_set;
@@ -292,8 +286,8 @@ vfio_disable_msi(struct rte_intr_handle *intr_handle) {
/* enable MSI-X interrupts */
static int
vfio_enable_msix(struct rte_intr_handle *intr_handle) {
- int len, ret;
- char irq_set_buf[IRQ_SET_BUF_LEN];
+ int len, ret, max_intr;
+ char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
struct vfio_irq_set *irq_set;
int *fd_ptr;
@@ -301,12 +295,19 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) {
irq_set = (struct vfio_irq_set *) irq_set_buf;
irq_set->argsz = len;
- irq_set->count = 1;
+ if ((!intr_handle->max_intr) ||
+ (intr_handle->max_intr > VFIO_MAX_QUEUE_ID))
+ max_intr = VFIO_MAX_QUEUE_ID + 1;
+ else
+ max_intr = intr_handle->max_intr;
+
+ irq_set->count = max_intr;
irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
irq_set->start = 0;
fd_ptr = (int *) &irq_set->data;
- *fd_ptr = intr_handle->fd;
+ memcpy(fd_ptr, intr_handle->queue_fd, sizeof(intr_handle->queue_fd));
+ fd_ptr[max_intr - 1] = intr_handle->fd;
ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
@@ -316,22 +317,6 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) {
return -1;
}
- /* manually trigger interrupt to enable it */
- memset(irq_set, 0, len);
- len = sizeof(struct vfio_irq_set);
- irq_set->argsz = len;
- irq_set->count = 1;
- irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
- irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
- irq_set->start = 0;
-
- ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
-
- if (ret) {
- RTE_LOG(ERR, EAL, "Error triggering MSI-X interrupts for fd %d\n",
- intr_handle->fd);
- return -1;
- }
return 0;
}
@@ -339,7 +324,7 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) {
static int
vfio_disable_msix(struct rte_intr_handle *intr_handle) {
struct vfio_irq_set *irq_set;
- char irq_set_buf[IRQ_SET_BUF_LEN];
+ char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
int len, ret;
len = sizeof(struct vfio_irq_set);
@@ -824,3 +809,122 @@ rte_eal_intr_init(void)
return -ret;
}
+static void
+eal_intr_process_rx_interrupts(uint8_t port_id,
+ struct epoll_event *events, int nfds)
+{
+ int n, bytes_read;
+ union rte_intr_read_buffer buf;
+ struct rte_intr_handle intr_handle =
+ rte_eth_devices[port_id].pci_dev->intr_handle;
+
+ for (n = 0; n < nfds; n++) {
+ /* set the length to be read for different handle type */
+ switch (intr_handle.type) {
+ case RTE_INTR_HANDLE_UIO:
+ bytes_read = sizeof(buf.uio_intr_count);
+ break;
+ case RTE_INTR_HANDLE_ALARM:
+ bytes_read = sizeof(buf.timerfd_num);
+ break;
+#ifdef VFIO_PRESENT
+ case RTE_INTR_HANDLE_VFIO_MSIX:
+ case RTE_INTR_HANDLE_VFIO_MSI:
+ case RTE_INTR_HANDLE_VFIO_LEGACY:
+ bytes_read = sizeof(buf.vfio_intr_count);
+ break;
+#endif
+ default:
+ bytes_read = 1;
+ break;
+ }
+
+ /**
+ * read out to clear the ready-to-be-read flag
+ * for epoll_wait.
+ */
+ bytes_read = read(events[n].data.fd, &buf, bytes_read);
+ if (bytes_read < 0)
+ RTE_LOG(ERR, EAL, "Error reading from file "
+ "descriptor %d: %s\n", events[n].data.fd,
+ strerror(errno));
+ else if (bytes_read == 0)
+ RTE_LOG(ERR, EAL, "Read nothing from file "
+ "descriptor %d\n", events[n].data.fd);
+ }
+}
+
+static void
+eal_intr_handle_rx_interrupts(uint8_t port_id, int pfd, unsigned totalfds)
+{
+ struct epoll_event events[totalfds];
+ int nfds = 0;
+
+ do {
+ nfds = epoll_wait(pfd, events, totalfds,
+ EAL_INTR_EPOLL_WAIT_FOREVER);
+ /* epoll_wait fail */
+ if (nfds < 0) {
+ RTE_LOG(ERR, EAL,
+ "epoll_wait returns with fail\n");
+ return;
+ }
+ } while (nfds == 0);
+
+ /* epoll_wait has at least one fd ready to read */
+ eal_intr_process_rx_interrupts(port_id, events, nfds);
+}
+
+int
+rte_eal_wait_rx_intr(uint8_t port_id, uint8_t queue_id)
+{
+ struct rte_intr_handle intr_handle =
+ rte_eth_devices[port_id].pci_dev->intr_handle;
+ struct epoll_event ev;
+ unsigned numfds = 0;
+
+ /* create epoll fd */
+ int pfd = epoll_create(1);
+ if (pfd < 0) {
+ RTE_LOG(ERR, EAL, "Cannot create epoll instance\n");
+ return -1;
+ }
+
+ rte_spinlock_lock(&intr_lock);
+
+ ev.events = EPOLLIN | EPOLLPRI;
+ switch (intr_handle.type) {
+ case RTE_INTR_HANDLE_UIO:
+ ev.data.fd = intr_handle.fd;
+ break;
+#ifdef VFIO_PRESENT
+ case RTE_INTR_HANDLE_VFIO_MSIX:
+ case RTE_INTR_HANDLE_VFIO_MSI:
+ case RTE_INTR_HANDLE_VFIO_LEGACY:
+ ev.data.fd = intr_handle.queue_fd[queue_id];
+ break;
+#endif
+ default:
+ rte_spinlock_unlock(&intr_lock);
+ close(pfd);
+ return -1;
+ }
+
+ if (epoll_ctl(pfd, EPOLL_CTL_ADD, ev.data.fd, &ev) < 0) {
+ RTE_LOG(ERR, EAL, "Error adding fd %d epoll_ctl, %s\n",
+ intr_handle.queue_fd[queue_id], strerror(errno));
+ } else
+ numfds++;
+
+ rte_spinlock_unlock(&intr_lock);
+ /* serve the interrupt */
+ eal_intr_handle_rx_interrupts(port_id, pfd, numfds);
+
+ /**
+ * when we return, we need to rebuild the
+ * list of fds to monitor.
+ */
+ close(pfd);
+
+ return 0;
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
index 20e0977..0e5fa76 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
@@ -283,11 +283,21 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd)
dev->intr_handle.fd = fd;
dev->intr_handle.vfio_dev_fd = vfio_dev_fd;
-
switch (i) {
case VFIO_PCI_MSIX_IRQ_INDEX:
internal_config.vfio_intr_mode = RTE_INTR_MODE_MSIX;
dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX;
+ for (i = 0; i < VFIO_MAX_QUEUE_ID; i++) {
+ fd = eventfd(0, 0);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL,
+ "cannot setup eventfd,"
+ "error %i (%s)\n",
+ errno, strerror(errno));
+ return -1;
+ }
+ dev->intr_handle.queue_fd[i] = fd;
+ }
break;
case VFIO_PCI_MSI_IRQ_INDEX:
internal_config.vfio_intr_mode = RTE_INTR_MODE_MSI;
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
index 23eafd9..3cab3ce 100644
--- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
@@ -38,6 +38,8 @@
#ifndef _RTE_LINUXAPP_INTERRUPTS_H_
#define _RTE_LINUXAPP_INTERRUPTS_H_
+#define VFIO_MAX_QUEUE_ID 32
+
enum rte_intr_handle_type {
RTE_INTR_HANDLE_UNKNOWN = 0,
RTE_INTR_HANDLE_UIO, /**< uio device handle */
@@ -53,6 +55,8 @@ struct rte_intr_handle {
int vfio_dev_fd; /**< VFIO device file descriptor */
int fd; /**< file descriptor */
enum rte_intr_handle_type type; /**< handle type */
+ int max_intr; /**< max interrupt requested */
+ int queue_fd[VFIO_MAX_QUEUE_ID]; /**< rx and tx queue interrupt file descriptor */
};
#endif /* _RTE_LINUXAPP_INTERRUPTS_H_ */
--
1.8.1.4
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [dpdk-dev] [PATCH v4 4/5] eal: add per rx queue interrupt handling based on VFIO
2015-02-19 13:48 ` [dpdk-dev] [PATCH v4 4/5] eal: add per rx queue interrupt handling based on VFIO Zhou Danny
@ 2015-02-20 22:43 ` Thomas Monjalon
2015-02-23 11:47 ` Zhou, Danny
2015-02-21 2:56 ` Jun Xiao
1 sibling, 1 reply; 19+ messages in thread
From: Thomas Monjalon @ 2015-02-20 22:43 UTC (permalink / raw)
To: Zhou Danny, Sergio Gonzalez Monroy; +Cc: dev
2015-02-19 21:48, Zhou Danny:
> v4 changes:
> - Adjust position of new-added structure fields
>
> v3 changes:
> - Fix review comments
>
> v2 changes:
> - Fix compilation issue for a missed header file
> - Bug fix: free unreleased resources on the exception path before return
> - Consolidate coding style related review comments
>
> This patch does below:
> - Create multiple VFIO eventfd for rx queues.
> - Handle per rx queue interrupt.
> - Eliminate unnecessary suspended DPDK polling thread wakeup mechanism
> for rx interrupt by allowing polling thread epoll_wait rx queue
> interrupt notification.
>
> Signed-off-by: Danny Zhou <danny.zhou@intel.com>
> Tested-by: Yong Liu <yong.liu@intel.com>
[...]
> --- a/lib/librte_eal/linuxapp/eal/Makefile
> +++ b/lib/librte_eal/linuxapp/eal/Makefile
> @@ -43,6 +43,7 @@ CFLAGS += -I$(SRCDIR)/include
> CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common
> CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include
> CFLAGS += -I$(RTE_SDK)/lib/librte_ring
> +CFLAGS += -I$(RTE_SDK)/lib/librte_mbuf
> CFLAGS += -I$(RTE_SDK)/lib/librte_mempool
> CFLAGS += -I$(RTE_SDK)/lib/librte_malloc
> CFLAGS += -I$(RTE_SDK)/lib/librte_ether
Why do we need mbuf in EAL?
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [dpdk-dev] [PATCH v4 4/5] eal: add per rx queue interrupt handling based on VFIO
2015-02-20 22:43 ` Thomas Monjalon
@ 2015-02-23 11:47 ` Zhou, Danny
2015-02-23 13:28 ` Thomas Monjalon
0 siblings, 1 reply; 19+ messages in thread
From: Zhou, Danny @ 2015-02-23 11:47 UTC (permalink / raw)
To: Thomas Monjalon, Gonzalez Monroy, Sergio; +Cc: dev
> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> Sent: Saturday, February 21, 2015 6:44 AM
> To: Zhou, Danny; Gonzalez Monroy, Sergio
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v4 4/5] eal: add per rx queue interrupt handling based on VFIO
>
> 2015-02-19 21:48, Zhou Danny:
> > v4 changes:
> > - Adjust position of new-added structure fields
> >
> > v3 changes:
> > - Fix review comments
> >
> > v2 changes:
> > - Fix compilation issue for a missed header file
> > - Bug fix: free unreleased resources on the exception path before return
> > - Consolidate coding style related review comments
> >
> > This patch does below:
> > - Create multiple VFIO eventfd for rx queues.
> > - Handle per rx queue interrupt.
> > - Eliminate unnecessary suspended DPDK polling thread wakeup mechanism
> > for rx interrupt by allowing polling thread epoll_wait rx queue
> > interrupt notification.
> >
> > Signed-off-by: Danny Zhou <danny.zhou@intel.com>
> > Tested-by: Yong Liu <yong.liu@intel.com>
> [...]
> > --- a/lib/librte_eal/linuxapp/eal/Makefile
> > +++ b/lib/librte_eal/linuxapp/eal/Makefile
> > @@ -43,6 +43,7 @@ CFLAGS += -I$(SRCDIR)/include
> > CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common
> > CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include
> > CFLAGS += -I$(RTE_SDK)/lib/librte_ring
> > +CFLAGS += -I$(RTE_SDK)/lib/librte_mbuf
> > CFLAGS += -I$(RTE_SDK)/lib/librte_mempool
> > CFLAGS += -I$(RTE_SDK)/lib/librte_malloc
> > CFLAGS += -I$(RTE_SDK)/lib/librte_ether
>
> Why do we need mbuf in EAL?
The file eal_interrupts.c includes rte_ethdev.h which defines structure rte_eth_devices that
eal needs to use in order to get per-port intr_handle. The rte_ethdev.h includes the rte_mbuf.h
so the Makefile is updated here.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [dpdk-dev] [PATCH v4 4/5] eal: add per rx queue interrupt handling based on VFIO
2015-02-23 11:47 ` Zhou, Danny
@ 2015-02-23 13:28 ` Thomas Monjalon
2015-02-23 15:02 ` Zhou, Danny
0 siblings, 1 reply; 19+ messages in thread
From: Thomas Monjalon @ 2015-02-23 13:28 UTC (permalink / raw)
To: Zhou, Danny; +Cc: dev
2015-02-23 11:47, Zhou, Danny:
> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> > 2015-02-19 21:48, Zhou Danny:
> > > --- a/lib/librte_eal/linuxapp/eal/Makefile
> > > +++ b/lib/librte_eal/linuxapp/eal/Makefile
> > > @@ -43,6 +43,7 @@ CFLAGS += -I$(SRCDIR)/include
> > > CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common
> > > CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include
> > > CFLAGS += -I$(RTE_SDK)/lib/librte_ring
> > > +CFLAGS += -I$(RTE_SDK)/lib/librte_mbuf
> > > CFLAGS += -I$(RTE_SDK)/lib/librte_mempool
> > > CFLAGS += -I$(RTE_SDK)/lib/librte_malloc
> > > CFLAGS += -I$(RTE_SDK)/lib/librte_ether
> >
> > Why do we need mbuf in EAL?
>
> The file eal_interrupts.c includes rte_ethdev.h which defines structure rte_eth_devices that
> eal needs to use in order to get per-port intr_handle. The rte_ethdev.h includes the rte_mbuf.h
> so the Makefile is updated here.
I see. You are breaking layer isolation by introducing ethdev in EAL.
The cause seems to be:
+ struct rte_intr_handle intr_handle =
+ rte_eth_devices[port_id].pci_dev->intr_handle;
Maybe that pci_dev should be a parameter of the function.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [dpdk-dev] [PATCH v4 4/5] eal: add per rx queue interrupt handling based on VFIO
2015-02-23 13:28 ` Thomas Monjalon
@ 2015-02-23 15:02 ` Zhou, Danny
2015-02-23 15:19 ` Thomas Monjalon
0 siblings, 1 reply; 19+ messages in thread
From: Zhou, Danny @ 2015-02-23 15:02 UTC (permalink / raw)
To: Thomas Monjalon; +Cc: dev
> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> Sent: Monday, February 23, 2015 9:28 PM
> To: Zhou, Danny
> Cc: Gonzalez Monroy, Sergio; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v4 4/5] eal: add per rx queue interrupt handling based on VFIO
>
> 2015-02-23 11:47, Zhou, Danny:
> > From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> > > 2015-02-19 21:48, Zhou Danny:
> > > > --- a/lib/librte_eal/linuxapp/eal/Makefile
> > > > +++ b/lib/librte_eal/linuxapp/eal/Makefile
> > > > @@ -43,6 +43,7 @@ CFLAGS += -I$(SRCDIR)/include
> > > > CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common
> > > > CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include
> > > > CFLAGS += -I$(RTE_SDK)/lib/librte_ring
> > > > +CFLAGS += -I$(RTE_SDK)/lib/librte_mbuf
> > > > CFLAGS += -I$(RTE_SDK)/lib/librte_mempool
> > > > CFLAGS += -I$(RTE_SDK)/lib/librte_malloc
> > > > CFLAGS += -I$(RTE_SDK)/lib/librte_ether
> > >
> > > Why do we need mbuf in EAL?
> >
> > The file eal_interrupts.c includes rte_ethdev.h which defines structure rte_eth_devices that
> > eal needs to use in order to get per-port intr_handle. The rte_ethdev.h includes the rte_mbuf.h
> > so the Makefile is updated here.
>
> I see. You are breaking layer isolation by introducing ethdev in EAL.
> The cause seems to be:
>
> + struct rte_intr_handle intr_handle =
> + rte_eth_devices[port_id].pci_dev->intr_handle;
>
> Maybe that pci_dev should be a parameter of the function.
Adding pci_dev as a parameter has similar problem due to eal does not include rte_pci.h which
defines struct rte_pci_device. It looks the new-added function rte_eal_wait_rx_intr(uint8_t port_id, uint8_t queue_id);
is not proper to be declared in rte_eal.h, will rename it to
rte_intr_wait_rx (struct rte_intr_handle *intr_handle, uint8_t queue_id);
and then move declaration from rte_eal.h to rte_interrupts.h. So isolation can be avoided and no need to includes rte_ethdev.h
and change Makefile.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [dpdk-dev] [PATCH v4 4/5] eal: add per rx queue interrupt handling based on VFIO
2015-02-23 15:02 ` Zhou, Danny
@ 2015-02-23 15:19 ` Thomas Monjalon
2015-02-23 15:24 ` Zhou, Danny
0 siblings, 1 reply; 19+ messages in thread
From: Thomas Monjalon @ 2015-02-23 15:19 UTC (permalink / raw)
To: Zhou, Danny; +Cc: dev
2015-02-23 15:02, Zhou, Danny:
> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> > 2015-02-23 11:47, Zhou, Danny:
> > > From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> > > > 2015-02-19 21:48, Zhou Danny:
> > > > > --- a/lib/librte_eal/linuxapp/eal/Makefile
> > > > > +++ b/lib/librte_eal/linuxapp/eal/Makefile
> > > > > @@ -43,6 +43,7 @@ CFLAGS += -I$(SRCDIR)/include
> > > > > CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common
> > > > > CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include
> > > > > CFLAGS += -I$(RTE_SDK)/lib/librte_ring
> > > > > +CFLAGS += -I$(RTE_SDK)/lib/librte_mbuf
> > > > > CFLAGS += -I$(RTE_SDK)/lib/librte_mempool
> > > > > CFLAGS += -I$(RTE_SDK)/lib/librte_malloc
> > > > > CFLAGS += -I$(RTE_SDK)/lib/librte_ether
> > > >
> > > > Why do we need mbuf in EAL?
> > >
> > > The file eal_interrupts.c includes rte_ethdev.h which defines structure rte_eth_devices that
> > > eal needs to use in order to get per-port intr_handle. The rte_ethdev.h includes the rte_mbuf.h
> > > so the Makefile is updated here.
> >
> > I see. You are breaking layer isolation by introducing ethdev in EAL.
> > The cause seems to be:
> >
> > + struct rte_intr_handle intr_handle =
> > + rte_eth_devices[port_id].pci_dev->intr_handle;
> >
> > Maybe that pci_dev should be a parameter of the function.
>
> Adding pci_dev as a parameter has similar problem due to eal does not include rte_pci.h which
I don't understand your assertion. rte_pci.h is part of EAL.
> defines struct rte_pci_device. It looks the new-added function rte_eal_wait_rx_intr(uint8_t port_id, uint8_t queue_id);
> is not proper to be declared in rte_eal.h, will rename it to
> rte_intr_wait_rx (struct rte_intr_handle *intr_handle, uint8_t queue_id);
>
> and then move declaration from rte_eal.h to rte_interrupts.h. So isolation can be avoided and no need to includes rte_ethdev.h
> and change Makefile.
Yes could be better.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [dpdk-dev] [PATCH v4 4/5] eal: add per rx queue interrupt handling based on VFIO
2015-02-23 15:19 ` Thomas Monjalon
@ 2015-02-23 15:24 ` Zhou, Danny
0 siblings, 0 replies; 19+ messages in thread
From: Zhou, Danny @ 2015-02-23 15:24 UTC (permalink / raw)
To: Thomas Monjalon; +Cc: dev
> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> Sent: Monday, February 23, 2015 11:19 PM
> To: Zhou, Danny
> Cc: Gonzalez Monroy, Sergio; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v4 4/5] eal: add per rx queue interrupt handling based on VFIO
>
> 2015-02-23 15:02, Zhou, Danny:
> > From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> > > 2015-02-23 11:47, Zhou, Danny:
> > > > From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> > > > > 2015-02-19 21:48, Zhou Danny:
> > > > > > --- a/lib/librte_eal/linuxapp/eal/Makefile
> > > > > > +++ b/lib/librte_eal/linuxapp/eal/Makefile
> > > > > > @@ -43,6 +43,7 @@ CFLAGS += -I$(SRCDIR)/include
> > > > > > CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common
> > > > > > CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include
> > > > > > CFLAGS += -I$(RTE_SDK)/lib/librte_ring
> > > > > > +CFLAGS += -I$(RTE_SDK)/lib/librte_mbuf
> > > > > > CFLAGS += -I$(RTE_SDK)/lib/librte_mempool
> > > > > > CFLAGS += -I$(RTE_SDK)/lib/librte_malloc
> > > > > > CFLAGS += -I$(RTE_SDK)/lib/librte_ether
> > > > >
> > > > > Why do we need mbuf in EAL?
> > > >
> > > > The file eal_interrupts.c includes rte_ethdev.h which defines structure rte_eth_devices that
> > > > eal needs to use in order to get per-port intr_handle. The rte_ethdev.h includes the rte_mbuf.h
> > > > so the Makefile is updated here.
> > >
> > > I see. You are breaking layer isolation by introducing ethdev in EAL.
> > > The cause seems to be:
> > >
> > > + struct rte_intr_handle intr_handle =
> > > + rte_eth_devices[port_id].pci_dev->intr_handle;
> > >
> > > Maybe that pci_dev should be a parameter of the function.
> >
> > Adding pci_dev as a parameter has similar problem due to eal does not include rte_pci.h which
>
> I don't understand your assertion. rte_pci.h is part of EAL.
>
rte_eal.h does not include any DPDK header file, adding pci_dev would force it to include rte_pci.h file.
With solution below, those kinds of mess could be completely avoided.
> > defines struct rte_pci_device. It looks the new-added function rte_eal_wait_rx_intr(uint8_t port_id, uint8_t queue_id);
> > is not proper to be declared in rte_eal.h, will rename it to
> > rte_intr_wait_rx (struct rte_intr_handle *intr_handle, uint8_t queue_id);
> >
> > and then move declaration from rte_eal.h to rte_interrupts.h. So isolation can be avoided and no need to includes
> rte_ethdev.h
> > and change Makefile.
>
> Yes could be better.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [dpdk-dev] [PATCH v4 4/5] eal: add per rx queue interrupt handling based on VFIO
2015-02-19 13:48 ` [dpdk-dev] [PATCH v4 4/5] eal: add per rx queue interrupt handling based on VFIO Zhou Danny
2015-02-20 22:43 ` Thomas Monjalon
@ 2015-02-21 2:56 ` Jun Xiao
2015-02-23 11:23 ` Zhou, Danny
1 sibling, 1 reply; 19+ messages in thread
From: Jun Xiao @ 2015-02-21 2:56 UTC (permalink / raw)
To: Zhou Danny; +Cc: dev
On 02/19, Zhou Danny wrote:
> v4 changes:
> - Adjust position of new-added structure fields
>
> v3 changes:
> - Fix review comments
>
> v2 changes:
> - Fix compilation issue for a missed header file
> - Bug fix: free unreleased resources on the exception path before return
> - Consolidate coding style related review comments
>
> This patch does below:
> - Create multiple VFIO eventfd for rx queues.
> - Handle per rx queue interrupt.
> - Eliminate unnecessary suspended DPDK polling thread wakeup mechanism
> for rx interrupt by allowing polling thread epoll_wait rx queue
> interrupt notification.
>
> Signed-off-by: Danny Zhou <danny.zhou@intel.com>
> Tested-by: Yong Liu <yong.liu@intel.com>
> ---
> lib/librte_eal/common/include/rte_eal.h | 12 ++
> lib/librte_eal/linuxapp/eal/Makefile | 1 +
> lib/librte_eal/linuxapp/eal/eal_interrupts.c | 190 ++++++++++++++++-----
> lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 12 +-
> .../linuxapp/eal/include/exec-env/rte_interrupts.h | 4 +
Hi Danny,
Could you rebase the patch to your commit 4a499c64959, otherwise
rte_interrupts.h cannot be applied.
Thanks,
Jun
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [dpdk-dev] [PATCH v4 4/5] eal: add per rx queue interrupt handling based on VFIO
2015-02-21 2:56 ` Jun Xiao
@ 2015-02-23 11:23 ` Zhou, Danny
0 siblings, 0 replies; 19+ messages in thread
From: Zhou, Danny @ 2015-02-23 11:23 UTC (permalink / raw)
To: Jun Xiao; +Cc: dev
> -----Original Message-----
> From: Jun Xiao [mailto:jun.xiao@cloudnetengine.com]
> Sent: Saturday, February 21, 2015 10:57 AM
> To: Zhou, Danny
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v4 4/5] eal: add per rx queue interrupt handling based on VFIO
>
> On 02/19, Zhou Danny wrote:
> > v4 changes:
> > - Adjust position of new-added structure fields
> >
> > v3 changes:
> > - Fix review comments
> >
> > v2 changes:
> > - Fix compilation issue for a missed header file
> > - Bug fix: free unreleased resources on the exception path before return
> > - Consolidate coding style related review comments
> >
> > This patch does below:
> > - Create multiple VFIO eventfd for rx queues.
> > - Handle per rx queue interrupt.
> > - Eliminate unnecessary suspended DPDK polling thread wakeup mechanism
> > for rx interrupt by allowing polling thread epoll_wait rx queue
> > interrupt notification.
> >
> > Signed-off-by: Danny Zhou <danny.zhou@intel.com>
> > Tested-by: Yong Liu <yong.liu@intel.com>
> > ---
> > lib/librte_eal/common/include/rte_eal.h | 12 ++
> > lib/librte_eal/linuxapp/eal/Makefile | 1 +
> > lib/librte_eal/linuxapp/eal/eal_interrupts.c | 190 ++++++++++++++++-----
> > lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 12 +-
> > .../linuxapp/eal/include/exec-env/rte_interrupts.h | 4 +
>
> Hi Danny,
>
> Could you rebase the patch to your commit 4a499c64959, otherwise
> rte_interrupts.h cannot be applied.
>
Ok, will do it.
> Thanks,
> Jun
^ permalink raw reply [flat|nested] 19+ messages in thread
* [dpdk-dev] [PATCH v4 5/5] l3fwd-power: enable one-shot rx interrupt and polling/interrupt mode switch
2015-02-19 13:48 [dpdk-dev] [PATCH v4 0/5] Interrupt mode PMD Zhou Danny
` (3 preceding siblings ...)
2015-02-19 13:48 ` [dpdk-dev] [PATCH v4 4/5] eal: add per rx queue interrupt handling based on VFIO Zhou Danny
@ 2015-02-19 13:48 ` Zhou Danny
2015-02-20 8:50 ` [dpdk-dev] [PATCH v4 0/5] Interrupt mode PMD Gonzalez Monroy, Sergio
5 siblings, 0 replies; 19+ messages in thread
From: Zhou Danny @ 2015-02-19 13:48 UTC (permalink / raw)
To: dev
v3 changes
- Add spinlock to ensure thread safe when accessing interrupt mask
register
v2 changes
- Remove unused function which is for debug purpose
Demonstrate how to handle per rx queue interrupt in a NAPI-like
implementation in usersapce. PDK polling thread mainly works in
polling mode and switch to interrupt mode only if there is no
any packet received in recent polls.
Usersapce interrupt notification generally takes a lot more cycles
than kernel, so one-shot interrupt is used here to guarantee minimum
overhead and DPDK polling thread returns to polling mode immediately
once it receives an interrupt notificaiton for incoming packet.
Signed-off-by: Danny Zhou <danny.zhou@intel.com>
Tested-by: Yong Liu <yong.liu@intel.com>
---
examples/l3fwd-power/main.c | 153 ++++++++++++++++++++++++++++++++------------
1 file changed, 112 insertions(+), 41 deletions(-)
diff --git a/examples/l3fwd-power/main.c b/examples/l3fwd-power/main.c
index f6b55b9..d482710 100644
--- a/examples/l3fwd-power/main.c
+++ b/examples/l3fwd-power/main.c
@@ -75,12 +75,14 @@
#include <rte_string_fns.h>
#include <rte_timer.h>
#include <rte_power.h>
+#include <rte_eal.h>
+#include <rte_spinlock.h>
#define RTE_LOGTYPE_L3FWD_POWER RTE_LOGTYPE_USER1
#define MAX_PKT_BURST 32
-#define MIN_ZERO_POLL_COUNT 5
+#define MIN_ZERO_POLL_COUNT 10
/* around 100ms at 2 Ghz */
#define TIMER_RESOLUTION_CYCLES 200000000ULL
@@ -156,6 +158,9 @@ static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
/* ethernet addresses of ports */
static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
+/* ethernet addresses of ports */
+static rte_spinlock_t locks[RTE_MAX_ETHPORTS];
+
/* mask of enabled ports */
static uint32_t enabled_port_mask = 0;
/* Ports set in promiscuous mode off by default. */
@@ -188,6 +193,9 @@ struct lcore_rx_queue {
#define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS
#define MAX_RX_QUEUE_PER_PORT 128
+#define MAX_RX_QUEUE_INTERRUPT_PER_PORT 16
+
+
#define MAX_LCORE_PARAMS 1024
struct lcore_params {
uint8_t port_id;
@@ -214,7 +222,7 @@ static uint16_t nb_lcore_params = sizeof(lcore_params_array_default) /
static struct rte_eth_conf port_conf = {
.rxmode = {
- .mq_mode = ETH_MQ_RX_RSS,
+ .mq_mode = ETH_MQ_RX_RSS,
.max_rx_pkt_len = ETHER_MAX_LEN,
.split_hdr_size = 0,
.header_split = 0, /**< Header Split disabled */
@@ -226,11 +234,14 @@ static struct rte_eth_conf port_conf = {
.rx_adv_conf = {
.rss_conf = {
.rss_key = NULL,
- .rss_hf = ETH_RSS_IP,
+ .rss_hf = ETH_RSS_UDP,
},
},
.txmode = {
- .mq_mode = ETH_DCB_NONE,
+ .mq_mode = ETH_MQ_TX_NONE,
+ },
+ .intr_conf = {
+ .rxq = 1, /**< rxq interrupt feature enabled */
},
};
@@ -402,19 +413,22 @@ power_timer_cb(__attribute__((unused)) struct rte_timer *tim,
/* accumulate total execution time in us when callback is invoked */
sleep_time_ratio = (float)(stats[lcore_id].sleep_time) /
(float)SCALING_PERIOD;
-
/**
* check whether need to scale down frequency a step if it sleep a lot.
*/
- if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD)
- rte_power_freq_down(lcore_id);
+ if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD) {
+ if (rte_power_freq_down)
+ rte_power_freq_down(lcore_id);
+ }
else if ( (unsigned)(stats[lcore_id].nb_rx_processed /
- stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST)
+ stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST) {
/**
* scale down a step if average packet per iteration less
* than expectation.
*/
- rte_power_freq_down(lcore_id);
+ if (rte_power_freq_down)
+ rte_power_freq_down(lcore_id);
+ }
/**
* initialize another timer according to current frequency to ensure
@@ -707,22 +721,20 @@ l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid,
}
-#define SLEEP_GEAR1_THRESHOLD 100
-#define SLEEP_GEAR2_THRESHOLD 1000
+#define MINIMUM_SLEEP_TIME 1
+#define SUSPEND_THRESHOLD 300
static inline uint32_t
power_idle_heuristic(uint32_t zero_rx_packet_count)
{
- /* If zero count is less than 100, use it as the sleep time in us */
- if (zero_rx_packet_count < SLEEP_GEAR1_THRESHOLD)
- return zero_rx_packet_count;
- /* If zero count is less than 1000, sleep time should be 100 us */
- else if ((zero_rx_packet_count >= SLEEP_GEAR1_THRESHOLD) &&
- (zero_rx_packet_count < SLEEP_GEAR2_THRESHOLD))
- return SLEEP_GEAR1_THRESHOLD;
- /* If zero count is greater than 1000, sleep time should be 1000 us */
- else if (zero_rx_packet_count >= SLEEP_GEAR2_THRESHOLD)
- return SLEEP_GEAR2_THRESHOLD;
+ /* If zero count is less than 100, sleep 1us */
+ if (zero_rx_packet_count < SUSPEND_THRESHOLD)
+ return MINIMUM_SLEEP_TIME;
+ /* If zero count is less than 1000, sleep 100 us which is the
+ minimum latency switching from C3/C6 to C0
+ */
+ else
+ return SUSPEND_THRESHOLD;
return 0;
}
@@ -762,6 +774,40 @@ power_freq_scaleup_heuristic(unsigned lcore_id,
return FREQ_CURRENT;
}
+/**
+ * force polling thread sleep until one-shot rx interrupt triggers
+ * @param port_id
+ * Port id.
+ * @param queue_id
+ * Rx queue id.
+ * @return
+ * 0 on success
+ */
+static int
+sleep_until_rx_interrupt(uint8_t port_id, uint8_t queue_id)
+{
+ /* Enable one-shot rx interrupt */
+ rte_spinlock_lock(&(locks[port_id]));
+ rte_eth_dev_rx_queue_intr_enable(port_id, queue_id);
+ rte_spinlock_unlock(&(locks[port_id]));
+
+ RTE_LOG(INFO, L3FWD_POWER,
+ "lcore %u sleeps until interrupt on port%d,rxq%d triggers\n",
+ rte_lcore_id(), port_id, queue_id);
+
+ rte_eal_wait_rx_intr(port_id, queue_id);
+ RTE_LOG(INFO, L3FWD_POWER,
+ "lcore %u is waked up from rx interrupt on port%d,rxq%d\n",
+ rte_lcore_id(), port_id, queue_id);
+
+ /* Disable one-shot rx interrupt */
+ rte_spinlock_lock(&(locks[port_id]));
+ rte_eth_dev_rx_queue_intr_disable(port_id, queue_id);
+ rte_spinlock_unlock(&(locks[port_id]));
+
+ return 0;
+}
+
/* main processing loop */
static int
main_loop(__attribute__((unused)) void *dummy)
@@ -775,7 +821,6 @@ main_loop(__attribute__((unused)) void *dummy)
struct lcore_conf *qconf;
struct lcore_rx_queue *rx_queue;
enum freq_scale_hint_t lcore_scaleup_hint;
-
uint32_t lcore_rx_idle_count = 0;
uint32_t lcore_idle_hint = 0;
@@ -835,6 +880,8 @@ main_loop(__attribute__((unused)) void *dummy)
prev_tsc_power = cur_tsc_power;
}
+
+start_rx:
/*
* Read packet from RX queues
*/
@@ -848,6 +895,7 @@ main_loop(__attribute__((unused)) void *dummy)
nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
MAX_PKT_BURST);
+
stats[lcore_id].nb_rx_processed += nb_rx;
if (unlikely(nb_rx == 0)) {
/**
@@ -910,10 +958,13 @@ main_loop(__attribute__((unused)) void *dummy)
rx_queue->freq_up_hint;
}
- if (lcore_scaleup_hint == FREQ_HIGHEST)
- rte_power_freq_max(lcore_id);
- else if (lcore_scaleup_hint == FREQ_HIGHER)
- rte_power_freq_up(lcore_id);
+ if (lcore_scaleup_hint == FREQ_HIGHEST) {
+ if (rte_power_freq_max)
+ rte_power_freq_max(lcore_id);
+ } else if (lcore_scaleup_hint == FREQ_HIGHER) {
+ if (rte_power_freq_up)
+ rte_power_freq_up(lcore_id);
+ }
} else {
/**
* All Rx queues empty in recent consecutive polls,
@@ -928,16 +979,21 @@ main_loop(__attribute__((unused)) void *dummy)
lcore_idle_hint = rx_queue->idle_hint;
}
- if ( lcore_idle_hint < SLEEP_GEAR1_THRESHOLD)
+ if (lcore_idle_hint < SUSPEND_THRESHOLD)
/**
- * execute "pause" instruction to avoid context
- * switch for short sleep.
- */
+ * execute "pause" instruction to avoid context
+ * switch which generally take hundres of
+ * microsecond for short sleep.
+ */
rte_delay_us(lcore_idle_hint);
- else
- /* long sleep force runing thread to suspend */
- usleep(lcore_idle_hint);
-
+ else {
+ /* suspend untill rx interrupt trigges */
+ sleep_until_rx_interrupt(
+ qconf->rx_queue_list[0].port_id,
+ qconf->rx_queue_list[0].queue_id);
+ /* start receiving packets immediately */
+ goto start_rx;
+ }
stats[lcore_id].sleep_time += lcore_idle_hint;
}
}
@@ -1270,7 +1326,7 @@ setup_hash(int socketid)
char s[64];
/* create ipv4 hash */
- snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid);
+ rte_snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid);
ipv4_l3fwd_hash_params.name = s;
ipv4_l3fwd_hash_params.socket_id = socketid;
ipv4_l3fwd_lookup_struct[socketid] =
@@ -1280,7 +1336,7 @@ setup_hash(int socketid)
"socket %d\n", socketid);
/* create ipv6 hash */
- snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid);
+ rte_snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid);
ipv6_l3fwd_hash_params.name = s;
ipv6_l3fwd_hash_params.socket_id = socketid;
ipv6_l3fwd_lookup_struct[socketid] =
@@ -1476,6 +1532,7 @@ main(int argc, char **argv)
unsigned lcore_id;
uint64_t hz;
uint32_t n_tx_queue, nb_lcores;
+ uint32_t dev_rxq_num, dev_txq_num;
uint8_t portid, nb_rx_queue, queue, socketid;
/* catch SIGINT and restore cpufreq governor to ondemand */
@@ -1525,10 +1582,19 @@ main(int argc, char **argv)
printf("Initializing port %d ... ", portid );
fflush(stdout);
+ rte_eth_dev_info_get(portid, &dev_info);
+ dev_rxq_num = dev_info.max_rx_queues;
+ dev_txq_num = dev_info.max_tx_queues;
+
nb_rx_queue = get_port_n_rx_queues(portid);
+ if (nb_rx_queue > dev_rxq_num)
+ rte_exit(EXIT_FAILURE,
+ "Cannot configure not existed rxq: "
+ "port=%d\n", portid);
+
n_tx_queue = nb_lcores;
- if (n_tx_queue > MAX_TX_QUEUE_PER_PORT)
- n_tx_queue = MAX_TX_QUEUE_PER_PORT;
+ if (n_tx_queue > dev_txq_num)
+ n_tx_queue = dev_txq_num;
printf("Creating queues: nb_rxq=%d nb_txq=%u... ",
nb_rx_queue, (unsigned)n_tx_queue );
ret = rte_eth_dev_configure(portid, nb_rx_queue,
@@ -1552,6 +1618,9 @@ main(int argc, char **argv)
if (rte_lcore_is_enabled(lcore_id) == 0)
continue;
+ if (queueid >= dev_txq_num)
+ continue;
+
if (numa_on)
socketid = \
(uint8_t)rte_lcore_to_socket_id(lcore_id);
@@ -1586,8 +1655,9 @@ main(int argc, char **argv)
/* init power management library */
ret = rte_power_init(lcore_id);
if (ret)
- rte_exit(EXIT_FAILURE, "Power management library "
- "initialization failed on core%u\n", lcore_id);
+ rte_log(RTE_LOG_ERR, RTE_LOGTYPE_POWER,
+ "Power management library initialization "
+ "failed on core%u", lcore_id);
/* init timer structures for each enabled lcore */
rte_timer_init(&power_timers[lcore_id]);
@@ -1635,7 +1705,6 @@ main(int argc, char **argv)
if (ret < 0)
rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, "
"port=%d\n", ret, portid);
-
/*
* If enabled, put device in promiscuous mode.
* This allows IO forwarding mode to forward packets
@@ -1644,6 +1713,8 @@ main(int argc, char **argv)
*/
if (promiscuous_on)
rte_eth_promiscuous_enable(portid);
+ /* initialize spinlock for each port */
+ rte_spinlock_init(&(locks[portid]));
}
check_all_ports_link_status((uint8_t)nb_ports, enabled_port_mask);
--
1.8.1.4
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [dpdk-dev] [PATCH v4 0/5] Interrupt mode PMD
2015-02-19 13:48 [dpdk-dev] [PATCH v4 0/5] Interrupt mode PMD Zhou Danny
` (4 preceding siblings ...)
2015-02-19 13:48 ` [dpdk-dev] [PATCH v4 5/5] l3fwd-power: enable one-shot rx interrupt and polling/interrupt mode switch Zhou Danny
@ 2015-02-20 8:50 ` Gonzalez Monroy, Sergio
5 siblings, 0 replies; 19+ messages in thread
From: Gonzalez Monroy, Sergio @ 2015-02-20 8:50 UTC (permalink / raw)
To: Zhou Danny, dev
On 19/02/2015 13:48, Zhou Danny wrote:
> v4 changes
> - Export interrupt enable/disable functions for shared libraries
> - Adjust position of new-added structure fields and functions to
> avoid breaking ABI
>
> v3 changes
> - Add return value for interrupt enable/disable functions
> - Move spinlok from PMD to L3fwd-power
> - Remove unnecessary variables in e1000_mac_info
> - Fix miscelleous review comments
>
> v2 changes
> - Fix compilation issue in Makefile for missed header file.
> - Consolidate internal and community review comments of v1 patch set.
>
> The patch series introduce low-latency one-shot rx interrupt into DPDK with
> polling and interrupt mode switch control example.
>
> DPDK userspace interrupt notification and handling mechanism is based on UIO
> with below limitation:
> 1) It is designed to handle LSC interrupt only with inefficient suspended
> pthread wakeup procedure (e.g. UIO wakes up LSC interrupt handling thread
> which then wakes up DPDK polling thread). In this way, it introduces
> non-deterministic wakeup latency for DPDK polling thread as well as packet
> latency if it is used to handle Rx interrupt.
> 2) UIO only supports a single interrupt vector which has to been shared by
> LSC interrupt and interrupts assigned to dedicated rx queues.
>
> This patchset includes below features:
> 1) Enable one-shot rx queue interrupt in ixgbe PMD(PF & VF) and igb PMD(PF only).
> 2) Build on top of the VFIO mechanism instead of UIO, so it could support
> up to 64 interrupt vectors for rx queue interrupts.
> 3) Have 1 DPDK polling thread handle per Rx queue interrupt with a dedicated
> VFIO eventfd, which eliminates non-deterministic pthread wakeup latency in
> user space.
> 4) Demonstrate interrupts control APIs and userspace NAIP-like polling/interrupt
> switch algorithms in L3fwd-power example.
>
> Known limitations:
> 1) It does not work for UIO due to a single interrupt eventfd shared by LSC
> and rx queue interrupt handlers causes a mess.
> 2) LSC interrupt is not supported by VF driver, so it is by default disabled
> in L3fwd-power now. Feel free to turn in on if you want to support both LSC
> and rx queue interrupts on a PF.
>
> Danny Zhou (5):
> ethdev: add rx interrupt enable/disable functions
> ixgbe: enable rx queue interrupts for both PF and VF
> igb: enable rx queue interrupts for PF
> eal: add per rx queue interrupt handling based on VFIO
> l3fwd-power: enable one-shot rx interrupt and polling/interrupt mode
> switch
>
> examples/l3fwd-power/main.c | 153 ++++++---
> lib/librte_eal/common/include/rte_eal.h | 12 +
> lib/librte_eal/linuxapp/eal/Makefile | 1 +
> lib/librte_eal/linuxapp/eal/eal_interrupts.c | 190 ++++++++---
> lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 12 +-
> .../linuxapp/eal/include/exec-env/rte_interrupts.h | 4 +
> lib/librte_ether/rte_ethdev.c | 43 +++
> lib/librte_ether/rte_ethdev.h | 59 ++++
> lib/librte_ether/rte_ether_version.map | 2 +
> lib/librte_pmd_e1000/e1000_ethdev.h | 3 +
> lib/librte_pmd_e1000/igb_ethdev.c | 228 +++++++++++--
> lib/librte_pmd_ixgbe/ixgbe_ethdev.c | 365 ++++++++++++++++++++-
> lib/librte_pmd_ixgbe/ixgbe_ethdev.h | 6 +
> 13 files changed, 964 insertions(+), 114 deletions(-)
>
Series
Acked-by: Sergio Gonzalez Monroy <sergio.gonzalez.monroy@intel.com>
^ permalink raw reply [flat|nested] 19+ messages in thread