* [dpdk-dev] [PATCH v1 1/5] ethdev: add rx interrupt enable/disable functions
2015-01-28 9:50 [dpdk-dev] [PATCH v1 0/5] Interrupt mode for PMD Danny Zhou
@ 2015-01-28 9:50 ` Danny Zhou
2015-01-29 2:22 ` Qiu, Michael
2015-01-28 9:50 ` [dpdk-dev] [PATCH v1 2/5] ixgbe: enable rx queue interrupts for both PF and VF Danny Zhou
` (3 subsequent siblings)
4 siblings, 1 reply; 16+ messages in thread
From: Danny Zhou @ 2015-01-28 9:50 UTC (permalink / raw)
To: dev
Signed-off-by: Danny Zhou <danny.zhou@intel.com>
---
lib/librte_ether/rte_ethdev.c | 45 ++++++++++++++++++++++++++++++++++
lib/librte_ether/rte_ethdev.h | 57 +++++++++++++++++++++++++++++++++++++++++++
2 files changed, 102 insertions(+)
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index ea3a1fb..dd66cd9 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -2825,6 +2825,51 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
}
rte_spinlock_unlock(&rte_eth_dev_cb_lock);
}
+
+int
+rte_eth_dev_rx_queue_intr_enable(uint8_t port_id,
+ uint16_t queue_id)
+{
+ struct rte_eth_dev *dev;
+
+ if (port_id >= nb_ports) {
+ PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+ return (-ENODEV);
+ }
+
+ dev = &rte_eth_devices[port_id];
+ if (dev == NULL) {
+ PMD_DEBUG_TRACE("Invalid port device\n");
+ return (-ENODEV);
+ }
+
+ FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_enable, -ENOTSUP);
+ (*dev->dev_ops->rx_queue_intr_enable)(dev, queue_id);
+ return 0;
+}
+
+int
+rte_eth_dev_rx_queue_intr_disable(uint8_t port_id,
+ uint16_t queue_id)
+{
+ struct rte_eth_dev *dev;
+
+ if (port_id >= nb_ports) {
+ PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+ return (-ENODEV);
+ }
+
+ dev = &rte_eth_devices[port_id];
+ if (dev == NULL) {
+ PMD_DEBUG_TRACE("Invalid port device\n");
+ return (-ENODEV);
+ }
+
+ FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_disable, -ENOTSUP);
+ (*dev->dev_ops->rx_queue_intr_disable)(dev, queue_id);
+ return 0;
+}
+
#ifdef RTE_NIC_BYPASS
int rte_eth_dev_bypass_init(uint8_t port_id)
{
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 1200c1c..c080039 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -848,6 +848,8 @@ struct rte_eth_fdir {
struct rte_intr_conf {
/** enable/disable lsc interrupt. 0 (default) - disable, 1 enable */
uint16_t lsc;
+ /** enable/disable rxq interrupt. 0 (default) - disable, 1 enable */
+ uint16_t rxq;
};
/**
@@ -1108,6 +1110,14 @@ typedef int (*eth_tx_queue_setup_t)(struct rte_eth_dev *dev,
const struct rte_eth_txconf *tx_conf);
/**< @internal Setup a transmit queue of an Ethernet device. */
+typedef int (*eth_rx_enable_intr_t)(struct rte_eth_dev *dev,
+ uint16_t rx_queue_id);
+/**< @internal Enable interrupt of a receive queue of an Ethernet device. */
+
+typedef int (*eth_rx_disable_intr_t)(struct rte_eth_dev *dev,
+ uint16_t rx_queue_id);
+/**< @internal Disable interrupt of a receive queue of an Ethernet device. */
+
typedef void (*eth_queue_release_t)(void *queue);
/**< @internal Release memory resources allocated by given RX/TX queue. */
@@ -1444,6 +1454,8 @@ struct eth_dev_ops {
eth_queue_start_t tx_queue_start;/**< Start TX for a queue.*/
eth_queue_stop_t tx_queue_stop;/**< Stop TX for a queue.*/
eth_rx_queue_setup_t rx_queue_setup;/**< Set up device RX queue.*/
+ eth_rx_enable_intr_t rx_queue_intr_enable; /**< Enable Rx queue interrupt. */
+ eth_rx_disable_intr_t rx_queue_intr_disable; /**< Disable Rx queue interrupt.*/
eth_queue_release_t rx_queue_release;/**< Release RX queue.*/
eth_rx_queue_count_t rx_queue_count; /**< Get Rx queue count. */
eth_rx_descriptor_done_t rx_descriptor_done; /**< Check rxd DD bit */
@@ -2810,6 +2822,51 @@ void _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
enum rte_eth_event_type event);
/**
+ * When there is no rx packet coming in Rx Queue for a long time, we can
+ * sleep lcore related to RX Queue for power saving, and enable rx interrupt
+ * to be triggered when rx packect arrives.
+ *
+ * The rte_eth_dev_rx_queue_intr_enable() function enables rx queue
+ * interrupt on specific rx queue of a port.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The index of the receive queue from which to retrieve input packets.
+ * The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ * to rte_eth_dev_configure().
+ * @return
+ * - (0) if successful.
+ * - (-ENOTSUP) if underlying hardware OR driver doesn't support
+ * that operation.
+ * - (-ENODEV) if *port_id* invalid.
+ */
+int rte_eth_dev_rx_queue_intr_enable(uint8_t port_id,
+ uint16_t queue_id);
+
+/**
+ * When lcore wakes up from rx interrupt indicating packet coming, disable rx
+ * interrupt and returns to polling mode.
+ *
+ * The rte_eth_dev_rx_queue_intr_disable() function disables rx queue
+ * interrupt on specific rx queue of a port.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The index of the receive queue from which to retrieve input packets.
+ * The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ * to rte_eth_dev_configure().
+ * @return
+ * - (0) if successful.
+ * - (-ENOTSUP) if underlying hardware OR driver doesn't support
+ * that operation.
+ * - (-ENODEV) if *port_id* invalid.
+ */
+int rte_eth_dev_rx_queue_intr_disable(uint8_t port_id,
+ uint16_t queue_id);
+
+/**
* Turn on the LED on the Ethernet device.
* This function turns on the LED on the Ethernet device.
*
--
1.8.1.4
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [dpdk-dev] [PATCH v1 1/5] ethdev: add rx interrupt enable/disable functions
2015-01-28 9:50 ` [dpdk-dev] [PATCH v1 1/5] ethdev: add rx interrupt enable/disable functions Danny Zhou
@ 2015-01-29 2:22 ` Qiu, Michael
0 siblings, 0 replies; 16+ messages in thread
From: Qiu, Michael @ 2015-01-29 2:22 UTC (permalink / raw)
To: Zhou, Danny, dev
On 1/28/2015 5:51 PM, Danny Zhou wrote:
Commit log is better for others to review the patch I think, it's
helpful for others to understand your patch.
> Signed-off-by: Danny Zhou <danny.zhou@intel.com>
> ---
> lib/librte_ether/rte_ethdev.c | 45 ++++++++++++++++++++++++++++++++++
> lib/librte_ether/rte_ethdev.h | 57 +++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 102 insertions(+)
>
> diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
> index ea3a1fb..dd66cd9 100644
> --- a/lib/librte_ether/rte_ethdev.c
> +++ b/lib/librte_ether/rte_ethdev.c
> @@ -2825,6 +2825,51 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
> }
> rte_spinlock_unlock(&rte_eth_dev_cb_lock);
> }
> +
> +int
> +rte_eth_dev_rx_queue_intr_enable(uint8_t port_id,
> + uint16_t queue_id)
> +{
> + struct rte_eth_dev *dev;
> +
> + if (port_id >= nb_ports) {
> + PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
> + return (-ENODEV);
> + }
> +
> + dev = &rte_eth_devices[port_id];
> + if (dev == NULL) {
> + PMD_DEBUG_TRACE("Invalid port device\n");
> + return (-ENODEV);
> + }
> +
> + FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_enable, -ENOTSUP);
> + (*dev->dev_ops->rx_queue_intr_enable)(dev, queue_id);
As callback function rx_queue_intr_enabl() has a return value, I think
it better to do a check, although all your implement always return a
value of zero.
BTW, better to add a blank line before last return.
Thanks,
Michael
> + return 0;
> +}
> +
> +int
> +rte_eth_dev_rx_queue_intr_disable(uint8_t port_id,
> + uint16_t queue_id)
> +{
> + struct rte_eth_dev *dev;
> +
> + if (port_id >= nb_ports) {
> + PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
> + return (-ENODEV);
> + }
> +
> + dev = &rte_eth_devices[port_id];
> + if (dev == NULL) {
> + PMD_DEBUG_TRACE("Invalid port device\n");
> + return (-ENODEV);
> + }
> +
> + FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_disable, -ENOTSUP);
> + (*dev->dev_ops->rx_queue_intr_disable)(dev, queue_id);
> + return 0;
> +}
> +
> #ifdef RTE_NIC_BYPASS
> int rte_eth_dev_bypass_init(uint8_t port_id)
> {
> diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
> index 1200c1c..c080039 100644
> --- a/lib/librte_ether/rte_ethdev.h
> +++ b/lib/librte_ether/rte_ethdev.h
> @@ -848,6 +848,8 @@ struct rte_eth_fdir {
> struct rte_intr_conf {
> /** enable/disable lsc interrupt. 0 (default) - disable, 1 enable */
> uint16_t lsc;
> + /** enable/disable rxq interrupt. 0 (default) - disable, 1 enable */
> + uint16_t rxq;
> };
>
> /**
> @@ -1108,6 +1110,14 @@ typedef int (*eth_tx_queue_setup_t)(struct rte_eth_dev *dev,
> const struct rte_eth_txconf *tx_conf);
> /**< @internal Setup a transmit queue of an Ethernet device. */
>
> +typedef int (*eth_rx_enable_intr_t)(struct rte_eth_dev *dev,
> + uint16_t rx_queue_id);
> +/**< @internal Enable interrupt of a receive queue of an Ethernet device. */
> +
> +typedef int (*eth_rx_disable_intr_t)(struct rte_eth_dev *dev,
> + uint16_t rx_queue_id);
> +/**< @internal Disable interrupt of a receive queue of an Ethernet device. */
> +
> typedef void (*eth_queue_release_t)(void *queue);
> /**< @internal Release memory resources allocated by given RX/TX queue. */
>
> @@ -1444,6 +1454,8 @@ struct eth_dev_ops {
> eth_queue_start_t tx_queue_start;/**< Start TX for a queue.*/
> eth_queue_stop_t tx_queue_stop;/**< Stop TX for a queue.*/
> eth_rx_queue_setup_t rx_queue_setup;/**< Set up device RX queue.*/
> + eth_rx_enable_intr_t rx_queue_intr_enable; /**< Enable Rx queue interrupt. */
> + eth_rx_disable_intr_t rx_queue_intr_disable; /**< Disable Rx queue interrupt.*/
> eth_queue_release_t rx_queue_release;/**< Release RX queue.*/
> eth_rx_queue_count_t rx_queue_count; /**< Get Rx queue count. */
> eth_rx_descriptor_done_t rx_descriptor_done; /**< Check rxd DD bit */
> @@ -2810,6 +2822,51 @@ void _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
> enum rte_eth_event_type event);
>
> /**
> + * When there is no rx packet coming in Rx Queue for a long time, we can
> + * sleep lcore related to RX Queue for power saving, and enable rx interrupt
> + * to be triggered when rx packect arrives.
> + *
> + * The rte_eth_dev_rx_queue_intr_enable() function enables rx queue
> + * interrupt on specific rx queue of a port.
> + *
> + * @param port_id
> + * The port identifier of the Ethernet device.
> + * @param queue_id
> + * The index of the receive queue from which to retrieve input packets.
> + * The value must be in the range [0, nb_rx_queue - 1] previously supplied
> + * to rte_eth_dev_configure().
> + * @return
> + * - (0) if successful.
> + * - (-ENOTSUP) if underlying hardware OR driver doesn't support
> + * that operation.
> + * - (-ENODEV) if *port_id* invalid.
> + */
> +int rte_eth_dev_rx_queue_intr_enable(uint8_t port_id,
> + uint16_t queue_id);
> +
> +/**
> + * When lcore wakes up from rx interrupt indicating packet coming, disable rx
> + * interrupt and returns to polling mode.
> + *
> + * The rte_eth_dev_rx_queue_intr_disable() function disables rx queue
> + * interrupt on specific rx queue of a port.
> + *
> + * @param port_id
> + * The port identifier of the Ethernet device.
> + * @param queue_id
> + * The index of the receive queue from which to retrieve input packets.
> + * The value must be in the range [0, nb_rx_queue - 1] previously supplied
> + * to rte_eth_dev_configure().
> + * @return
> + * - (0) if successful.
> + * - (-ENOTSUP) if underlying hardware OR driver doesn't support
> + * that operation.
> + * - (-ENODEV) if *port_id* invalid.
> + */
> +int rte_eth_dev_rx_queue_intr_disable(uint8_t port_id,
> + uint16_t queue_id);
> +
> +/**
> * Turn on the LED on the Ethernet device.
> * This function turns on the LED on the Ethernet device.
> *
^ permalink raw reply [flat|nested] 16+ messages in thread
* [dpdk-dev] [PATCH v1 2/5] ixgbe: enable rx queue interrupts for both PF and VF
2015-01-28 9:50 [dpdk-dev] [PATCH v1 0/5] Interrupt mode for PMD Danny Zhou
2015-01-28 9:50 ` [dpdk-dev] [PATCH v1 1/5] ethdev: add rx interrupt enable/disable functions Danny Zhou
@ 2015-01-28 9:50 ` Danny Zhou
2015-01-29 3:40 ` Qiu, Michael
2015-01-28 9:50 ` [dpdk-dev] [PATCH v1 3/5] igb: enable rx queue interrupts for PF Danny Zhou
` (2 subsequent siblings)
4 siblings, 1 reply; 16+ messages in thread
From: Danny Zhou @ 2015-01-28 9:50 UTC (permalink / raw)
To: dev
Signed-off-by: Danny Zhou <danny.zhou@intel.com>
---
lib/librte_pmd_ixgbe/ixgbe_ethdev.c | 371 ++++++++++++++++++++++++++++++++++++
lib/librte_pmd_ixgbe/ixgbe_ethdev.h | 9 +
2 files changed, 380 insertions(+)
diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
index b341dd0..39f883a 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
@@ -60,6 +60,7 @@
#include <rte_atomic.h>
#include <rte_malloc.h>
#include <rte_random.h>
+#include <rte_spinlock.h>
#include <rte_dev.h>
#include "ixgbe_logs.h"
@@ -173,6 +174,7 @@ static int ixgbe_dev_rss_reta_query(struct rte_eth_dev *dev,
uint16_t reta_size);
static void ixgbe_dev_link_status_print(struct rte_eth_dev *dev);
static int ixgbe_dev_lsc_interrupt_setup(struct rte_eth_dev *dev);
+static int ixgbe_dev_rxq_interrupt_setup(struct rte_eth_dev *dev);
static int ixgbe_dev_interrupt_get_status(struct rte_eth_dev *dev);
static int ixgbe_dev_interrupt_action(struct rte_eth_dev *dev);
static void ixgbe_dev_interrupt_handler(struct rte_intr_handle *handle,
@@ -186,11 +188,14 @@ static void ixgbe_dcb_init(struct ixgbe_hw *hw,struct ixgbe_dcb_config *dcb_conf
/* For Virtual Function support */
static int eth_ixgbevf_dev_init(struct eth_driver *eth_drv,
struct rte_eth_dev *eth_dev);
+static int ixgbevf_dev_interrupt_get_status(struct rte_eth_dev *dev);
+static int ixgbevf_dev_interrupt_action(struct rte_eth_dev *dev);
static int ixgbevf_dev_configure(struct rte_eth_dev *dev);
static int ixgbevf_dev_start(struct rte_eth_dev *dev);
static void ixgbevf_dev_stop(struct rte_eth_dev *dev);
static void ixgbevf_dev_close(struct rte_eth_dev *dev);
static void ixgbevf_intr_disable(struct ixgbe_hw *hw);
+static void ixgbevf_intr_enable(struct ixgbe_hw *hw);
static void ixgbevf_dev_stats_get(struct rte_eth_dev *dev,
struct rte_eth_stats *stats);
static void ixgbevf_dev_stats_reset(struct rte_eth_dev *dev);
@@ -198,8 +203,15 @@ static int ixgbevf_vlan_filter_set(struct rte_eth_dev *dev,
uint16_t vlan_id, int on);
static void ixgbevf_vlan_strip_queue_set(struct rte_eth_dev *dev,
uint16_t queue, int on);
+static void ixgbevf_set_ivar(struct ixgbe_hw *hw, s8 direction, u8 queue, u8 msix_vector);
static void ixgbevf_vlan_offload_set(struct rte_eth_dev *dev, int mask);
static void ixgbevf_set_vfta_all(struct rte_eth_dev *dev, bool on);
+static void ixgbevf_dev_interrupt_handler(struct rte_intr_handle *handle,
+ void *param);
+static int ixgbevf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id);
+static int ixgbevf_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id);
+static void ixgbevf_set_ivar(struct ixgbe_hw *hw, s8 direction, u8 queue, u8 msix_vector);
+static void ixgbevf_configure_msix(struct ixgbe_hw *hw);
/* For Eth VMDQ APIs support */
static int ixgbe_uc_hash_table_set(struct rte_eth_dev *dev, struct
@@ -217,6 +229,11 @@ static int ixgbe_mirror_rule_set(struct rte_eth_dev *dev,
static int ixgbe_mirror_rule_reset(struct rte_eth_dev *dev,
uint8_t rule_id);
+static int ixgbe_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id);
+static int ixgbe_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id);
+static void ixgbe_set_ivar(struct ixgbe_hw *hw, s8 direction, u8 queue, u8 msix_vector);
+static void ixgbe_configure_msix(struct ixgbe_hw *hw);
+
static int ixgbe_set_queue_rate_limit(struct rte_eth_dev *dev,
uint16_t queue_idx, uint16_t tx_rate);
static int ixgbe_set_vf_rate_limit(struct rte_eth_dev *dev, uint16_t vf,
@@ -338,6 +355,8 @@ static struct eth_dev_ops ixgbe_eth_dev_ops = {
.tx_queue_start = ixgbe_dev_tx_queue_start,
.tx_queue_stop = ixgbe_dev_tx_queue_stop,
.rx_queue_setup = ixgbe_dev_rx_queue_setup,
+ .rx_queue_intr_enable = ixgbe_dev_rx_queue_intr_enable,
+ .rx_queue_intr_disable = ixgbe_dev_rx_queue_intr_disable,
.rx_queue_release = ixgbe_dev_rx_queue_release,
.rx_queue_count = ixgbe_dev_rx_queue_count,
.rx_descriptor_done = ixgbe_dev_rx_descriptor_done,
@@ -412,8 +431,11 @@ static struct eth_dev_ops ixgbevf_eth_dev_ops = {
.vlan_offload_set = ixgbevf_vlan_offload_set,
.rx_queue_setup = ixgbe_dev_rx_queue_setup,
.rx_queue_release = ixgbe_dev_rx_queue_release,
+ .rx_descriptor_done = ixgbe_dev_rx_descriptor_done,
.tx_queue_setup = ixgbe_dev_tx_queue_setup,
.tx_queue_release = ixgbe_dev_tx_queue_release,
+ .rx_queue_intr_enable = ixgbevf_dev_rx_queue_intr_enable,
+ .rx_queue_intr_disable = ixgbevf_dev_rx_queue_intr_disable,
.mac_addr_add = ixgbevf_add_mac_addr,
.mac_addr_remove = ixgbevf_remove_mac_addr,
};
@@ -908,6 +930,9 @@ eth_ixgbe_dev_init(__attribute__((unused)) struct eth_driver *eth_drv,
eth_dev->data->port_id, pci_dev->id.vendor_id,
pci_dev->id.device_id);
+ /* set max interrupt vfio request */
+ pci_dev->intr_handle.max_intr = hw->mac.max_rx_queues + IXGBE_MAX_OTHER_INTR;
+
rte_intr_callback_register(&(pci_dev->intr_handle),
ixgbe_dev_interrupt_handler, (void *)eth_dev);
@@ -1084,6 +1109,14 @@ eth_ixgbevf_dev_init(__attribute__((unused)) struct eth_driver *eth_drv,
return (-EIO);
}
+ /* set max interrupt vfio request */
+ pci_dev->intr_handle.max_intr = hw->mac.max_rx_queues + IXGBEVF_MAX_OTHER_INTR;
+
+ rte_intr_callback_register(&(pci_dev->intr_handle),
+ ixgbevf_dev_interrupt_handler, (void *)eth_dev);
+
+ rte_intr_enable(&(pci_dev->intr_handle));
+
PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x mac.type=%s",
eth_dev->data->port_id, pci_dev->id.vendor_id,
pci_dev->id.device_id, "ixgbe_mac_82599_vf");
@@ -1485,6 +1518,9 @@ ixgbe_dev_start(struct rte_eth_dev *dev)
/* configure PF module if SRIOV enabled */
ixgbe_pf_host_configure(dev);
+ /* confiugre msix for sleep until rx interrupt */
+ ixgbe_configure_msix(hw);
+
/* initialize transmission unit */
ixgbe_dev_tx_init(dev);
@@ -1560,6 +1596,10 @@ skip_link_setup:
if (dev->data->dev_conf.intr_conf.lsc != 0)
ixgbe_dev_lsc_interrupt_setup(dev);
+ /* check if rxq interrupt is enabled */
+ if (dev->data->dev_conf.intr_conf.rxq != 0)
+ ixgbe_dev_rxq_interrupt_setup(dev);
+
/* resume enabled intr since hw reset */
ixgbe_enable_intr(dev);
@@ -2221,6 +2261,29 @@ ixgbe_dev_lsc_interrupt_setup(struct rte_eth_dev *dev)
return 0;
}
+/**
+ * It clears the interrupt causes and enables the interrupt.
+ * It will be called once only during nic initialized.
+ *
+ * @param dev
+ * Pointer to struct rte_eth_dev.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+static int
+ixgbe_dev_rxq_interrupt_setup(struct rte_eth_dev *dev)
+{
+ struct ixgbe_interrupt *intr =
+ IXGBE_DEV_PRIVATE_TO_INTR(dev->data->dev_private);
+
+ intr->mask |= IXGBE_EICR_RTX_QUEUE;
+ rte_spinlock_init(&intr->lock);
+
+ return 0;
+}
+
/*
* It reads ICR and sets flag (IXGBE_EICR_LSC) for the link_update.
*
@@ -2258,6 +2321,30 @@ ixgbe_dev_interrupt_get_status(struct rte_eth_dev *dev)
return 0;
}
+static int
+ixgbevf_dev_interrupt_get_status(struct rte_eth_dev *dev)
+{
+ uint32_t eicr;
+ struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ struct ixgbe_interrupt *intr =
+ IXGBE_DEV_PRIVATE_TO_INTR(dev->data->dev_private);
+
+ /* clear all cause mask */
+ ixgbevf_intr_disable(hw);
+
+ /* read-on-clear nic registers here */
+ eicr = IXGBE_READ_REG(hw, IXGBE_VTEICR);
+ PMD_DRV_LOG(INFO, "eicr %x", eicr);
+
+ intr->flags = 0;
+
+ /* set flag for async link update */
+ if (eicr & IXGBE_EICR_LSC)
+ intr->flags |= IXGBE_FLAG_NEED_LINK_UPDATE;
+
+ return 0;
+}
+
/**
* It gets and then prints the link status.
*
@@ -2353,6 +2440,18 @@ ixgbe_dev_interrupt_action(struct rte_eth_dev *dev)
return 0;
}
+static int
+ixgbevf_dev_interrupt_action(struct rte_eth_dev *dev)
+{
+ struct ixgbe_hw *hw =
+ IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+ PMD_DRV_LOG(DEBUG, "enable intr immediately");
+ ixgbevf_intr_enable(hw);
+ rte_intr_enable(&(dev->pci_dev->intr_handle));
+ return 0;
+}
+
/**
* Interrupt handler which shall be registered for alarm callback for delayed
* handling specific interrupt to wait for the stable nic state. As the
@@ -2414,6 +2513,15 @@ ixgbe_dev_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
ixgbe_dev_interrupt_action(dev);
}
+static void
+ixgbevf_dev_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
+ void *param)
+{
+ struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
+ ixgbevf_dev_interrupt_get_status(dev);
+ ixgbevf_dev_interrupt_action(dev);
+}
+
static int
ixgbe_dev_led_on(struct rte_eth_dev *dev)
{
@@ -2912,6 +3020,19 @@ ixgbevf_intr_disable(struct ixgbe_hw *hw)
IXGBE_WRITE_FLUSH(hw);
}
+static void
+ixgbevf_intr_enable(struct ixgbe_hw *hw)
+{
+ PMD_INIT_FUNC_TRACE();
+
+ /* VF enable interrupt autoclean */
+ IXGBE_WRITE_REG(hw, IXGBE_VTEIAM, IXGBE_VF_IRQ_ENABLE_MASK);
+ IXGBE_WRITE_REG(hw, IXGBE_VTEIAC, IXGBE_VF_IRQ_ENABLE_MASK);
+ IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, IXGBE_VF_IRQ_ENABLE_MASK);
+
+ IXGBE_WRITE_FLUSH(hw);
+}
+
static int
ixgbevf_dev_configure(struct rte_eth_dev *dev)
{
@@ -2974,6 +3095,11 @@ ixgbevf_dev_start(struct rte_eth_dev *dev)
ixgbevf_dev_rxtx_start(dev);
+ ixgbevf_configure_msix(hw);
+
+ /* Re-enable interrupt for VF */
+ ixgbevf_intr_enable(hw);
+
return 0;
}
@@ -3511,6 +3637,251 @@ ixgbe_mirror_rule_reset(struct rte_eth_dev *dev, uint8_t rule_id)
return 0;
}
+
+static int
+ixgbevf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
+{
+ uint32_t mask;
+ struct ixgbe_hw *hw =
+ IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ struct ixgbe_interrupt *intr =
+ IXGBE_DEV_PRIVATE_TO_INTR(dev->data->dev_private);
+
+ rte_spinlock_lock(&(intr->lock));
+ mask = IXGBE_READ_REG(hw, IXGBE_VTEIMS);
+ mask |= (1 << queue_id);
+ IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, mask);
+ rte_spinlock_unlock(&(intr->lock));
+
+ return 0;
+}
+
+static int
+ixgbevf_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
+{
+ uint32_t mask;
+ struct ixgbe_hw *hw =
+ IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ struct ixgbe_interrupt *intr =
+ IXGBE_DEV_PRIVATE_TO_INTR(dev->data->dev_private);
+
+ rte_spinlock_lock(&(intr->lock));
+ mask = IXGBE_READ_REG(hw, IXGBE_VTEIMS);
+ mask &= ~(1 << queue_id);
+ IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, mask);
+ rte_spinlock_unlock(&(intr->lock));
+
+ return 0;
+}
+
+static int
+ixgbe_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
+{
+ u32 mask;
+ struct ixgbe_hw *hw =
+ IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ struct ixgbe_interrupt *intr =
+ IXGBE_DEV_PRIVATE_TO_INTR(dev->data->dev_private);
+
+ rte_spinlock_lock(&(intr->lock));
+ if (queue_id < 16) {
+ ixgbe_disable_intr(hw);
+ intr->mask |= (1 << queue_id);
+ ixgbe_enable_intr(dev);
+ } else if (queue_id < 32) {
+ mask = IXGBE_READ_REG(hw, IXGBE_EIMS_EX(0));
+ mask &= (1 << queue_id);
+ IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
+ } else if (queue_id < 64) {
+ mask = IXGBE_READ_REG(hw, IXGBE_EIMS_EX(1));
+ mask &= (1 << (queue_id - 32));
+ IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
+ }
+ rte_spinlock_unlock(&(intr->lock));
+
+ return 0;
+}
+
+static int
+ixgbe_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
+{
+ u32 mask;
+ struct ixgbe_hw *hw =
+ IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ struct ixgbe_interrupt *intr =
+ IXGBE_DEV_PRIVATE_TO_INTR(dev->data->dev_private);
+
+ rte_spinlock_lock(&(intr->lock));
+ if (queue_id < 16) {
+ ixgbe_disable_intr(hw);
+ intr->mask &= ~(1 << queue_id);
+ ixgbe_enable_intr(dev);
+ } else if (queue_id < 32) {
+ mask = IXGBE_READ_REG(hw, IXGBE_EIMS_EX(0));
+ mask &= ~(1 << queue_id);
+ IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
+ } else if (queue_id < 64) {
+ mask = IXGBE_READ_REG(hw, IXGBE_EIMS_EX(1));
+ mask &= ~(1 << (queue_id - 32));
+ IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
+ }
+ rte_spinlock_unlock(&(intr->lock));
+
+ return 0;
+}
+
+static void
+ixgbevf_set_ivar(struct ixgbe_hw *hw, s8 direction,
+ u8 queue, u8 msix_vector)
+{
+ u32 ivar, index;
+ if (direction == -1) {
+ /* other causes */
+ msix_vector |= IXGBE_IVAR_ALLOC_VAL;
+ ivar = IXGBE_READ_REG(hw, IXGBE_VTIVAR_MISC);
+ ivar &= ~0xFF;
+ ivar |= msix_vector;
+ IXGBE_WRITE_REG(hw, IXGBE_VTIVAR_MISC, ivar);
+ } else {
+ /* tx or tx cause */
+ msix_vector |= IXGBE_IVAR_ALLOC_VAL;
+ index = ((16 * (queue & 1)) + (8 * direction));
+ ivar = IXGBE_READ_REG(hw, IXGBE_VTIVAR(queue >> 1));
+ ivar &= ~(0xFF << index);
+ ivar |= (msix_vector << index);
+ IXGBE_WRITE_REG(hw, IXGBE_VTIVAR(queue >> 1), ivar);
+ }
+}
+
+/**
+ * ixgbe_set_ivar - set the IVAR registers, mapping interrupt causes to vectors
+ * @hw: pointer to ixgbe_hw struct
+ * @direction: 0 for Rx, 1 for Tx, -1 for other causes
+ * @queue: queue to map the corresponding interrupt to
+ * @msix_vector: the vector to map to the corresponding queue
+ */
+static void
+ixgbe_set_ivar(struct ixgbe_hw *hw, s8 direction,
+ u8 queue, u8 msix_vector)
+{
+ u32 ivar, index;
+ switch (hw->mac.type) {
+ case ixgbe_mac_82598EB:
+ msix_vector |= IXGBE_IVAR_ALLOC_VAL;
+ if (direction == -1)
+ direction = 0;
+ index = (((direction * 64) + queue) >> 2) & 0x1F;
+ ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
+ ivar &= ~(0xFF << (8 * (queue & 0x3)));
+ ivar |= (msix_vector << (8 * (queue & 0x3)));
+ IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
+ break;
+ case ixgbe_mac_82599EB:
+ case ixgbe_mac_X540:
+ if (direction == -1) {
+ /* other causes */
+ msix_vector |= IXGBE_IVAR_ALLOC_VAL;
+ index = ((queue & 1) * 8);
+ ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
+ ivar &= ~(0xFF << index);
+ ivar |= (msix_vector << index);
+ IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
+ break;
+ } else {
+ /* tx or rx causes */
+ msix_vector |= IXGBE_IVAR_ALLOC_VAL;
+ index = ((16 * (queue & 1)) + (8 * direction));
+ ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(queue >> 1));
+ ivar &= ~(0xFF << index);
+ ivar |= (msix_vector << index);
+ IXGBE_WRITE_REG(hw, IXGBE_IVAR(queue >> 1), ivar);
+ break;
+ }
+ default:
+ break;
+ }
+}
+
+
+static void
+ixgbevf_configure_msix(struct ixgbe_hw *hw)
+{
+ u32 queue_idx, vector_idx;
+ /* Configure all RX queues of VF */
+ for (vector_idx = 0; vector_idx < IXGBE_VF_MAXMSIVECTOR; vector_idx++) {
+ for (queue_idx = 0; queue_idx < (hw->mac.max_rx_queues - 1); queue_idx++)
+ ixgbevf_set_ivar(hw, 0, queue_idx, vector_idx);
+ }
+
+ /* Configure VF Rx queue ivar */
+ ixgbevf_set_ivar(hw, -1, 1, vector_idx);
+}
+
+/**
+ * ixgbe_configure_msix - Configure MSI-X hardware
+ * @hw: board private structure
+ * ixgbe_configure_msix sets up the hardware to properly generate MSI-X
+ * interrupts.
+ */
+static void
+ixgbe_configure_msix(struct ixgbe_hw *hw)
+{
+ int queue_id;
+ u32 mask;
+ u32 gpie;
+
+ /* set GPIE for in MSI-x mode */
+ gpie = IXGBE_READ_REG(hw, IXGBE_GPIE);
+ gpie = IXGBE_GPIE_MSIX_MODE | IXGBE_GPIE_PBA_SUPPORT |
+ IXGBE_GPIE_OCD;
+ gpie |= IXGBE_GPIE_EIAME;
+ /*
+ * use EIAM to auto-mask when MSI-X interrupt is asserted
+ * this saves a register write for every interrupt
+ */
+ switch (hw->mac.type) {
+ case ixgbe_mac_82598EB:
+ IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
+ break;
+ case ixgbe_mac_82599EB:
+ case ixgbe_mac_X540:
+ default:
+ IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
+ IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
+ break;
+ }
+ IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
+
+ /*
+ * Populate the IVAR table and set the ITR values to the
+ * corresponding register.
+ */
+ for (queue_id = 0; queue_id < VFIO_MAX_QUEUE_ID; queue_id++)
+ ixgbe_set_ivar(hw, 0, queue_id, queue_id);
+
+ switch (hw->mac.type) {
+ case ixgbe_mac_82598EB:
+ ixgbe_set_ivar(hw, -1, IXGBE_IVAR_OTHER_CAUSES_INDEX,
+ VFIO_MAX_QUEUE_ID);
+ break;
+ case ixgbe_mac_82599EB:
+ case ixgbe_mac_X540:
+ ixgbe_set_ivar(hw, -1, 1, 32);
+ break;
+ default:
+ break;
+ }
+ IXGBE_WRITE_REG(hw, IXGBE_EITR(queue_id), 1950);
+
+ /* set up to autoclear timer, and the vectors */
+ mask = IXGBE_EIMS_ENABLE_MASK;
+ mask &= ~(IXGBE_EIMS_OTHER |
+ IXGBE_EIMS_MAILBOX |
+ IXGBE_EIMS_LSC);
+
+ IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask);
+}
+
static int ixgbe_set_queue_rate_limit(struct rte_eth_dev *dev,
uint16_t queue_idx, uint16_t tx_rate)
{
diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.h b/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
index 1383194..328c387 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
+++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
@@ -38,6 +38,8 @@
#include "ixgbe/ixgbe_dcb_82598.h"
#include "ixgbe_bypass.h"
+#include <rte_spinlock.h>
+
/* need update link, bit flag */
#define IXGBE_FLAG_NEED_LINK_UPDATE (uint32_t)(1 << 0)
#define IXGBE_FLAG_MAILBOX (uint32_t)(1 << 1)
@@ -98,6 +100,11 @@
#define IXGBE_5TUPLE_MAX_PRI 7
#define IXGBE_5TUPLE_MIN_PRI 1
+#define IXGBE_VF_IRQ_ENABLE_MASK 3 /* vf interrupt enable mask */
+#define IXGBE_VF_MAXMSIVECTOR 1
+/* maximum other interrupts besides rx&tx*/
+#define IXGBE_MAX_OTHER_INTR 1
+#define IXGBEVF_MAX_OTHER_INTR 1
/*
* Information about the fdir mode.
*/
@@ -116,6 +123,7 @@ struct ixgbe_hw_fdir_info {
struct ixgbe_interrupt {
uint32_t flags;
uint32_t mask;
+ rte_spinlock_t lock;
};
struct ixgbe_stat_mapping_registers {
@@ -260,6 +268,7 @@ uint32_t ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev,
uint16_t rx_queue_id);
int ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset);
+int ixgbevf_dev_rx_descriptor_done(void *rx_queue, uint16_t offset);
int ixgbe_dev_rx_init(struct rte_eth_dev *dev);
--
1.8.1.4
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [dpdk-dev] [PATCH v1 2/5] ixgbe: enable rx queue interrupts for both PF and VF
2015-01-28 9:50 ` [dpdk-dev] [PATCH v1 2/5] ixgbe: enable rx queue interrupts for both PF and VF Danny Zhou
@ 2015-01-29 3:40 ` Qiu, Michael
2015-01-29 5:39 ` Zhou, Danny
0 siblings, 1 reply; 16+ messages in thread
From: Qiu, Michael @ 2015-01-29 3:40 UTC (permalink / raw)
To: Zhou, Danny, dev
On 1/28/2015 5:52 PM, Danny Zhou wrote:
> Signed-off-by: Danny Zhou <danny.zhou@intel.com>
> ---
> lib/librte_pmd_ixgbe/ixgbe_ethdev.c | 371 ++++++++++++++++++++++++++++++++++++
> lib/librte_pmd_ixgbe/ixgbe_ethdev.h | 9 +
> 2 files changed, 380 insertions(+)
>
> diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
> index b341dd0..39f883a 100644
> --- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
> +++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
> @@ -60,6 +60,7 @@
> #include <rte_atomic.h>
> #include <rte_malloc.h>
> #include <rte_random.h>
> +#include <rte_spinlock.h>
> #include <rte_dev.h>
>
> #include "ixgbe_logs.h"
> @@ -173,6 +174,7 @@ static int ixgbe_dev_rss_reta_query(struct rte_eth_dev *dev,
> uint16_t reta_size);
> static void ixgbe_dev_link_status_print(struct rte_eth_dev *dev);
> static int ixgbe_dev_lsc_interrupt_setup(struct rte_eth_dev *dev);
> +static int ixgbe_dev_rxq_interrupt_setup(struct rte_eth_dev *dev);
> static int ixgbe_dev_interrupt_get_status(struct rte_eth_dev *dev);
> static int ixgbe_dev_interrupt_action(struct rte_eth_dev *dev);
> static void ixgbe_dev_interrupt_handler(struct rte_intr_handle *handle,
> @@ -186,11 +188,14 @@ static void ixgbe_dcb_init(struct ixgbe_hw *hw,struct ixgbe_dcb_config *dcb_conf
> /* For Virtual Function support */
> static int eth_ixgbevf_dev_init(struct eth_driver *eth_drv,
> struct rte_eth_dev *eth_dev);
> +static int ixgbevf_dev_interrupt_get_status(struct rte_eth_dev *dev);
> +static int ixgbevf_dev_interrupt_action(struct rte_eth_dev *dev);
> static int ixgbevf_dev_configure(struct rte_eth_dev *dev);
> static int ixgbevf_dev_start(struct rte_eth_dev *dev);
> static void ixgbevf_dev_stop(struct rte_eth_dev *dev);
> static void ixgbevf_dev_close(struct rte_eth_dev *dev);
> static void ixgbevf_intr_disable(struct ixgbe_hw *hw);
> +static void ixgbevf_intr_enable(struct ixgbe_hw *hw);
> static void ixgbevf_dev_stats_get(struct rte_eth_dev *dev,
> struct rte_eth_stats *stats);
> static void ixgbevf_dev_stats_reset(struct rte_eth_dev *dev);
> @@ -198,8 +203,15 @@ static int ixgbevf_vlan_filter_set(struct rte_eth_dev *dev,
> uint16_t vlan_id, int on);
> static void ixgbevf_vlan_strip_queue_set(struct rte_eth_dev *dev,
> uint16_t queue, int on);
> +static void ixgbevf_set_ivar(struct ixgbe_hw *hw, s8 direction, u8 queue, u8 msix_vector);
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> static void ixgbevf_vlan_offload_set(struct rte_eth_dev *dev, int mask);
> static void ixgbevf_set_vfta_all(struct rte_eth_dev *dev, bool on);
> +static void ixgbevf_dev_interrupt_handler(struct rte_intr_handle *handle,
> + void *param);
> +static int ixgbevf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id);
> +static int ixgbevf_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id);
> +static void ixgbevf_set_ivar(struct ixgbe_hw *hw, s8 direction, u8 queue, u8 msix_vector);
Yes re-claim static void ixgbevf_set_ivar() for twice? Or are they
different?
> +static void ixgbevf_configure_msix(struct ixgbe_hw *hw);
>
> /* For Eth VMDQ APIs support */
> static int ixgbe_uc_hash_table_set(struct rte_eth_dev *dev, struct
> @@ -217,6 +229,11 @@ static int ixgbe_mirror_rule_set(struct rte_eth_dev *dev,
> static int ixgbe_mirror_rule_reset(struct rte_eth_dev *dev,
> uint8_t rule_id);
[...]
> +static void
> +ixgbe_configure_msix(struct ixgbe_hw *hw)
> +{
> + int queue_id;
> + u32 mask;
> + u32 gpie;
> +
> + /* set GPIE for in MSI-x mode */
> + gpie = IXGBE_READ_REG(hw, IXGBE_GPIE);
> + gpie = IXGBE_GPIE_MSIX_MODE | IXGBE_GPIE_PBA_SUPPORT |
> + IXGBE_GPIE_OCD;
> + gpie |= IXGBE_GPIE_EIAME;
As you will override gpie with other flags why need to read the reg and
save to gpie first?
Maybe read the reg to reset?
I guess should be:
+ gpie = IXGBE_READ_REG(hw, IXGBE_GPIE);
+ gpie |= IXGBE_GPIE_MSIX_MODE | IXGBE_GPIE_PBA_SUPPORT |
+ IXGBE_GPIE_OCD | IXGBE_GPIE_EIAME;
Maybe not correct as I not familiar with IXGBE.
> + /*
> + * use EIAM to auto-mask when MSI-X interrupt is asserted
> + * this saves a register write for every interrupt
> + */
> + switch (hw->mac.type) {
> + case ixgbe_mac_82598EB:
> + IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
> + break;
> + case ixgbe_mac_82599EB:
> + case ixgbe_mac_X540:
> + default:
> + IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
> + IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
> + break;
> + }
> + IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
> +
> + /*
> + * Populate the IVAR table and set the ITR values to the
> + * corresponding register.
> + */
> + for (queue_id = 0; queue_id < VFIO_MAX_QUEUE_ID; queue_id++)
> + ixgbe_set_ivar(hw, 0, queue_id, queue_id);
> +
> + switch (hw->mac.type) {
> + case ixgbe_mac_82598EB:
> + ixgbe_set_ivar(hw, -1, IXGBE_IVAR_OTHER_CAUSES_INDEX,
> + VFIO_MAX_QUEUE_ID);
> + break;
> + case ixgbe_mac_82599EB:
> + case ixgbe_mac_X540:
> + ixgbe_set_ivar(hw, -1, 1, 32);
May be better to make those values for a macro just as above.
> + break;
> + default:
> + break;
> + }
> + IXGBE_WRITE_REG(hw, IXGBE_EITR(queue_id), 1950);
Also here, what's "1950" stands for?
> +
> + /* set up to autoclear timer, and the vectors */
> + mask = IXGBE_EIMS_ENABLE_MASK;
> + mask &= ~(IXGBE_EIMS_OTHER |
> + IXGBE_EIMS_MAILBOX |
> + IXGBE_EIMS_LSC);
> +
> + IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask);
> +}
> +
> static int ixgbe_set_queue_rate_limit(struct rte_eth_dev *dev,
> uint16_t queue_idx, uint16_t tx_rate)
> {
> diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.h b/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
> index 1383194..328c387 100644
> --- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
> +++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
> @@ -38,6 +38,8 @@
> #include "ixgbe/ixgbe_dcb_82598.h"
> #include "ixgbe_bypass.h"
>
> +#include <rte_spinlock.h>
> +
> /* need update link, bit flag */
> #define IXGBE_FLAG_NEED_LINK_UPDATE (uint32_t)(1 << 0)
> #define IXGBE_FLAG_MAILBOX (uint32_t)(1 << 1)
> @@ -98,6 +100,11 @@
> #define IXGBE_5TUPLE_MAX_PRI 7
> #define IXGBE_5TUPLE_MIN_PRI 1
>
> +#define IXGBE_VF_IRQ_ENABLE_MASK 3 /* vf interrupt enable mask */
> +#define IXGBE_VF_MAXMSIVECTOR 1
> +/* maximum other interrupts besides rx&tx*/
> +#define IXGBE_MAX_OTHER_INTR 1
> +#define IXGBEVF_MAX_OTHER_INTR 1
> /*
> * Information about the fdir mode.
> */
> @@ -116,6 +123,7 @@ struct ixgbe_hw_fdir_info {
> struct ixgbe_interrupt {
> uint32_t flags;
> uint32_t mask;
> + rte_spinlock_t lock;
> };
>
> struct ixgbe_stat_mapping_registers {
> @@ -260,6 +268,7 @@ uint32_t ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev,
> uint16_t rx_queue_id);
>
> int ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset);
> +int ixgbevf_dev_rx_descriptor_done(void *rx_queue, uint16_t offset);
>
> int ixgbe_dev_rx_init(struct rte_eth_dev *dev);
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [dpdk-dev] [PATCH v1 2/5] ixgbe: enable rx queue interrupts for both PF and VF
2015-01-29 3:40 ` Qiu, Michael
@ 2015-01-29 5:39 ` Zhou, Danny
2015-01-29 10:13 ` Qiu, Michael
0 siblings, 1 reply; 16+ messages in thread
From: Zhou, Danny @ 2015-01-29 5:39 UTC (permalink / raw)
To: Qiu, Michael, dev
> -----Original Message-----
> From: Qiu, Michael
> Sent: Thursday, January 29, 2015 11:40 AM
> To: Zhou, Danny; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v1 2/5] ixgbe: enable rx queue interrupts for both PF and VF
>
> On 1/28/2015 5:52 PM, Danny Zhou wrote:
> > Signed-off-by: Danny Zhou <danny.zhou@intel.com>
> > ---
> > lib/librte_pmd_ixgbe/ixgbe_ethdev.c | 371 ++++++++++++++++++++++++++++++++++++
> > lib/librte_pmd_ixgbe/ixgbe_ethdev.h | 9 +
> > 2 files changed, 380 insertions(+)
> >
> > diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
> > index b341dd0..39f883a 100644
> > --- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
> > +++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
> > @@ -60,6 +60,7 @@
> > #include <rte_atomic.h>
> > #include <rte_malloc.h>
> > #include <rte_random.h>
> > +#include <rte_spinlock.h>
> > #include <rte_dev.h>
> >
> > #include "ixgbe_logs.h"
> > @@ -173,6 +174,7 @@ static int ixgbe_dev_rss_reta_query(struct rte_eth_dev *dev,
> > uint16_t reta_size);
> > static void ixgbe_dev_link_status_print(struct rte_eth_dev *dev);
> > static int ixgbe_dev_lsc_interrupt_setup(struct rte_eth_dev *dev);
> > +static int ixgbe_dev_rxq_interrupt_setup(struct rte_eth_dev *dev);
> > static int ixgbe_dev_interrupt_get_status(struct rte_eth_dev *dev);
> > static int ixgbe_dev_interrupt_action(struct rte_eth_dev *dev);
> > static void ixgbe_dev_interrupt_handler(struct rte_intr_handle *handle,
> > @@ -186,11 +188,14 @@ static void ixgbe_dcb_init(struct ixgbe_hw *hw,struct ixgbe_dcb_config *dcb_conf
> > /* For Virtual Function support */
> > static int eth_ixgbevf_dev_init(struct eth_driver *eth_drv,
> > struct rte_eth_dev *eth_dev);
> > +static int ixgbevf_dev_interrupt_get_status(struct rte_eth_dev *dev);
> > +static int ixgbevf_dev_interrupt_action(struct rte_eth_dev *dev);
> > static int ixgbevf_dev_configure(struct rte_eth_dev *dev);
> > static int ixgbevf_dev_start(struct rte_eth_dev *dev);
> > static void ixgbevf_dev_stop(struct rte_eth_dev *dev);
> > static void ixgbevf_dev_close(struct rte_eth_dev *dev);
> > static void ixgbevf_intr_disable(struct ixgbe_hw *hw);
> > +static void ixgbevf_intr_enable(struct ixgbe_hw *hw);
> > static void ixgbevf_dev_stats_get(struct rte_eth_dev *dev,
> > struct rte_eth_stats *stats);
> > static void ixgbevf_dev_stats_reset(struct rte_eth_dev *dev);
> > @@ -198,8 +203,15 @@ static int ixgbevf_vlan_filter_set(struct rte_eth_dev *dev,
> > uint16_t vlan_id, int on);
> > static void ixgbevf_vlan_strip_queue_set(struct rte_eth_dev *dev,
> > uint16_t queue, int on);
> > +static void ixgbevf_set_ivar(struct ixgbe_hw *hw, s8 direction, u8 queue, u8 msix_vector);
>
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> > static void ixgbevf_vlan_offload_set(struct rte_eth_dev *dev, int mask);
> > static void ixgbevf_set_vfta_all(struct rte_eth_dev *dev, bool on);
> > +static void ixgbevf_dev_interrupt_handler(struct rte_intr_handle *handle,
> > + void *param);
> > +static int ixgbevf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id);
> > +static int ixgbevf_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id);
> > +static void ixgbevf_set_ivar(struct ixgbe_hw *hw, s8 direction, u8 queue, u8 msix_vector);
>
> Yes re-claim static void ixgbevf_set_ivar() for twice? Or are they
> different?
>
Good catch.
> > +static void ixgbevf_configure_msix(struct ixgbe_hw *hw);
> >
> > /* For Eth VMDQ APIs support */
> > static int ixgbe_uc_hash_table_set(struct rte_eth_dev *dev, struct
> > @@ -217,6 +229,11 @@ static int ixgbe_mirror_rule_set(struct rte_eth_dev *dev,
> > static int ixgbe_mirror_rule_reset(struct rte_eth_dev *dev,
> > uint8_t rule_id);
>
> [...]
> > +static void
> > +ixgbe_configure_msix(struct ixgbe_hw *hw)
> > +{
> > + int queue_id;
> > + u32 mask;
> > + u32 gpie;
> > +
> > + /* set GPIE for in MSI-x mode */
> > + gpie = IXGBE_READ_REG(hw, IXGBE_GPIE);
> > + gpie = IXGBE_GPIE_MSIX_MODE | IXGBE_GPIE_PBA_SUPPORT |
> > + IXGBE_GPIE_OCD;
> > + gpie |= IXGBE_GPIE_EIAME;
>
> As you will override gpie with other flags why need to read the reg and
> save to gpie first?
>
> Maybe read the reg to reset?
>
> I guess should be:
>
> + gpie = IXGBE_READ_REG(hw, IXGBE_GPIE);
> + gpie |= IXGBE_GPIE_MSIX_MODE | IXGBE_GPIE_PBA_SUPPORT |
> + IXGBE_GPIE_OCD | IXGBE_GPIE_EIAME;
>
> Maybe not correct as I not familiar with IXGBE.
>
>
Accepted.
> > + /*
> > + * use EIAM to auto-mask when MSI-X interrupt is asserted
> > + * this saves a register write for every interrupt
> > + */
> > + switch (hw->mac.type) {
> > + case ixgbe_mac_82598EB:
> > + IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
> > + break;
> > + case ixgbe_mac_82599EB:
> > + case ixgbe_mac_X540:
> > + default:
> > + IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
> > + IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
> > + break;
> > + }
> > + IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
> > +
> > + /*
> > + * Populate the IVAR table and set the ITR values to the
> > + * corresponding register.
> > + */
> > + for (queue_id = 0; queue_id < VFIO_MAX_QUEUE_ID; queue_id++)
> > + ixgbe_set_ivar(hw, 0, queue_id, queue_id);
> > +
> > + switch (hw->mac.type) {
> > + case ixgbe_mac_82598EB:
> > + ixgbe_set_ivar(hw, -1, IXGBE_IVAR_OTHER_CAUSES_INDEX,
> > + VFIO_MAX_QUEUE_ID);
> > + break;
> > + case ixgbe_mac_82599EB:
> > + case ixgbe_mac_X540:
> > + ixgbe_set_ivar(hw, -1, 1, 32);
>
> May be better to make those values for a macro just as above.
>
To be fixed in V2.
> > + break;
> > + default:
> > + break;
> > + }
> > + IXGBE_WRITE_REG(hw, IXGBE_EITR(queue_id), 1950);
>
> Also here, what's "1950" stands for?
>
It is an experienced interrupt throttle value which is used for interrupt support
> > +
> > + /* set up to autoclear timer, and the vectors */
> > + mask = IXGBE_EIMS_ENABLE_MASK;
> > + mask &= ~(IXGBE_EIMS_OTHER |
> > + IXGBE_EIMS_MAILBOX |
> > + IXGBE_EIMS_LSC);
> > +
> > + IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask);
> > +}
> > +
> > static int ixgbe_set_queue_rate_limit(struct rte_eth_dev *dev,
> > uint16_t queue_idx, uint16_t tx_rate)
> > {
> > diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.h b/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
> > index 1383194..328c387 100644
> > --- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
> > +++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
> > @@ -38,6 +38,8 @@
> > #include "ixgbe/ixgbe_dcb_82598.h"
> > #include "ixgbe_bypass.h"
> >
> > +#include <rte_spinlock.h>
> > +
> > /* need update link, bit flag */
> > #define IXGBE_FLAG_NEED_LINK_UPDATE (uint32_t)(1 << 0)
> > #define IXGBE_FLAG_MAILBOX (uint32_t)(1 << 1)
> > @@ -98,6 +100,11 @@
> > #define IXGBE_5TUPLE_MAX_PRI 7
> > #define IXGBE_5TUPLE_MIN_PRI 1
> >
> > +#define IXGBE_VF_IRQ_ENABLE_MASK 3 /* vf interrupt enable mask */
> > +#define IXGBE_VF_MAXMSIVECTOR 1
> > +/* maximum other interrupts besides rx&tx*/
> > +#define IXGBE_MAX_OTHER_INTR 1
> > +#define IXGBEVF_MAX_OTHER_INTR 1
> > /*
> > * Information about the fdir mode.
> > */
> > @@ -116,6 +123,7 @@ struct ixgbe_hw_fdir_info {
> > struct ixgbe_interrupt {
> > uint32_t flags;
> > uint32_t mask;
> > + rte_spinlock_t lock;
> > };
> >
> > struct ixgbe_stat_mapping_registers {
> > @@ -260,6 +268,7 @@ uint32_t ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev,
> > uint16_t rx_queue_id);
> >
> > int ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset);
> > +int ixgbevf_dev_rx_descriptor_done(void *rx_queue, uint16_t offset);
> >
> > int ixgbe_dev_rx_init(struct rte_eth_dev *dev);
> >
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [dpdk-dev] [PATCH v1 2/5] ixgbe: enable rx queue interrupts for both PF and VF
2015-01-29 5:39 ` Zhou, Danny
@ 2015-01-29 10:13 ` Qiu, Michael
0 siblings, 0 replies; 16+ messages in thread
From: Qiu, Michael @ 2015-01-29 10:13 UTC (permalink / raw)
To: Zhou, Danny, dev
On 1/29/2015 1:39 PM, Zhou, Danny wrote:
>
>> -----Original Message-----
>> From: Qiu, Michael
>> Sent: Thursday, January 29, 2015 11:40 AM
>> To: Zhou, Danny; dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH v1 2/5] ixgbe: enable rx queue interrupts for both PF and VF
>>
>> On 1/28/2015 5:52 PM, Danny Zhou wrote:
>>> Signed-off-by: Danny Zhou <danny.zhou@intel.com>
>>> ---
>>> lib/librte_pmd_ixgbe/ixgbe_ethdev.c | 371 ++++++++++++++++++++++++++++++++++++
>>> lib/librte_pmd_ixgbe/ixgbe_ethdev.h | 9 +
>>> 2 files changed, 380 insertions(+)
>>>
>>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
>>> index b341dd0..39f883a 100644
>>> --- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
>>> +++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
>>> @@ -60,6 +60,7 @@
>>> #include <rte_atomic.h>
>>> #include <rte_malloc.h>
>>> #include <rte_random.h>
>>> +#include <rte_spinlock.h>
>>> #include <rte_dev.h>
>>>
>>> #include "ixgbe_logs.h"
>>> @@ -173,6 +174,7 @@ static int ixgbe_dev_rss_reta_query(struct rte_eth_dev *dev,
>>> uint16_t reta_size);
>>> static void ixgbe_dev_link_status_print(struct rte_eth_dev *dev);
>>> static int ixgbe_dev_lsc_interrupt_setup(struct rte_eth_dev *dev);
>>> +static int ixgbe_dev_rxq_interrupt_setup(struct rte_eth_dev *dev);
>>> static int ixgbe_dev_interrupt_get_status(struct rte_eth_dev *dev);
>>> static int ixgbe_dev_interrupt_action(struct rte_eth_dev *dev);
>>> static void ixgbe_dev_interrupt_handler(struct rte_intr_handle *handle,
>>> @@ -186,11 +188,14 @@ static void ixgbe_dcb_init(struct ixgbe_hw *hw,struct ixgbe_dcb_config *dcb_conf
>>> /* For Virtual Function support */
>>> static int eth_ixgbevf_dev_init(struct eth_driver *eth_drv,
>>> struct rte_eth_dev *eth_dev);
>>> +static int ixgbevf_dev_interrupt_get_status(struct rte_eth_dev *dev);
>>> +static int ixgbevf_dev_interrupt_action(struct rte_eth_dev *dev);
>>> static int ixgbevf_dev_configure(struct rte_eth_dev *dev);
>>> static int ixgbevf_dev_start(struct rte_eth_dev *dev);
>>> static void ixgbevf_dev_stop(struct rte_eth_dev *dev);
>>> static void ixgbevf_dev_close(struct rte_eth_dev *dev);
>>> static void ixgbevf_intr_disable(struct ixgbe_hw *hw);
>>> +static void ixgbevf_intr_enable(struct ixgbe_hw *hw);
>>> static void ixgbevf_dev_stats_get(struct rte_eth_dev *dev,
>>> struct rte_eth_stats *stats);
>>> static void ixgbevf_dev_stats_reset(struct rte_eth_dev *dev);
>>> @@ -198,8 +203,15 @@ static int ixgbevf_vlan_filter_set(struct rte_eth_dev *dev,
>>> uint16_t vlan_id, int on);
>>> static void ixgbevf_vlan_strip_queue_set(struct rte_eth_dev *dev,
>>> uint16_t queue, int on);
>>> +static void ixgbevf_set_ivar(struct ixgbe_hw *hw, s8 direction, u8 queue, u8 msix_vector);
>> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>>> static void ixgbevf_vlan_offload_set(struct rte_eth_dev *dev, int mask);
>>> static void ixgbevf_set_vfta_all(struct rte_eth_dev *dev, bool on);
>>> +static void ixgbevf_dev_interrupt_handler(struct rte_intr_handle *handle,
>>> + void *param);
>>> +static int ixgbevf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id);
>>> +static int ixgbevf_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id);
>>> +static void ixgbevf_set_ivar(struct ixgbe_hw *hw, s8 direction, u8 queue, u8 msix_vector);
>> Yes re-claim static void ixgbevf_set_ivar() for twice? Or are they
>> different?
>>
> Good catch.
>
>>> +static void ixgbevf_configure_msix(struct ixgbe_hw *hw);
>>>
>>> /* For Eth VMDQ APIs support */
>>> static int ixgbe_uc_hash_table_set(struct rte_eth_dev *dev, struct
>>> @@ -217,6 +229,11 @@ static int ixgbe_mirror_rule_set(struct rte_eth_dev *dev,
>>> static int ixgbe_mirror_rule_reset(struct rte_eth_dev *dev,
>>> uint8_t rule_id);
>> [...]
>>> +static void
>>> +ixgbe_configure_msix(struct ixgbe_hw *hw)
>>> +{
>>> + int queue_id;
>>> + u32 mask;
>>> + u32 gpie;
>>> +
>>> + /* set GPIE for in MSI-x mode */
>>> + gpie = IXGBE_READ_REG(hw, IXGBE_GPIE);
>>> + gpie = IXGBE_GPIE_MSIX_MODE | IXGBE_GPIE_PBA_SUPPORT |
>>> + IXGBE_GPIE_OCD;
>>> + gpie |= IXGBE_GPIE_EIAME;
>> As you will override gpie with other flags why need to read the reg and
>> save to gpie first?
>>
>> Maybe read the reg to reset?
>>
>> I guess should be:
>>
>> + gpie = IXGBE_READ_REG(hw, IXGBE_GPIE);
>> + gpie |= IXGBE_GPIE_MSIX_MODE | IXGBE_GPIE_PBA_SUPPORT |
>> + IXGBE_GPIE_OCD | IXGBE_GPIE_EIAME;
>>
>> Maybe not correct as I not familiar with IXGBE.
>>
>>
> Accepted.
>
>>> + /*
>>> + * use EIAM to auto-mask when MSI-X interrupt is asserted
>>> + * this saves a register write for every interrupt
>>> + */
>>> + switch (hw->mac.type) {
>>> + case ixgbe_mac_82598EB:
>>> + IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
>>> + break;
>>> + case ixgbe_mac_82599EB:
>>> + case ixgbe_mac_X540:
>>> + default:
>>> + IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
>>> + IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
>>> + break;
>>> + }
>>> + IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
>>> +
>>> + /*
>>> + * Populate the IVAR table and set the ITR values to the
>>> + * corresponding register.
>>> + */
>>> + for (queue_id = 0; queue_id < VFIO_MAX_QUEUE_ID; queue_id++)
>>> + ixgbe_set_ivar(hw, 0, queue_id, queue_id);
>>> +
>>> + switch (hw->mac.type) {
>>> + case ixgbe_mac_82598EB:
>>> + ixgbe_set_ivar(hw, -1, IXGBE_IVAR_OTHER_CAUSES_INDEX,
>>> + VFIO_MAX_QUEUE_ID);
>>> + break;
>>> + case ixgbe_mac_82599EB:
>>> + case ixgbe_mac_X540:
>>> + ixgbe_set_ivar(hw, -1, 1, 32);
>> May be better to make those values for a macro just as above.
>>
> To be fixed in V2.
>
>>> + break;
>>> + default:
>>> + break;
>>> + }
>>> + IXGBE_WRITE_REG(hw, IXGBE_EITR(queue_id), 1950);
>> Also here, what's "1950" stands for?
>>
> It is an experienced interrupt throttle value which is used for interrupt support
Then, I think it is better to define it in a macro and do some brief
introduction on this value.
Thanks,
Michael
>>> +
>>> + /* set up to autoclear timer, and the vectors */
>>> + mask = IXGBE_EIMS_ENABLE_MASK;
>>> + mask &= ~(IXGBE_EIMS_OTHER |
>>> + IXGBE_EIMS_MAILBOX |
>>> + IXGBE_EIMS_LSC);
>>> +
>>> + IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask);
>>> +}
>>> +
>>> static int ixgbe_set_queue_rate_limit(struct rte_eth_dev *dev,
>>> uint16_t queue_idx, uint16_t tx_rate)
>>> {
>>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.h b/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
>>> index 1383194..328c387 100644
>>> --- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
>>> +++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
>>> @@ -38,6 +38,8 @@
>>> #include "ixgbe/ixgbe_dcb_82598.h"
>>> #include "ixgbe_bypass.h"
>>>
>>> +#include <rte_spinlock.h>
>>> +
>>> /* need update link, bit flag */
>>> #define IXGBE_FLAG_NEED_LINK_UPDATE (uint32_t)(1 << 0)
>>> #define IXGBE_FLAG_MAILBOX (uint32_t)(1 << 1)
>>> @@ -98,6 +100,11 @@
>>> #define IXGBE_5TUPLE_MAX_PRI 7
>>> #define IXGBE_5TUPLE_MIN_PRI 1
>>>
>>> +#define IXGBE_VF_IRQ_ENABLE_MASK 3 /* vf interrupt enable mask */
>>> +#define IXGBE_VF_MAXMSIVECTOR 1
>>> +/* maximum other interrupts besides rx&tx*/
>>> +#define IXGBE_MAX_OTHER_INTR 1
>>> +#define IXGBEVF_MAX_OTHER_INTR 1
>>> /*
>>> * Information about the fdir mode.
>>> */
>>> @@ -116,6 +123,7 @@ struct ixgbe_hw_fdir_info {
>>> struct ixgbe_interrupt {
>>> uint32_t flags;
>>> uint32_t mask;
>>> + rte_spinlock_t lock;
>>> };
>>>
>>> struct ixgbe_stat_mapping_registers {
>>> @@ -260,6 +268,7 @@ uint32_t ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev,
>>> uint16_t rx_queue_id);
>>>
>>> int ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset);
>>> +int ixgbevf_dev_rx_descriptor_done(void *rx_queue, uint16_t offset);
>>>
>>> int ixgbe_dev_rx_init(struct rte_eth_dev *dev);
>>>
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* [dpdk-dev] [PATCH v1 3/5] igb: enable rx queue interrupts for PF
2015-01-28 9:50 [dpdk-dev] [PATCH v1 0/5] Interrupt mode for PMD Danny Zhou
2015-01-28 9:50 ` [dpdk-dev] [PATCH v1 1/5] ethdev: add rx interrupt enable/disable functions Danny Zhou
2015-01-28 9:50 ` [dpdk-dev] [PATCH v1 2/5] ixgbe: enable rx queue interrupts for both PF and VF Danny Zhou
@ 2015-01-28 9:50 ` Danny Zhou
2015-01-28 9:50 ` [dpdk-dev] [PATCH v1 4/5] eal: add per rx queue interrupt handling based on VFIO Danny Zhou
2015-01-28 9:50 ` [dpdk-dev] [PATCH v1 5/5] L3fwd-power: enable one-shot rx interrupt and polling/interrupt mode switch Danny Zhou
4 siblings, 0 replies; 16+ messages in thread
From: Danny Zhou @ 2015-01-28 9:50 UTC (permalink / raw)
To: dev
Signed-off-by: Danny Zhou <danny.zhou@intel.com>
---
lib/librte_pmd_e1000/e1000/e1000_hw.h | 3 +
lib/librte_pmd_e1000/e1000_ethdev.h | 6 +
lib/librte_pmd_e1000/igb_ethdev.c | 265 ++++++++++++++++++++++++++++++----
3 files changed, 249 insertions(+), 25 deletions(-)
diff --git a/lib/librte_pmd_e1000/e1000/e1000_hw.h b/lib/librte_pmd_e1000/e1000/e1000_hw.h
index 4dd92a3..9b999ec 100644
--- a/lib/librte_pmd_e1000/e1000/e1000_hw.h
+++ b/lib/librte_pmd_e1000/e1000/e1000_hw.h
@@ -780,6 +780,9 @@ struct e1000_mac_info {
u16 mta_reg_count;
u16 uta_reg_count;
+ u32 max_rx_queues;
+ u32 max_tx_queues;
+
/* Maximum size of the MTA register table in all supported adapters */
#define MAX_MTA_REG 128
u32 mta_shadow[MAX_MTA_REG];
diff --git a/lib/librte_pmd_e1000/e1000_ethdev.h b/lib/librte_pmd_e1000/e1000_ethdev.h
index d155e77..713ca11 100644
--- a/lib/librte_pmd_e1000/e1000_ethdev.h
+++ b/lib/librte_pmd_e1000/e1000_ethdev.h
@@ -34,6 +34,8 @@
#ifndef _E1000_ETHDEV_H_
#define _E1000_ETHDEV_H_
+#include <rte_spinlock.h>
+
/* need update link, bit flag */
#define E1000_FLAG_NEED_LINK_UPDATE (uint32_t)(1 << 0)
#define E1000_FLAG_MAILBOX (uint32_t)(1 << 1)
@@ -105,10 +107,14 @@
#define E1000_FTQF_QUEUE_SHIFT 16
#define E1000_FTQF_QUEUE_ENABLE 0x00000100
+/* maximum number of other interrupts besides Rx & Tx interrupts */
+#define E1000_MAX_OTHER_INTR 1
+
/* structure for interrupt relative data */
struct e1000_interrupt {
uint32_t flags;
uint32_t mask;
+ rte_spinlock_t lock;
};
/* local vfta copy */
diff --git a/lib/librte_pmd_e1000/igb_ethdev.c b/lib/librte_pmd_e1000/igb_ethdev.c
index 2a268b8..2a9bf00 100644
--- a/lib/librte_pmd_e1000/igb_ethdev.c
+++ b/lib/librte_pmd_e1000/igb_ethdev.c
@@ -97,6 +97,7 @@ static int eth_igb_flow_ctrl_get(struct rte_eth_dev *dev,
static int eth_igb_flow_ctrl_set(struct rte_eth_dev *dev,
struct rte_eth_fc_conf *fc_conf);
static int eth_igb_lsc_interrupt_setup(struct rte_eth_dev *dev);
+static int eth_igb_rxq_interrupt_setup(struct rte_eth_dev *dev);
static int eth_igb_interrupt_get_status(struct rte_eth_dev *dev);
static int eth_igb_interrupt_action(struct rte_eth_dev *dev);
static void eth_igb_interrupt_handler(struct rte_intr_handle *handle,
@@ -191,6 +192,12 @@ static int eth_igb_filter_ctrl(struct rte_eth_dev *dev,
enum rte_filter_op filter_op,
void *arg);
+static int eth_igb_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id);
+static int eth_igb_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id);
+static void eth_igb_assign_vector(struct e1000_hw *hw, s8 direction, u8 queue, u8 msix_vector);
+static void eth_igb_configure_msix(struct e1000_hw *hw);
+static void eth_igb_write_ivar(struct e1000_hw *hw, u8 msix_vector, u8 index, u8 offset);
+
/*
* Define VF Stats MACRO for Non "cleared on read" register
*/
@@ -250,6 +257,8 @@ static struct eth_dev_ops eth_igb_ops = {
.vlan_tpid_set = eth_igb_vlan_tpid_set,
.vlan_offload_set = eth_igb_vlan_offload_set,
.rx_queue_setup = eth_igb_rx_queue_setup,
+ .rx_queue_intr_enable = eth_igb_rx_queue_intr_enable,
+ .rx_queue_intr_disable = eth_igb_rx_queue_intr_disable,
.rx_queue_release = eth_igb_rx_queue_release,
.rx_queue_count = eth_igb_rx_queue_count,
.rx_descriptor_done = eth_igb_rx_descriptor_done,
@@ -592,6 +601,16 @@ eth_igb_dev_init(__attribute__((unused)) struct eth_driver *eth_drv,
eth_dev->data->port_id, pci_dev->id.vendor_id,
pci_dev->id.device_id);
+ /* set max interrupt vfio request */
+ struct rte_eth_dev_info dev_info;
+
+ memset(&dev_info, 0, sizeof(dev_info));
+ eth_igb_infos_get(eth_dev, &dev_info);
+
+ hw->mac.max_rx_queues = dev_info.max_rx_queues;
+
+ pci_dev->intr_handle.max_intr = hw->mac.max_rx_queues + E1000_MAX_OTHER_INTR;
+
rte_intr_callback_register(&(pci_dev->intr_handle),
eth_igb_interrupt_handler, (void *)eth_dev);
@@ -754,7 +773,7 @@ eth_igb_start(struct rte_eth_dev *dev)
{
struct e1000_hw *hw =
E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
- int ret, i, mask;
+ int ret, mask;
uint32_t ctrl_ext;
PMD_INIT_FUNC_TRACE();
@@ -794,6 +813,9 @@ eth_igb_start(struct rte_eth_dev *dev)
/* configure PF module if SRIOV enabled */
igb_pf_host_configure(dev);
+ /* confiugre msix for sleep until rx interrupt */
+ eth_igb_configure_msix(hw);
+
/* Configure for OS presence */
igb_init_manageability(hw);
@@ -821,33 +843,9 @@ eth_igb_start(struct rte_eth_dev *dev)
igb_vmdq_vlan_hw_filter_enable(dev);
}
- /*
- * Configure the Interrupt Moderation register (EITR) with the maximum
- * possible value (0xFFFF) to minimize "System Partial Write" issued by
- * spurious [DMA] memory updates of RX and TX ring descriptors.
- *
- * With a EITR granularity of 2 microseconds in the 82576, only 7/8
- * spurious memory updates per second should be expected.
- * ((65535 * 2) / 1000.1000 ~= 0.131 second).
- *
- * Because interrupts are not used at all, the MSI-X is not activated
- * and interrupt moderation is controlled by EITR[0].
- *
- * Note that having [almost] disabled memory updates of RX and TX ring
- * descriptors through the Interrupt Moderation mechanism, memory
- * updates of ring descriptors are now moderated by the configurable
- * value of Write-Back Threshold registers.
- */
if ((hw->mac.type == e1000_82576) || (hw->mac.type == e1000_82580) ||
(hw->mac.type == e1000_i350) || (hw->mac.type == e1000_i210) ||
(hw->mac.type == e1000_i211)) {
- uint32_t ivar;
-
- /* Enable all RX & TX queues in the IVAR registers */
- ivar = (uint32_t) ((E1000_IVAR_VALID << 16) | E1000_IVAR_VALID);
- for (i = 0; i < 8; i++)
- E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, i, ivar);
-
/* Configure EITR with the maximum possible value (0xFFFF) */
E1000_WRITE_REG(hw, E1000_EITR(0), 0xFFFF);
}
@@ -901,6 +899,10 @@ eth_igb_start(struct rte_eth_dev *dev)
if (dev->data->dev_conf.intr_conf.lsc != 0)
ret = eth_igb_lsc_interrupt_setup(dev);
+ /* check if rxq interrupt is enabled */
+ if (dev->data->dev_conf.intr_conf.rxq != 0)
+ eth_igb_rxq_interrupt_setup(dev);
+
/* resume enabled intr since hw reset */
igb_intr_enable(dev);
@@ -1791,6 +1793,35 @@ eth_igb_lsc_interrupt_setup(struct rte_eth_dev *dev)
E1000_DEV_PRIVATE_TO_INTR(dev->data->dev_private);
intr->mask |= E1000_ICR_LSC;
+ rte_spinlock_init(&(intr->lock));
+
+ return 0;
+}
+
+/*
+ * It clears the interrupt causes and enables the interrupt.
+ * It will be called once only during nic initialized.
+ *
+ * @param dev
+ * Pointer to struct rte_eth_dev.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+static int eth_igb_rxq_interrupt_setup(struct rte_eth_dev *dev)
+{
+ uint32_t mask, regval;
+ struct e1000_hw *hw =
+ E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ struct rte_eth_dev_info dev_info;
+
+ memset(&dev_info, 0, sizeof(dev_info));
+ eth_igb_infos_get(dev, &dev_info);
+
+ mask = 0xFFFFFFFF >> (32 - dev_info.max_rx_queues);
+ regval = E1000_READ_REG(hw, E1000_EIMS);
+ E1000_WRITE_REG(hw, E1000_EIMS, regval | mask);
return 0;
}
@@ -3256,5 +3287,189 @@ static struct rte_driver pmd_igbvf_drv = {
.init = rte_igbvf_pmd_init,
};
+static int
+eth_igb_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
+{
+ struct e1000_hw *hw =
+ E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ struct e1000_interrupt *intr =
+ E1000_DEV_PRIVATE_TO_INTR(dev->data->dev_private);
+ uint32_t mask = 1 << queue_id;
+
+ rte_spinlock_lock(&(intr->lock));
+ E1000_WRITE_REG(hw, E1000_EIMC, mask);
+ E1000_WRITE_FLUSH(hw);
+ rte_spinlock_unlock(&(intr->lock));
+
+ return 0;
+}
+
+static int
+eth_igb_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
+{
+ struct e1000_hw *hw =
+ E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ struct e1000_interrupt *intr =
+ E1000_DEV_PRIVATE_TO_INTR(dev->data->dev_private);
+ uint32_t mask = 1 << queue_id;
+ uint32_t regval;
+
+ rte_spinlock_lock(&(intr->lock));
+ regval = E1000_READ_REG(hw, E1000_EIMS);
+ E1000_WRITE_REG(hw, E1000_EIMS, regval | mask);
+ E1000_WRITE_FLUSH(hw);
+ rte_spinlock_unlock(&(intr->lock));
+
+ return 0;
+}
+
+static void
+eth_igb_write_ivar(struct e1000_hw *hw, u8 msix_vector, u8 index, u8 offset)
+{
+ uint32_t ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
+
+ /* clear any bits that are currently set */
+ ivar &= ~((uint32_t)0xFF << offset);
+
+ /* write vector and valid bit */
+ ivar |= (msix_vector | E1000_IVAR_VALID) << offset;
+
+ E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
+}
+
+static void
+eth_igb_assign_vector(struct e1000_hw *hw, s8 direction, u8 queue, u8 msix_vector)
+{
+ uint32_t msixbm = 0;
+ switch (hw->mac.type) {
+ case e1000_82575:
+ /* The 82575 assigns vectors using a bitmask, which matches the
+ * bitmask for the EICR/EIMS/EIMC registers. To assign one
+ * or more queues to a vector, we write the appropriate bits
+ * into the MSIXBM register for that vector.
+ */
+ if (direction == 0)
+ msixbm = E1000_EICR_RX_QUEUE0 << queue;
+ else if (direction == 1)
+ msixbm = E1000_EICR_TX_QUEUE0 << queue;
+ E1000_WRITE_REG(hw, E1000_MSIXBM(msix_vector), msixbm);
+ break;
+ case e1000_82576:
+ /* 82576 uses a table that essentially consists of 2 columns
+ * with 8 rows. The ordering is column-major so we use the
+ * lower 3 bits as the row index, and the 4th bit as the
+ * column offset.
+ */
+ if (direction == 0)
+ eth_igb_write_ivar(hw, msix_vector, queue & 0x7,
+ (queue & 0x8) << 1);
+ else if (direction == 1)
+ eth_igb_write_ivar(hw, msix_vector, queue & 0x7,
+ ((queue & 0x8) << 1) + 8);
+ break;
+ case e1000_82580:
+ case e1000_i350:
+ case e1000_i354:
+ case e1000_i210:
+ case e1000_i211:
+ /* On 82580 and newer adapters the scheme is similar to 82576
+ * however instead of ordering column-major we have things
+ * ordered row-major. So we traverse the table by using
+ * bit 0 as the column offset, and the remaining bits as the
+ * row index.
+ */
+ if (direction == 0)
+ eth_igb_write_ivar(hw, msix_vector,
+ queue >> 1,
+ (queue & 0x1) << 4);
+ else if (direction == 1)
+ eth_igb_write_ivar(hw, msix_vector,
+ queue >> 1,
+ ((queue & 0x1) << 4) + 8);
+ break;
+ default:
+ break;
+ }
+}
+
+/*
+ * eth_igb_configure_msix - Configure MSI-X hardware
+ * @hw: board private structure
+ * eth_igb_configure_msix sets up the hardware to properly generate MSI-X
+ * interrupts.
+ */
+static void
+eth_igb_configure_msix(struct e1000_hw *hw)
+{
+ int queue_id;
+ uint32_t tmp, regval, mask;
+ uint32_t max_rx_queues = hw->mac.max_rx_queues;
+
+ /* set vector for other causes, i.e. link changes */
+ switch (hw->mac.type) {
+ case e1000_82575:
+ tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
+ /* enable MSI-X PBA support */
+ tmp |= E1000_CTRL_EXT_PBA_CLR;
+
+ /* Auto-Mask interrupts upon ICR read */
+ tmp |= E1000_CTRL_EXT_EIAME;
+ tmp |= E1000_CTRL_EXT_IRCA;
+
+ E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
+
+ /* enable msix_other interrupt */
+ E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), 0, E1000_EIMS_OTHER);
+ regval = E1000_READ_REG(hw, E1000_EIAC);
+ E1000_WRITE_REG(hw, E1000_EIAC, regval | E1000_EIMS_OTHER);
+ regval = E1000_READ_REG(hw, E1000_EIAM);
+ E1000_WRITE_REG(hw, E1000_EIMS, regval | E1000_EIMS_OTHER);
+
+ break;
+ case e1000_82576:
+ case e1000_82580:
+ case e1000_i350:
+ case e1000_i354:
+ case e1000_i210:
+ case e1000_i211:
+ /* Turn on MSI-X capability first, or our settings won't stick.
+ * And it will take days to debug.
+ */
+ E1000_WRITE_REG(hw, E1000_GPIE, E1000_GPIE_MSIX_MODE |
+ E1000_GPIE_PBA | E1000_GPIE_EIAME |
+ E1000_GPIE_NSICR);
+
+ /* enable msix_other interrupt */
+ mask = 1 << max_rx_queues;
+ regval = E1000_READ_REG(hw, E1000_EIAC);
+ E1000_WRITE_REG(hw, E1000_EIAC, regval | mask);
+ regval = E1000_READ_REG(hw, E1000_EIMS);
+ E1000_WRITE_REG(hw, E1000_EIMS, regval | mask);
+ tmp = (max_rx_queues | E1000_IVAR_VALID) << 8;
+
+ E1000_WRITE_REG(hw, E1000_IVAR_MISC, tmp);
+ break;
+ default:
+ /* do nothing, since nothing else supports MSI-X */
+ break;
+ }
+
+ /*
+ * use EIAM and EIAC to auto-mask and auto-clear when MSI-X interrupt is asserted
+ * this saves a register write for every interrupt
+ */
+ mask = 0xFFFFFFFF >> (32 - max_rx_queues);
+ regval = E1000_READ_REG(hw, E1000_EIAC);
+ E1000_WRITE_REG(hw, E1000_EIAC, regval | mask);
+ regval = E1000_READ_REG(hw, E1000_EIAM);
+ E1000_WRITE_REG(hw, E1000_EIAM, regval | mask);
+
+ for (queue_id = 0; queue_id < VFIO_MAX_QUEUE_ID; queue_id++)
+ eth_igb_assign_vector(hw, 0, queue_id, queue_id);
+
+ E1000_WRITE_FLUSH(hw);
+}
+
+
PMD_REGISTER_DRIVER(pmd_igb_drv);
PMD_REGISTER_DRIVER(pmd_igbvf_drv);
--
1.8.1.4
^ permalink raw reply [flat|nested] 16+ messages in thread
* [dpdk-dev] [PATCH v1 4/5] eal: add per rx queue interrupt handling based on VFIO
2015-01-28 9:50 [dpdk-dev] [PATCH v1 0/5] Interrupt mode for PMD Danny Zhou
` (2 preceding siblings ...)
2015-01-28 9:50 ` [dpdk-dev] [PATCH v1 3/5] igb: enable rx queue interrupts for PF Danny Zhou
@ 2015-01-28 9:50 ` Danny Zhou
2015-01-28 18:10 ` Liang, Cunming
2015-01-29 5:06 ` Qiu, Michael
2015-01-28 9:50 ` [dpdk-dev] [PATCH v1 5/5] L3fwd-power: enable one-shot rx interrupt and polling/interrupt mode switch Danny Zhou
4 siblings, 2 replies; 16+ messages in thread
From: Danny Zhou @ 2015-01-28 9:50 UTC (permalink / raw)
To: dev
Signed-off-by: Danny Zhou <danny.zhou@intel.com>
Signed-off-by: Yong Liu <yong.liu@intel.com>
---
lib/librte_eal/common/include/rte_eal.h | 9 +
lib/librte_eal/linuxapp/eal/eal_interrupts.c | 186 ++++++++++++++++-----
lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 11 +-
.../linuxapp/eal/include/exec-env/rte_interrupts.h | 4 +
4 files changed, 168 insertions(+), 42 deletions(-)
diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h
index f4ecd2e..5f31aa5 100644
--- a/lib/librte_eal/common/include/rte_eal.h
+++ b/lib/librte_eal/common/include/rte_eal.h
@@ -150,6 +150,15 @@ int rte_eal_iopl_init(void);
* - On failure, a negative error value.
*/
int rte_eal_init(int argc, char **argv);
+
+/**
+ * @param port_id
+ * the port id
+ * @return
+ * - On success, return 0
+ */
+int rte_eal_wait_rx_intr(uint8_t port_id, uint8_t queue_id);
+
/**
* Usage function typedef used by the application usage function.
*
diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
index dc2668a..b120303 100644
--- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c
+++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
@@ -64,6 +64,7 @@
#include <rte_malloc.h>
#include <rte_errno.h>
#include <rte_spinlock.h>
+#include <rte_ethdev.h>
#include "eal_private.h"
#include "eal_vfio.h"
@@ -127,6 +128,7 @@ static pthread_t intr_thread;
#ifdef VFIO_PRESENT
#define IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + sizeof(int))
+#define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + sizeof(int) * (VFIO_MAX_QUEUE_ID + 1))
/* enable legacy (INTx) interrupts */
static int
@@ -221,7 +223,7 @@ vfio_disable_intx(struct rte_intr_handle *intr_handle) {
/* enable MSI-X interrupts */
static int
vfio_enable_msi(struct rte_intr_handle *intr_handle) {
- int len, ret;
+ int len, ret, max_intr;
char irq_set_buf[IRQ_SET_BUF_LEN];
struct vfio_irq_set *irq_set;
int *fd_ptr;
@@ -230,12 +232,19 @@ vfio_enable_msi(struct rte_intr_handle *intr_handle) {
irq_set = (struct vfio_irq_set *) irq_set_buf;
irq_set->argsz = len;
- irq_set->count = 1;
+ if ((!intr_handle->max_intr) ||
+ (intr_handle->max_intr > VFIO_MAX_QUEUE_ID))
+ max_intr = VFIO_MAX_QUEUE_ID + 1;
+ else
+ max_intr = intr_handle->max_intr;
+
+ irq_set->count = max_intr;
irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
irq_set->start = 0;
fd_ptr = (int *) &irq_set->data;
- *fd_ptr = intr_handle->fd;
+ memcpy(fd_ptr, intr_handle->queue_fd, sizeof(intr_handle->queue_fd));
+ fd_ptr[max_intr - 1] = intr_handle->fd;
ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
@@ -244,23 +253,6 @@ vfio_enable_msi(struct rte_intr_handle *intr_handle) {
intr_handle->fd);
return -1;
}
-
- /* manually trigger interrupt to enable it */
- memset(irq_set, 0, len);
- len = sizeof(struct vfio_irq_set);
- irq_set->argsz = len;
- irq_set->count = 1;
- irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
- irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
- irq_set->start = 0;
-
- ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
-
- if (ret) {
- RTE_LOG(ERR, EAL, "Error triggering MSI interrupts for fd %d\n",
- intr_handle->fd);
- return -1;
- }
return 0;
}
@@ -292,8 +284,8 @@ vfio_disable_msi(struct rte_intr_handle *intr_handle) {
/* enable MSI-X interrupts */
static int
vfio_enable_msix(struct rte_intr_handle *intr_handle) {
- int len, ret;
- char irq_set_buf[IRQ_SET_BUF_LEN];
+ int len, ret, max_intr;
+ char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
struct vfio_irq_set *irq_set;
int *fd_ptr;
@@ -301,12 +293,19 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) {
irq_set = (struct vfio_irq_set *) irq_set_buf;
irq_set->argsz = len;
- irq_set->count = 1;
+ if ((!intr_handle->max_intr) ||
+ (intr_handle->max_intr > VFIO_MAX_QUEUE_ID))
+ max_intr = VFIO_MAX_QUEUE_ID + 1;
+ else
+ max_intr = intr_handle->max_intr;
+
+ irq_set->count = max_intr;
irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
irq_set->start = 0;
fd_ptr = (int *) &irq_set->data;
- *fd_ptr = intr_handle->fd;
+ memcpy(fd_ptr, intr_handle->queue_fd, sizeof(intr_handle->queue_fd));
+ fd_ptr[max_intr - 1] = intr_handle->fd;
ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
@@ -316,22 +315,6 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) {
return -1;
}
- /* manually trigger interrupt to enable it */
- memset(irq_set, 0, len);
- len = sizeof(struct vfio_irq_set);
- irq_set->argsz = len;
- irq_set->count = 1;
- irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
- irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
- irq_set->start = 0;
-
- ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
-
- if (ret) {
- RTE_LOG(ERR, EAL, "Error triggering MSI-X interrupts for fd %d\n",
- intr_handle->fd);
- return -1;
- }
return 0;
}
@@ -339,7 +322,7 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) {
static int
vfio_disable_msix(struct rte_intr_handle *intr_handle) {
struct vfio_irq_set *irq_set;
- char irq_set_buf[IRQ_SET_BUF_LEN];
+ char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
int len, ret;
len = sizeof(struct vfio_irq_set);
@@ -824,3 +807,124 @@ rte_eal_intr_init(void)
return -ret;
}
+static void
+eal_intr_process_rx_interrupts(uint8_t port_id, struct epoll_event *events, int nfds)
+{
+ int n, bytes_read;
+ union rte_intr_read_buffer buf;
+ struct rte_intr_handle intr_handle = rte_eth_devices[port_id].pci_dev->intr_handle;
+
+ for (n = 0; n < nfds; n++) {
+ /* set the length to be read dor different handle type */
+ switch (intr_handle.type) {
+ case RTE_INTR_HANDLE_UIO:
+ bytes_read = sizeof(buf.uio_intr_count);
+ break;
+ case RTE_INTR_HANDLE_ALARM:
+ bytes_read = sizeof(buf.timerfd_num);
+ break;
+#ifdef VFIO_PRESENT
+ case RTE_INTR_HANDLE_VFIO_MSIX:
+ case RTE_INTR_HANDLE_VFIO_MSI:
+ case RTE_INTR_HANDLE_VFIO_LEGACY:
+ bytes_read = sizeof(buf.vfio_intr_count);
+ break;
+#endif
+ default:
+ bytes_read = 1;
+ break;
+ }
+
+ /**
+ * read out to clear the ready-to-be-read flag
+ * for epoll_wait.
+ */
+ bytes_read = read(events[n].data.fd, &buf, bytes_read);
+ if (bytes_read < 0)
+ RTE_LOG(ERR, EAL, "Error reading from file "
+ "descriptor %d: %s\n", events[n].data.fd,
+ strerror(errno));
+ else if (bytes_read == 0)
+ RTE_LOG(ERR, EAL, "Read nothing from file "
+ "descriptor %d\n", events[n].data.fd);
+ }
+}
+
+static void
+eal_intr_handle_rx_interrupts(uint8_t port_id, int pfd, unsigned totalfds)
+{
+ struct epoll_event events[totalfds];
+ int nfds = 0;
+
+m_wait:
+ nfds = epoll_wait(pfd, events, totalfds,
+ EAL_INTR_EPOLL_WAIT_FOREVER);
+ /* epoll_wait fail */
+ if (nfds < 0) {
+ RTE_LOG(ERR, EAL,
+ "epoll_wait returns with fail\n");
+ return;
+ }
+ /* epoll_wait timeout, will never happens here */
+ else if (nfds == 0)
+ goto m_wait;
+ /* epoll_wait has at least one fd ready to read */
+ eal_intr_process_rx_interrupts(port_id, events, nfds);
+}
+
+int
+rte_eal_wait_rx_intr(uint8_t port_id, uint8_t queue_id)
+{
+ struct rte_intr_handle intr_handle = rte_eth_devices[port_id].pci_dev->intr_handle;
+ struct epoll_event ev;
+ unsigned numfds = 0;
+
+ /* create epoll fd */
+ int pfd = epoll_create(1);
+ if (pfd < 0) {
+ RTE_LOG(ERR, EAL, "Cannot create epoll instance\n");
+ return -1;
+ }
+
+ rte_spinlock_lock(&intr_lock);
+
+ ev.events = EPOLLIN | EPOLLPRI;
+ switch (intr_handle.type) {
+ case RTE_INTR_HANDLE_UIO:
+ ev.data.fd = intr_handle.fd;
+ break;
+#ifdef VFIO_PRESENT
+ case RTE_INTR_HANDLE_VFIO_MSIX:
+ ev.data.fd = intr_handle.queue_fd[queue_id];
+ break;
+ case RTE_INTR_HANDLE_VFIO_MSI:
+ ev.data.fd = intr_handle.queue_fd[queue_id];
+ break;
+ case RTE_INTR_HANDLE_VFIO_LEGACY:
+ ev.data.fd = intr_handle.queue_fd[queue_id];
+ break;
+#endif
+ default:
+ break;
+ close(pfd);
+ return -1;
+ }
+
+ if (epoll_ctl(pfd, EPOLL_CTL_ADD, ev.data.fd, &ev) < 0) {
+ RTE_LOG(ERR, EAL, "Error adding fd %d epoll_ctl, %s\n",
+ intr_handle.queue_fd[queue_id], strerror(errno));
+ } else
+ numfds++;
+
+ rte_spinlock_unlock(&intr_lock);
+ /* serve the interrupt */
+ eal_intr_handle_rx_interrupts(port_id, pfd, numfds);
+
+ /**
+ * when we return, we need to rebuild the
+ * list of fds to monitor.
+ */
+ close(pfd);
+
+ return 0;
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
index 20e0977..63d0ae8 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
@@ -283,7 +283,6 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd)
dev->intr_handle.fd = fd;
dev->intr_handle.vfio_dev_fd = vfio_dev_fd;
-
switch (i) {
case VFIO_PCI_MSIX_IRQ_INDEX:
internal_config.vfio_intr_mode = RTE_INTR_MODE_MSIX;
@@ -302,6 +301,16 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd)
return -1;
}
+ for (i = 0; i < VFIO_MAX_QUEUE_ID; i++) {
+ fd = eventfd(0, 0);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, " cannot set up eventfd, "
+ "error %i (%s)\n", errno, strerror(errno));
+ return -1;
+ }
+ dev->intr_handle.queue_fd[i] = fd;
+ }
+
return 0;
}
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
index 23eafd9..c6982cf 100644
--- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
@@ -38,6 +38,8 @@
#ifndef _RTE_LINUXAPP_INTERRUPTS_H_
#define _RTE_LINUXAPP_INTERRUPTS_H_
+#define VFIO_MAX_QUEUE_ID 32
+
enum rte_intr_handle_type {
RTE_INTR_HANDLE_UNKNOWN = 0,
RTE_INTR_HANDLE_UIO, /**< uio device handle */
@@ -52,6 +54,8 @@ enum rte_intr_handle_type {
struct rte_intr_handle {
int vfio_dev_fd; /**< VFIO device file descriptor */
int fd; /**< file descriptor */
+ int max_intr; /**< max interrupt requested */
+ int queue_fd[VFIO_MAX_QUEUE_ID]; /**< rx and tx queue interrupt file descriptor */
enum rte_intr_handle_type type; /**< handle type */
};
--
1.8.1.4
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [dpdk-dev] [PATCH v1 4/5] eal: add per rx queue interrupt handling based on VFIO
2015-01-28 9:50 ` [dpdk-dev] [PATCH v1 4/5] eal: add per rx queue interrupt handling based on VFIO Danny Zhou
@ 2015-01-28 18:10 ` Liang, Cunming
2015-01-29 4:01 ` Zhou, Danny
2015-01-29 5:06 ` Qiu, Michael
1 sibling, 1 reply; 16+ messages in thread
From: Liang, Cunming @ 2015-01-28 18:10 UTC (permalink / raw)
To: Zhou, Danny, dev
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Danny Zhou
> Sent: Wednesday, January 28, 2015 2:51 AM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH v1 4/5] eal: add per rx queue interrupt handling
> based on VFIO
>
> Signed-off-by: Danny Zhou <danny.zhou@intel.com>
> Signed-off-by: Yong Liu <yong.liu@intel.com>
> ---
> lib/librte_eal/common/include/rte_eal.h | 9 +
> lib/librte_eal/linuxapp/eal/eal_interrupts.c | 186
> ++++++++++++++++-----
> lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 11 +-
> .../linuxapp/eal/include/exec-env/rte_interrupts.h | 4 +
> 4 files changed, 168 insertions(+), 42 deletions(-)
>
> diff --git a/lib/librte_eal/common/include/rte_eal.h
> b/lib/librte_eal/common/include/rte_eal.h
> index f4ecd2e..5f31aa5 100644
> --- a/lib/librte_eal/common/include/rte_eal.h
> +++ b/lib/librte_eal/common/include/rte_eal.h
> @@ -150,6 +150,15 @@ int rte_eal_iopl_init(void);
> * - On failure, a negative error value.
> */
> int rte_eal_init(int argc, char **argv);
> +
> +/**
> + * @param port_id
> + * the port id
> + * @return
> + * - On success, return 0
[LCM] It has changes to return -1.
> + */
> +int rte_eal_wait_rx_intr(uint8_t port_id, uint8_t queue_id);
> +
> /**
> * Usage function typedef used by the application usage function.
> *
> diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c
> b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
> index dc2668a..b120303 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
> @@ -64,6 +64,7 @@
> #include <rte_malloc.h>
> #include <rte_errno.h>
> #include <rte_spinlock.h>
> +#include <rte_ethdev.h>
>
> #include "eal_private.h"
> #include "eal_vfio.h"
> @@ -127,6 +128,7 @@ static pthread_t intr_thread;
> #ifdef VFIO_PRESENT
>
> #define IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + sizeof(int))
> +#define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + sizeof(int) *
> (VFIO_MAX_QUEUE_ID + 1))
>
> /* enable legacy (INTx) interrupts */
> static int
> @@ -221,7 +223,7 @@ vfio_disable_intx(struct rte_intr_handle *intr_handle) {
> /* enable MSI-X interrupts */
> static int
> vfio_enable_msi(struct rte_intr_handle *intr_handle) {
> - int len, ret;
> + int len, ret, max_intr;
> char irq_set_buf[IRQ_SET_BUF_LEN];
> struct vfio_irq_set *irq_set;
> int *fd_ptr;
> @@ -230,12 +232,19 @@ vfio_enable_msi(struct rte_intr_handle *intr_handle)
> {
>
> irq_set = (struct vfio_irq_set *) irq_set_buf;
> irq_set->argsz = len;
> - irq_set->count = 1;
> + if ((!intr_handle->max_intr) ||
> + (intr_handle->max_intr > VFIO_MAX_QUEUE_ID))
> + max_intr = VFIO_MAX_QUEUE_ID + 1;
> + else
> + max_intr = intr_handle->max_intr;
> +
> + irq_set->count = max_intr;
> irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
> VFIO_IRQ_SET_ACTION_TRIGGER;
> irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
> irq_set->start = 0;
> fd_ptr = (int *) &irq_set->data;
> - *fd_ptr = intr_handle->fd;
> + memcpy(fd_ptr, intr_handle->queue_fd, sizeof(intr_handle->queue_fd));
> + fd_ptr[max_intr - 1] = intr_handle->fd;
>
> ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
>
> @@ -244,23 +253,6 @@ vfio_enable_msi(struct rte_intr_handle *intr_handle) {
> intr_handle->fd);
> return -1;
> }
> -
> - /* manually trigger interrupt to enable it */
> - memset(irq_set, 0, len);
> - len = sizeof(struct vfio_irq_set);
> - irq_set->argsz = len;
> - irq_set->count = 1;
> - irq_set->flags = VFIO_IRQ_SET_DATA_NONE |
> VFIO_IRQ_SET_ACTION_TRIGGER;
> - irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
> - irq_set->start = 0;
> -
> - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
> -
> - if (ret) {
> - RTE_LOG(ERR, EAL, "Error triggering MSI interrupts for fd %d\n",
> - intr_handle->fd);
> - return -1;
> - }
> return 0;
> }
>
> @@ -292,8 +284,8 @@ vfio_disable_msi(struct rte_intr_handle *intr_handle) {
> /* enable MSI-X interrupts */
> static int
> vfio_enable_msix(struct rte_intr_handle *intr_handle) {
> - int len, ret;
> - char irq_set_buf[IRQ_SET_BUF_LEN];
> + int len, ret, max_intr;
> + char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
> struct vfio_irq_set *irq_set;
> int *fd_ptr;
>
> @@ -301,12 +293,19 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle)
> {
>
> irq_set = (struct vfio_irq_set *) irq_set_buf;
> irq_set->argsz = len;
> - irq_set->count = 1;
> + if ((!intr_handle->max_intr) ||
> + (intr_handle->max_intr > VFIO_MAX_QUEUE_ID))
> + max_intr = VFIO_MAX_QUEUE_ID + 1;
> + else
> + max_intr = intr_handle->max_intr;
> +
> + irq_set->count = max_intr;
> irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
> VFIO_IRQ_SET_ACTION_TRIGGER;
> irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
> irq_set->start = 0;
> fd_ptr = (int *) &irq_set->data;
> - *fd_ptr = intr_handle->fd;
> + memcpy(fd_ptr, intr_handle->queue_fd, sizeof(intr_handle->queue_fd));
> + fd_ptr[max_intr - 1] = intr_handle->fd;
>
> ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
>
> @@ -316,22 +315,6 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle)
> {
> return -1;
> }
>
> - /* manually trigger interrupt to enable it */
> - memset(irq_set, 0, len);
> - len = sizeof(struct vfio_irq_set);
> - irq_set->argsz = len;
> - irq_set->count = 1;
> - irq_set->flags = VFIO_IRQ_SET_DATA_NONE |
> VFIO_IRQ_SET_ACTION_TRIGGER;
> - irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
> - irq_set->start = 0;
> -
> - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
> -
> - if (ret) {
> - RTE_LOG(ERR, EAL, "Error triggering MSI-X interrupts for fd %d\n",
> - intr_handle->fd);
> - return -1;
> - }
> return 0;
> }
>
> @@ -339,7 +322,7 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) {
> static int
> vfio_disable_msix(struct rte_intr_handle *intr_handle) {
> struct vfio_irq_set *irq_set;
> - char irq_set_buf[IRQ_SET_BUF_LEN];
> + char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
> int len, ret;
>
> len = sizeof(struct vfio_irq_set);
> @@ -824,3 +807,124 @@ rte_eal_intr_init(void)
> return -ret;
> }
>
> +static void
> +eal_intr_process_rx_interrupts(uint8_t port_id, struct epoll_event *events, int
> nfds)
> +{
> + int n, bytes_read;
> + union rte_intr_read_buffer buf;
> + struct rte_intr_handle intr_handle =
> rte_eth_devices[port_id].pci_dev->intr_handle;
> +
> + for (n = 0; n < nfds; n++) {
> + /* set the length to be read dor different handle type */
> + switch (intr_handle.type) {
> + case RTE_INTR_HANDLE_UIO:
> + bytes_read = sizeof(buf.uio_intr_count);
> + break;
> + case RTE_INTR_HANDLE_ALARM:
> + bytes_read = sizeof(buf.timerfd_num);
> + break;
> +#ifdef VFIO_PRESENT
> + case RTE_INTR_HANDLE_VFIO_MSIX:
> + case RTE_INTR_HANDLE_VFIO_MSI:
> + case RTE_INTR_HANDLE_VFIO_LEGACY:
> + bytes_read = sizeof(buf.vfio_intr_count);
> + break;
> +#endif
> + default:
> + bytes_read = 1;
> + break;
> + }
> +
> + /**
> + * read out to clear the ready-to-be-read flag
> + * for epoll_wait.
> + */
> + bytes_read = read(events[n].data.fd, &buf, bytes_read);
> + if (bytes_read < 0)
> + RTE_LOG(ERR, EAL, "Error reading from file "
> + "descriptor %d: %s\n", events[n].data.fd,
> + strerror(errno));
> + else if (bytes_read == 0)
> + RTE_LOG(ERR, EAL, "Read nothing from file "
> + "descriptor %d\n", events[n].data.fd);
> + }
> +}
> +
> +static void
> +eal_intr_handle_rx_interrupts(uint8_t port_id, int pfd, unsigned totalfds)
> +{
> + struct epoll_event events[totalfds];
[LCM] I don't think it's a way to define array by var.
> + int nfds = 0;
> +
> +m_wait:
[LCM] do {
> + nfds = epoll_wait(pfd, events, totalfds,
> + EAL_INTR_EPOLL_WAIT_FOREVER);
> + /* epoll_wait fail */
> + if (nfds < 0) {
> + RTE_LOG(ERR, EAL,
> + "epoll_wait returns with fail\n");
> + return;
> + }
> + /* epoll_wait timeout, will never happens here */
[LCM] } while (nfds == 0);
> + else if (nfds == 0)
> + goto m_wait;
[LCM] Then we don't need lable 'm_wait' there.
> + /* epoll_wait has at least one fd ready to read */
> + eal_intr_process_rx_interrupts(port_id, events, nfds);
> +}
> +
> +int
> +rte_eal_wait_rx_intr(uint8_t port_id, uint8_t queue_id)
> +{
> + struct rte_intr_handle intr_handle =
> rte_eth_devices[port_id].pci_dev->intr_handle;
> + struct epoll_event ev;
> + unsigned numfds = 0;
> +
> + /* create epoll fd */
> + int pfd = epoll_create(1);
> + if (pfd < 0) {
> + RTE_LOG(ERR, EAL, "Cannot create epoll instance\n");
> + return -1;
> + }
> +
> + rte_spinlock_lock(&intr_lock);
> +
> + ev.events = EPOLLIN | EPOLLPRI;
> + switch (intr_handle.type) {
> + case RTE_INTR_HANDLE_UIO:
> + ev.data.fd = intr_handle.fd;
> + break;
> +#ifdef VFIO_PRESENT
> + case RTE_INTR_HANDLE_VFIO_MSIX:
> + ev.data.fd = intr_handle.queue_fd[queue_id];
> + break;
> + case RTE_INTR_HANDLE_VFIO_MSI:
> + ev.data.fd = intr_handle.queue_fd[queue_id];
> + break;
> + case RTE_INTR_HANDLE_VFIO_LEGACY:
> + ev.data.fd = intr_handle.queue_fd[queue_id];
> + break;
> +#endif
> + default:
> + break;
> + close(pfd);
> + return -1;
[LCM] It makes confusing here. If directly return, 'break' won't require and need to add unlock.
> + }
> +
> + if (epoll_ctl(pfd, EPOLL_CTL_ADD, ev.data.fd, &ev) < 0) {
> + RTE_LOG(ERR, EAL, "Error adding fd %d epoll_ctl, %s\n",
> + intr_handle.queue_fd[queue_id], strerror(errno));
> + } else
> + numfds++;
> +
> + rte_spinlock_unlock(&intr_lock);
> + /* serve the interrupt */
> + eal_intr_handle_rx_interrupts(port_id, pfd, numfds);
> +
> + /**
> + * when we return, we need to rebuild the
> + * list of fds to monitor.
> + */
> + close(pfd);
> +
> + return 0;
> +}
> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> index 20e0977..63d0ae8 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> @@ -283,7 +283,6 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev,
> int vfio_dev_fd)
>
> dev->intr_handle.fd = fd;
> dev->intr_handle.vfio_dev_fd = vfio_dev_fd;
> -
> switch (i) {
> case VFIO_PCI_MSIX_IRQ_INDEX:
> internal_config.vfio_intr_mode = RTE_INTR_MODE_MSIX;
> @@ -302,6 +301,16 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev,
> int vfio_dev_fd)
> return -1;
> }
>
> + for (i = 0; i < VFIO_MAX_QUEUE_ID; i++) {
> + fd = eventfd(0, 0);
> + if (fd < 0) {
> + RTE_LOG(ERR, EAL, " cannot set up eventfd, "
> + "error %i (%s)\n", errno, strerror(errno));
> + return -1;
> + }
> + dev->intr_handle.queue_fd[i] = fd;
> + }
> +
> return 0;
> }
>
> diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> index 23eafd9..c6982cf 100644
> --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> @@ -38,6 +38,8 @@
> #ifndef _RTE_LINUXAPP_INTERRUPTS_H_
> #define _RTE_LINUXAPP_INTERRUPTS_H_
>
> +#define VFIO_MAX_QUEUE_ID 32
> +
> enum rte_intr_handle_type {
> RTE_INTR_HANDLE_UNKNOWN = 0,
> RTE_INTR_HANDLE_UIO, /**< uio device handle */
> @@ -52,6 +54,8 @@ enum rte_intr_handle_type {
> struct rte_intr_handle {
> int vfio_dev_fd; /**< VFIO device file descriptor */
> int fd; /**< file descriptor */
> + int max_intr; /**< max interrupt requested */
> + int queue_fd[VFIO_MAX_QUEUE_ID]; /**< rx and tx queue interrupt file
> descriptor */
> enum rte_intr_handle_type type; /**< handle type */
> };
>
> --
> 1.8.1.4
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [dpdk-dev] [PATCH v1 4/5] eal: add per rx queue interrupt handling based on VFIO
2015-01-28 18:10 ` Liang, Cunming
@ 2015-01-29 4:01 ` Zhou, Danny
0 siblings, 0 replies; 16+ messages in thread
From: Zhou, Danny @ 2015-01-29 4:01 UTC (permalink / raw)
To: Liang, Cunming, dev
Thanks for review Steve. Comments inline.
> -----Original Message-----
> From: Liang, Cunming
> Sent: Thursday, January 29, 2015 2:10 AM
> To: Zhou, Danny; dev@dpdk.org
> Subject: RE: [dpdk-dev] [PATCH v1 4/5] eal: add per rx queue interrupt handling based on VFIO
>
>
>
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Danny Zhou
> > Sent: Wednesday, January 28, 2015 2:51 AM
> > To: dev@dpdk.org
> > Subject: [dpdk-dev] [PATCH v1 4/5] eal: add per rx queue interrupt handling
> > based on VFIO
> >
> > Signed-off-by: Danny Zhou <danny.zhou@intel.com>
> > Signed-off-by: Yong Liu <yong.liu@intel.com>
> > ---
> > lib/librte_eal/common/include/rte_eal.h | 9 +
> > lib/librte_eal/linuxapp/eal/eal_interrupts.c | 186
> > ++++++++++++++++-----
> > lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 11 +-
> > .../linuxapp/eal/include/exec-env/rte_interrupts.h | 4 +
> > 4 files changed, 168 insertions(+), 42 deletions(-)
> >
> > diff --git a/lib/librte_eal/common/include/rte_eal.h
> > b/lib/librte_eal/common/include/rte_eal.h
> > index f4ecd2e..5f31aa5 100644
> > --- a/lib/librte_eal/common/include/rte_eal.h
> > +++ b/lib/librte_eal/common/include/rte_eal.h
> > @@ -150,6 +150,15 @@ int rte_eal_iopl_init(void);
> > * - On failure, a negative error value.
> > */
> > int rte_eal_init(int argc, char **argv);
> > +
> > +/**
> > + * @param port_id
> > + * the port id
> > + * @return
> > + * - On success, return 0
> [LCM] It has changes to return -1.
> > + */
> > +int rte_eal_wait_rx_intr(uint8_t port_id, uint8_t queue_id);
> > +
> > /**
> > * Usage function typedef used by the application usage function.
> > *
> > diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c
> > b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
> > index dc2668a..b120303 100644
> > --- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c
> > +++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
> > @@ -64,6 +64,7 @@
> > #include <rte_malloc.h>
> > #include <rte_errno.h>
> > #include <rte_spinlock.h>
> > +#include <rte_ethdev.h>
> >
> > #include "eal_private.h"
> > #include "eal_vfio.h"
> > @@ -127,6 +128,7 @@ static pthread_t intr_thread;
> > #ifdef VFIO_PRESENT
> >
> > #define IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + sizeof(int))
> > +#define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + sizeof(int) *
> > (VFIO_MAX_QUEUE_ID + 1))
> >
> > /* enable legacy (INTx) interrupts */
> > static int
> > @@ -221,7 +223,7 @@ vfio_disable_intx(struct rte_intr_handle *intr_handle) {
> > /* enable MSI-X interrupts */
> > static int
> > vfio_enable_msi(struct rte_intr_handle *intr_handle) {
> > - int len, ret;
> > + int len, ret, max_intr;
> > char irq_set_buf[IRQ_SET_BUF_LEN];
> > struct vfio_irq_set *irq_set;
> > int *fd_ptr;
> > @@ -230,12 +232,19 @@ vfio_enable_msi(struct rte_intr_handle *intr_handle)
> > {
> >
> > irq_set = (struct vfio_irq_set *) irq_set_buf;
> > irq_set->argsz = len;
> > - irq_set->count = 1;
> > + if ((!intr_handle->max_intr) ||
> > + (intr_handle->max_intr > VFIO_MAX_QUEUE_ID))
> > + max_intr = VFIO_MAX_QUEUE_ID + 1;
> > + else
> > + max_intr = intr_handle->max_intr;
> > +
> > + irq_set->count = max_intr;
> > irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
> > VFIO_IRQ_SET_ACTION_TRIGGER;
> > irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
> > irq_set->start = 0;
> > fd_ptr = (int *) &irq_set->data;
> > - *fd_ptr = intr_handle->fd;
> > + memcpy(fd_ptr, intr_handle->queue_fd, sizeof(intr_handle->queue_fd));
> > + fd_ptr[max_intr - 1] = intr_handle->fd;
> >
> > ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
> >
> > @@ -244,23 +253,6 @@ vfio_enable_msi(struct rte_intr_handle *intr_handle) {
> > intr_handle->fd);
> > return -1;
> > }
> > -
> > - /* manually trigger interrupt to enable it */
> > - memset(irq_set, 0, len);
> > - len = sizeof(struct vfio_irq_set);
> > - irq_set->argsz = len;
> > - irq_set->count = 1;
> > - irq_set->flags = VFIO_IRQ_SET_DATA_NONE |
> > VFIO_IRQ_SET_ACTION_TRIGGER;
> > - irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
> > - irq_set->start = 0;
> > -
> > - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
> > -
> > - if (ret) {
> > - RTE_LOG(ERR, EAL, "Error triggering MSI interrupts for fd %d\n",
> > - intr_handle->fd);
> > - return -1;
> > - }
> > return 0;
> > }
> >
> > @@ -292,8 +284,8 @@ vfio_disable_msi(struct rte_intr_handle *intr_handle) {
> > /* enable MSI-X interrupts */
> > static int
> > vfio_enable_msix(struct rte_intr_handle *intr_handle) {
> > - int len, ret;
> > - char irq_set_buf[IRQ_SET_BUF_LEN];
> > + int len, ret, max_intr;
> > + char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
> > struct vfio_irq_set *irq_set;
> > int *fd_ptr;
> >
> > @@ -301,12 +293,19 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle)
> > {
> >
> > irq_set = (struct vfio_irq_set *) irq_set_buf;
> > irq_set->argsz = len;
> > - irq_set->count = 1;
> > + if ((!intr_handle->max_intr) ||
> > + (intr_handle->max_intr > VFIO_MAX_QUEUE_ID))
> > + max_intr = VFIO_MAX_QUEUE_ID + 1;
> > + else
> > + max_intr = intr_handle->max_intr;
> > +
> > + irq_set->count = max_intr;
> > irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
> > VFIO_IRQ_SET_ACTION_TRIGGER;
> > irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
> > irq_set->start = 0;
> > fd_ptr = (int *) &irq_set->data;
> > - *fd_ptr = intr_handle->fd;
> > + memcpy(fd_ptr, intr_handle->queue_fd, sizeof(intr_handle->queue_fd));
> > + fd_ptr[max_intr - 1] = intr_handle->fd;
> >
> > ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
> >
> > @@ -316,22 +315,6 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle)
> > {
> > return -1;
> > }
> >
> > - /* manually trigger interrupt to enable it */
> > - memset(irq_set, 0, len);
> > - len = sizeof(struct vfio_irq_set);
> > - irq_set->argsz = len;
> > - irq_set->count = 1;
> > - irq_set->flags = VFIO_IRQ_SET_DATA_NONE |
> > VFIO_IRQ_SET_ACTION_TRIGGER;
> > - irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
> > - irq_set->start = 0;
> > -
> > - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
> > -
> > - if (ret) {
> > - RTE_LOG(ERR, EAL, "Error triggering MSI-X interrupts for fd %d\n",
> > - intr_handle->fd);
> > - return -1;
> > - }
> > return 0;
> > }
> >
> > @@ -339,7 +322,7 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) {
> > static int
> > vfio_disable_msix(struct rte_intr_handle *intr_handle) {
> > struct vfio_irq_set *irq_set;
> > - char irq_set_buf[IRQ_SET_BUF_LEN];
> > + char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
> > int len, ret;
> >
> > len = sizeof(struct vfio_irq_set);
> > @@ -824,3 +807,124 @@ rte_eal_intr_init(void)
> > return -ret;
> > }
> >
> > +static void
> > +eal_intr_process_rx_interrupts(uint8_t port_id, struct epoll_event *events, int
> > nfds)
> > +{
> > + int n, bytes_read;
> > + union rte_intr_read_buffer buf;
> > + struct rte_intr_handle intr_handle =
> > rte_eth_devices[port_id].pci_dev->intr_handle;
> > +
> > + for (n = 0; n < nfds; n++) {
> > + /* set the length to be read dor different handle type */
> > + switch (intr_handle.type) {
> > + case RTE_INTR_HANDLE_UIO:
> > + bytes_read = sizeof(buf.uio_intr_count);
> > + break;
> > + case RTE_INTR_HANDLE_ALARM:
> > + bytes_read = sizeof(buf.timerfd_num);
> > + break;
> > +#ifdef VFIO_PRESENT
> > + case RTE_INTR_HANDLE_VFIO_MSIX:
> > + case RTE_INTR_HANDLE_VFIO_MSI:
> > + case RTE_INTR_HANDLE_VFIO_LEGACY:
> > + bytes_read = sizeof(buf.vfio_intr_count);
> > + break;
> > +#endif
> > + default:
> > + bytes_read = 1;
> > + break;
> > + }
> > +
> > + /**
> > + * read out to clear the ready-to-be-read flag
> > + * for epoll_wait.
> > + */
> > + bytes_read = read(events[n].data.fd, &buf, bytes_read);
> > + if (bytes_read < 0)
> > + RTE_LOG(ERR, EAL, "Error reading from file "
> > + "descriptor %d: %s\n", events[n].data.fd,
> > + strerror(errno));
> > + else if (bytes_read == 0)
> > + RTE_LOG(ERR, EAL, "Read nothing from file "
> > + "descriptor %d\n", events[n].data.fd);
> > + }
> > +}
> > +
> > +static void
> > +eal_intr_handle_rx_interrupts(uint8_t port_id, int pfd, unsigned totalfds)
> > +{
> > + struct epoll_event events[totalfds];
> [LCM] I don't think it's a way to define array by var.
It looks a little bit ugly. The similar implementation in eal_intr_handle_interrupts()
has been there for years. This part of code copied from eal_intr_handle_interrupts().
>
> > + int nfds = 0;
> > +
> > +m_wait:
>
> [LCM] do {
> > + nfds = epoll_wait(pfd, events, totalfds,
> > + EAL_INTR_EPOLL_WAIT_FOREVER);
> > + /* epoll_wait fail */
> > + if (nfds < 0) {
> > + RTE_LOG(ERR, EAL,
> > + "epoll_wait returns with fail\n");
> > + return;
> > + }
> > + /* epoll_wait timeout, will never happens here */
> [LCM] } while (nfds == 0);
>
> > + else if (nfds == 0)
> > + goto m_wait;
> [LCM] Then we don't need lable 'm_wait' there.
Good suggestion and will take it. There is another "goto m_wait" which
is removed per other' suggestion. For current control flow, do..while is
cleaner and better.
> > + /* epoll_wait has at least one fd ready to read */
> > + eal_intr_process_rx_interrupts(port_id, events, nfds);
> > +}
> > +
> > +int
> > +rte_eal_wait_rx_intr(uint8_t port_id, uint8_t queue_id)
> > +{
> > + struct rte_intr_handle intr_handle =
> > rte_eth_devices[port_id].pci_dev->intr_handle;
> > + struct epoll_event ev;
> > + unsigned numfds = 0;
> > +
> > + /* create epoll fd */
> > + int pfd = epoll_create(1);
> > + if (pfd < 0) {
> > + RTE_LOG(ERR, EAL, "Cannot create epoll instance\n");
> > + return -1;
> > + }
> > +
> > + rte_spinlock_lock(&intr_lock);
> > +
> > + ev.events = EPOLLIN | EPOLLPRI;
> > + switch (intr_handle.type) {
> > + case RTE_INTR_HANDLE_UIO:
> > + ev.data.fd = intr_handle.fd;
> > + break;
> > +#ifdef VFIO_PRESENT
> > + case RTE_INTR_HANDLE_VFIO_MSIX:
> > + ev.data.fd = intr_handle.queue_fd[queue_id];
> > + break;
> > + case RTE_INTR_HANDLE_VFIO_MSI:
> > + ev.data.fd = intr_handle.queue_fd[queue_id];
> > + break;
> > + case RTE_INTR_HANDLE_VFIO_LEGACY:
> > + ev.data.fd = intr_handle.queue_fd[queue_id];
> > + break;
> > +#endif
> > + default:
> > + break;
> > + close(pfd);
> > + return -1;
> [LCM] It makes confusing here. If directly return, 'break' won't require and need to add unlock.
Good catch, it will be fixed in V2.
> > + }
> > +
> > + if (epoll_ctl(pfd, EPOLL_CTL_ADD, ev.data.fd, &ev) < 0) {
> > + RTE_LOG(ERR, EAL, "Error adding fd %d epoll_ctl, %s\n",
> > + intr_handle.queue_fd[queue_id], strerror(errno));
> > + } else
> > + numfds++;
> > +
> > + rte_spinlock_unlock(&intr_lock);
> > + /* serve the interrupt */
> > + eal_intr_handle_rx_interrupts(port_id, pfd, numfds);
> > +
> > + /**
> > + * when we return, we need to rebuild the
> > + * list of fds to monitor.
> > + */
> > + close(pfd);
> > +
> > + return 0;
> > +}
> > diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> > b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> > index 20e0977..63d0ae8 100644
> > --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> > +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> > @@ -283,7 +283,6 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev,
> > int vfio_dev_fd)
> >
> > dev->intr_handle.fd = fd;
> > dev->intr_handle.vfio_dev_fd = vfio_dev_fd;
> > -
> > switch (i) {
> > case VFIO_PCI_MSIX_IRQ_INDEX:
> > internal_config.vfio_intr_mode = RTE_INTR_MODE_MSIX;
> > @@ -302,6 +301,16 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev,
> > int vfio_dev_fd)
> > return -1;
> > }
> >
> > + for (i = 0; i < VFIO_MAX_QUEUE_ID; i++) {
> > + fd = eventfd(0, 0);
> > + if (fd < 0) {
> > + RTE_LOG(ERR, EAL, " cannot set up eventfd, "
> > + "error %i (%s)\n", errno, strerror(errno));
> > + return -1;
> > + }
> > + dev->intr_handle.queue_fd[i] = fd;
> > + }
> > +
> > return 0;
> > }
> >
> > diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> > b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> > index 23eafd9..c6982cf 100644
> > --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> > +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> > @@ -38,6 +38,8 @@
> > #ifndef _RTE_LINUXAPP_INTERRUPTS_H_
> > #define _RTE_LINUXAPP_INTERRUPTS_H_
> >
> > +#define VFIO_MAX_QUEUE_ID 32
> > +
> > enum rte_intr_handle_type {
> > RTE_INTR_HANDLE_UNKNOWN = 0,
> > RTE_INTR_HANDLE_UIO, /**< uio device handle */
> > @@ -52,6 +54,8 @@ enum rte_intr_handle_type {
> > struct rte_intr_handle {
> > int vfio_dev_fd; /**< VFIO device file descriptor */
> > int fd; /**< file descriptor */
> > + int max_intr; /**< max interrupt requested */
> > + int queue_fd[VFIO_MAX_QUEUE_ID]; /**< rx and tx queue interrupt file
> > descriptor */
> > enum rte_intr_handle_type type; /**< handle type */
> > };
> >
> > --
> > 1.8.1.4
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [dpdk-dev] [PATCH v1 4/5] eal: add per rx queue interrupt handling based on VFIO
2015-01-28 9:50 ` [dpdk-dev] [PATCH v1 4/5] eal: add per rx queue interrupt handling based on VFIO Danny Zhou
2015-01-28 18:10 ` Liang, Cunming
@ 2015-01-29 5:06 ` Qiu, Michael
2015-01-29 5:24 ` Zhou, Danny
1 sibling, 1 reply; 16+ messages in thread
From: Qiu, Michael @ 2015-01-29 5:06 UTC (permalink / raw)
To: Zhou, Danny, dev
On 1/28/2015 5:52 PM, Danny Zhou wrote:
> Signed-off-by: Danny Zhou <danny.zhou@intel.com>
> Signed-off-by: Yong Liu <yong.liu@intel.com>
> ---
[...]
> +static void
> +eal_intr_process_rx_interrupts(uint8_t port_id, struct epoll_event *events, int nfds)
> +{
> + int n, bytes_read;
> + union rte_intr_read_buffer buf;
> + struct rte_intr_handle intr_handle = rte_eth_devices[port_id].pci_dev->intr_handle;
> +
> + for (n = 0; n < nfds; n++) {
> + /* set the length to be read dor different handle type */
> + switch (intr_handle.type) {
> + case RTE_INTR_HANDLE_UIO:
> + bytes_read = sizeof(buf.uio_intr_count);
> + break;
> + case RTE_INTR_HANDLE_ALARM:
> + bytes_read = sizeof(buf.timerfd_num);
> + break;
> +#ifdef VFIO_PRESENT
> + case RTE_INTR_HANDLE_VFIO_MSIX:
> + case RTE_INTR_HANDLE_VFIO_MSI:
> + case RTE_INTR_HANDLE_VFIO_LEGACY:
> + bytes_read = sizeof(buf.vfio_intr_count);
> + break;
> +#endif
> + default:
> + bytes_read = 1;
> + break;
> + }
> +
> + /**
> + * read out to clear the ready-to-be-read flag
> + * for epoll_wait.
> + */
> + bytes_read = read(events[n].data.fd, &buf, bytes_read);
> + if (bytes_read < 0)
> + RTE_LOG(ERR, EAL, "Error reading from file "
> + "descriptor %d: %s\n", events[n].data.fd,
> + strerror(errno));
> + else if (bytes_read == 0)
> + RTE_LOG(ERR, EAL, "Read nothing from file "
> + "descriptor %d\n", events[n].data.fd);
Is there any issue while bytes_read is not equal to the count need to be
read?
> + }
> +}
> +
> +static void
> +eal_intr_handle_rx_interrupts(uint8_t port_id, int pfd, unsigned totalfds)
> +{
> + struct epoll_event events[totalfds];
> + int nfds = 0;
> +
> +m_wait:
> + nfds = epoll_wait(pfd, events, totalfds,
> + EAL_INTR_EPOLL_WAIT_FOREVER);
> + /* epoll_wait fail */
> + if (nfds < 0) {
> + RTE_LOG(ERR, EAL,
> + "epoll_wait returns with fail\n");
> + return;
> + }
> + /* epoll_wait timeout, will never happens here */
> + else if (nfds == 0)
> + goto m_wait;
> + /* epoll_wait has at least one fd ready to read */
> + eal_intr_process_rx_interrupts(port_id, events, nfds);
> +}
> +
> +int
> +rte_eal_wait_rx_intr(uint8_t port_id, uint8_t queue_id)
> +{
> + struct rte_intr_handle intr_handle = rte_eth_devices[port_id].pci_dev->intr_handle;
> + struct epoll_event ev;
> + unsigned numfds = 0;
> +
> + /* create epoll fd */
> + int pfd = epoll_create(1);
> + if (pfd < 0) {
> + RTE_LOG(ERR, EAL, "Cannot create epoll instance\n");
> + return -1;
> + }
> +
> + rte_spinlock_lock(&intr_lock);
> +
> + ev.events = EPOLLIN | EPOLLPRI;
> + switch (intr_handle.type) {
> + case RTE_INTR_HANDLE_UIO:
> + ev.data.fd = intr_handle.fd;
> + break;
> +#ifdef VFIO_PRESENT
> + case RTE_INTR_HANDLE_VFIO_MSIX:
> + ev.data.fd = intr_handle.queue_fd[queue_id];
> + break;
> + case RTE_INTR_HANDLE_VFIO_MSI:
> + ev.data.fd = intr_handle.queue_fd[queue_id];
> + break;
> + case RTE_INTR_HANDLE_VFIO_LEGACY:
> + ev.data.fd = intr_handle.queue_fd[queue_id];
> + break;
As those three branches are all the same, why not combine to one?
+ case RTE_INTR_HANDLE_VFIO_MSIX:
+ case RTE_INTR_HANDLE_VFIO_MSI:
+ case RTE_INTR_HANDLE_VFIO_LEGACY:
+ ev.data.fd = intr_handle.queue_fd[queue_id];
+ break;
> +#endif
> + default:
> + break;
> + close(pfd);
> + return -1;
Here Steve has pointed out, but indentation should be a issue.
> + }
> +
> + if (epoll_ctl(pfd, EPOLL_CTL_ADD, ev.data.fd, &ev) < 0) {
> + RTE_LOG(ERR, EAL, "Error adding fd %d epoll_ctl, %s\n",
> + intr_handle.queue_fd[queue_id], strerror(errno));
> + } else
> + numfds++;
> +
> + rte_spinlock_unlock(&intr_lock);
> + /* serve the interrupt */
> + eal_intr_handle_rx_interrupts(port_id, pfd, numfds);
> +
> + /**
> + * when we return, we need to rebuild the
> + * list of fds to monitor.
> + */
> + close(pfd);
> +
> + return 0;
> +}
> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> index 20e0977..63d0ae8 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> @@ -283,7 +283,6 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd)
>
> dev->intr_handle.fd = fd;
> dev->intr_handle.vfio_dev_fd = vfio_dev_fd;
> -
> switch (i) {
> case VFIO_PCI_MSIX_IRQ_INDEX:
> internal_config.vfio_intr_mode = RTE_INTR_MODE_MSIX;
> @@ -302,6 +301,16 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd)
> return -1;
> }
>
> + for (i = 0; i < VFIO_MAX_QUEUE_ID; i++) {
> + fd = eventfd(0, 0);
> + if (fd < 0) {
> + RTE_LOG(ERR, EAL, " cannot set up eventfd, "
> + "error %i (%s)\n", errno, strerror(errno));
> + return -1;
> + }
> + dev->intr_handle.queue_fd[i] = fd;
> + }
> +
> return 0;
> }
>
> diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> index 23eafd9..c6982cf 100644
> --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> @@ -38,6 +38,8 @@
> #ifndef _RTE_LINUXAPP_INTERRUPTS_H_
> #define _RTE_LINUXAPP_INTERRUPTS_H_
>
> +#define VFIO_MAX_QUEUE_ID 32
> +
> enum rte_intr_handle_type {
> RTE_INTR_HANDLE_UNKNOWN = 0,
> RTE_INTR_HANDLE_UIO, /**< uio device handle */
> @@ -52,6 +54,8 @@ enum rte_intr_handle_type {
> struct rte_intr_handle {
> int vfio_dev_fd; /**< VFIO device file descriptor */
> int fd; /**< file descriptor */
> + int max_intr; /**< max interrupt requested */
> + int queue_fd[VFIO_MAX_QUEUE_ID]; /**< rx and tx queue interrupt file descriptor */
> enum rte_intr_handle_type type; /**< handle type */
> };
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [dpdk-dev] [PATCH v1 4/5] eal: add per rx queue interrupt handling based on VFIO
2015-01-29 5:06 ` Qiu, Michael
@ 2015-01-29 5:24 ` Zhou, Danny
0 siblings, 0 replies; 16+ messages in thread
From: Zhou, Danny @ 2015-01-29 5:24 UTC (permalink / raw)
To: Qiu, Michael, dev
> -----Original Message-----
> From: Qiu, Michael
> Sent: Thursday, January 29, 2015 1:07 PM
> To: Zhou, Danny; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v1 4/5] eal: add per rx queue interrupt handling based on VFIO
>
> On 1/28/2015 5:52 PM, Danny Zhou wrote:
> > Signed-off-by: Danny Zhou <danny.zhou@intel.com>
> > Signed-off-by: Yong Liu <yong.liu@intel.com>
> > ---
>
> [...]
>
> > +static void
> > +eal_intr_process_rx_interrupts(uint8_t port_id, struct epoll_event *events, int nfds)
> > +{
> > + int n, bytes_read;
> > + union rte_intr_read_buffer buf;
> > + struct rte_intr_handle intr_handle = rte_eth_devices[port_id].pci_dev->intr_handle;
> > +
> > + for (n = 0; n < nfds; n++) {
> > + /* set the length to be read dor different handle type */
> > + switch (intr_handle.type) {
> > + case RTE_INTR_HANDLE_UIO:
> > + bytes_read = sizeof(buf.uio_intr_count);
> > + break;
> > + case RTE_INTR_HANDLE_ALARM:
> > + bytes_read = sizeof(buf.timerfd_num);
> > + break;
> > +#ifdef VFIO_PRESENT
> > + case RTE_INTR_HANDLE_VFIO_MSIX:
> > + case RTE_INTR_HANDLE_VFIO_MSI:
> > + case RTE_INTR_HANDLE_VFIO_LEGACY:
> > + bytes_read = sizeof(buf.vfio_intr_count);
> > + break;
> > +#endif
> > + default:
> > + bytes_read = 1;
> > + break;
> > + }
> > +
> > + /**
> > + * read out to clear the ready-to-be-read flag
> > + * for epoll_wait.
> > + */
> > + bytes_read = read(events[n].data.fd, &buf, bytes_read);
> > + if (bytes_read < 0)
> > + RTE_LOG(ERR, EAL, "Error reading from file "
> > + "descriptor %d: %s\n", events[n].data.fd,
> > + strerror(errno));
> > + else if (bytes_read == 0)
> > + RTE_LOG(ERR, EAL, "Read nothing from file "
> > + "descriptor %d\n", events[n].data.fd);
>
> Is there any issue while bytes_read is not equal to the count need to be
> read?
>
Not a problem as long as bytes_read > 0.
> > + }
> > +}
> > +
> > +static void
> > +eal_intr_handle_rx_interrupts(uint8_t port_id, int pfd, unsigned totalfds)
> > +{
> > + struct epoll_event events[totalfds];
> > + int nfds = 0;
> > +
> > +m_wait:
> > + nfds = epoll_wait(pfd, events, totalfds,
> > + EAL_INTR_EPOLL_WAIT_FOREVER);
> > + /* epoll_wait fail */
> > + if (nfds < 0) {
> > + RTE_LOG(ERR, EAL,
> > + "epoll_wait returns with fail\n");
> > + return;
> > + }
> > + /* epoll_wait timeout, will never happens here */
> > + else if (nfds == 0)
> > + goto m_wait;
> > + /* epoll_wait has at least one fd ready to read */
> > + eal_intr_process_rx_interrupts(port_id, events, nfds);
> > +}
> > +
> > +int
> > +rte_eal_wait_rx_intr(uint8_t port_id, uint8_t queue_id)
> > +{
> > + struct rte_intr_handle intr_handle = rte_eth_devices[port_id].pci_dev->intr_handle;
> > + struct epoll_event ev;
> > + unsigned numfds = 0;
> > +
> > + /* create epoll fd */
> > + int pfd = epoll_create(1);
> > + if (pfd < 0) {
> > + RTE_LOG(ERR, EAL, "Cannot create epoll instance\n");
> > + return -1;
> > + }
> > +
> > + rte_spinlock_lock(&intr_lock);
> > +
> > + ev.events = EPOLLIN | EPOLLPRI;
> > + switch (intr_handle.type) {
> > + case RTE_INTR_HANDLE_UIO:
> > + ev.data.fd = intr_handle.fd;
> > + break;
> > +#ifdef VFIO_PRESENT
> > + case RTE_INTR_HANDLE_VFIO_MSIX:
> > + ev.data.fd = intr_handle.queue_fd[queue_id];
> > + break;
> > + case RTE_INTR_HANDLE_VFIO_MSI:
> > + ev.data.fd = intr_handle.queue_fd[queue_id];
> > + break;
> > + case RTE_INTR_HANDLE_VFIO_LEGACY:
> > + ev.data.fd = intr_handle.queue_fd[queue_id];
> > + break;
>
> As those three branches are all the same, why not combine to one?
>
> + case RTE_INTR_HANDLE_VFIO_MSIX:
> + case RTE_INTR_HANDLE_VFIO_MSI:
> + case RTE_INTR_HANDLE_VFIO_LEGACY:
> + ev.data.fd = intr_handle.queue_fd[queue_id];
> + break;
>
Accepted.
>
> > +#endif
> > + default:
> > + break;
> > + close(pfd);
> > + return -1;
>
> Here Steve has pointed out, but indentation should be a issue.
>
To be fixed in V2.
> > + }
> > +
> > + if (epoll_ctl(pfd, EPOLL_CTL_ADD, ev.data.fd, &ev) < 0) {
> > + RTE_LOG(ERR, EAL, "Error adding fd %d epoll_ctl, %s\n",
> > + intr_handle.queue_fd[queue_id], strerror(errno));
> > + } else
> > + numfds++;
> > +
> > + rte_spinlock_unlock(&intr_lock);
> > + /* serve the interrupt */
> > + eal_intr_handle_rx_interrupts(port_id, pfd, numfds);
> > +
> > + /**
> > + * when we return, we need to rebuild the
> > + * list of fds to monitor.
> > + */
> > + close(pfd);
> > +
> > + return 0;
> > +}
> > diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> > index 20e0977..63d0ae8 100644
> > --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> > +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> > @@ -283,7 +283,6 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd)
> >
> > dev->intr_handle.fd = fd;
> > dev->intr_handle.vfio_dev_fd = vfio_dev_fd;
> > -
> > switch (i) {
> > case VFIO_PCI_MSIX_IRQ_INDEX:
> > internal_config.vfio_intr_mode = RTE_INTR_MODE_MSIX;
> > @@ -302,6 +301,16 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd)
> > return -1;
> > }
> >
> > + for (i = 0; i < VFIO_MAX_QUEUE_ID; i++) {
> > + fd = eventfd(0, 0);
> > + if (fd < 0) {
> > + RTE_LOG(ERR, EAL, " cannot set up eventfd, "
> > + "error %i (%s)\n", errno, strerror(errno));
> > + return -1;
> > + }
> > + dev->intr_handle.queue_fd[i] = fd;
> > + }
> > +
> > return 0;
> > }
> >
> > diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> > index 23eafd9..c6982cf 100644
> > --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> > +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
> > @@ -38,6 +38,8 @@
> > #ifndef _RTE_LINUXAPP_INTERRUPTS_H_
> > #define _RTE_LINUXAPP_INTERRUPTS_H_
> >
> > +#define VFIO_MAX_QUEUE_ID 32
> > +
> > enum rte_intr_handle_type {
> > RTE_INTR_HANDLE_UNKNOWN = 0,
> > RTE_INTR_HANDLE_UIO, /**< uio device handle */
> > @@ -52,6 +54,8 @@ enum rte_intr_handle_type {
> > struct rte_intr_handle {
> > int vfio_dev_fd; /**< VFIO device file descriptor */
> > int fd; /**< file descriptor */
> > + int max_intr; /**< max interrupt requested */
> > + int queue_fd[VFIO_MAX_QUEUE_ID]; /**< rx and tx queue interrupt file descriptor */
> > enum rte_intr_handle_type type; /**< handle type */
> > };
> >
^ permalink raw reply [flat|nested] 16+ messages in thread
* [dpdk-dev] [PATCH v1 5/5] L3fwd-power: enable one-shot rx interrupt and polling/interrupt mode switch
2015-01-28 9:50 [dpdk-dev] [PATCH v1 0/5] Interrupt mode for PMD Danny Zhou
` (3 preceding siblings ...)
2015-01-28 9:50 ` [dpdk-dev] [PATCH v1 4/5] eal: add per rx queue interrupt handling based on VFIO Danny Zhou
@ 2015-01-28 9:50 ` Danny Zhou
2015-01-28 18:34 ` Liang, Cunming
4 siblings, 1 reply; 16+ messages in thread
From: Danny Zhou @ 2015-01-28 9:50 UTC (permalink / raw)
To: dev
Signed-off-by: Danny Zhou <danny.zhou@intel.com>
---
examples/l3fwd-power/main.c | 170 +++++++++++++++++++++++++++++++++-----------
1 file changed, 129 insertions(+), 41 deletions(-)
diff --git a/examples/l3fwd-power/main.c b/examples/l3fwd-power/main.c
index f6b55b9..e6e4f55 100644
--- a/examples/l3fwd-power/main.c
+++ b/examples/l3fwd-power/main.c
@@ -75,12 +75,13 @@
#include <rte_string_fns.h>
#include <rte_timer.h>
#include <rte_power.h>
+#include <rte_eal.h>
#define RTE_LOGTYPE_L3FWD_POWER RTE_LOGTYPE_USER1
#define MAX_PKT_BURST 32
-#define MIN_ZERO_POLL_COUNT 5
+#define MIN_ZERO_POLL_COUNT 10
/* around 100ms at 2 Ghz */
#define TIMER_RESOLUTION_CYCLES 200000000ULL
@@ -188,6 +189,9 @@ struct lcore_rx_queue {
#define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS
#define MAX_RX_QUEUE_PER_PORT 128
+#define MAX_RX_QUEUE_INTERRUPT_PER_PORT 16
+
+
#define MAX_LCORE_PARAMS 1024
struct lcore_params {
uint8_t port_id;
@@ -214,7 +218,7 @@ static uint16_t nb_lcore_params = sizeof(lcore_params_array_default) /
static struct rte_eth_conf port_conf = {
.rxmode = {
- .mq_mode = ETH_MQ_RX_RSS,
+ .mq_mode = ETH_MQ_RX_RSS,
.max_rx_pkt_len = ETHER_MAX_LEN,
.split_hdr_size = 0,
.header_split = 0, /**< Header Split disabled */
@@ -226,11 +230,14 @@ static struct rte_eth_conf port_conf = {
.rx_adv_conf = {
.rss_conf = {
.rss_key = NULL,
- .rss_hf = ETH_RSS_IP,
+ .rss_hf = ETH_RSS_UDP,
},
},
.txmode = {
- .mq_mode = ETH_DCB_NONE,
+ .mq_mode = ETH_MQ_TX_NONE,
+ },
+ .intr_conf = {
+ .rxq = 1, /**< rxq interrupt feature enabled */
},
};
@@ -402,19 +409,22 @@ power_timer_cb(__attribute__((unused)) struct rte_timer *tim,
/* accumulate total execution time in us when callback is invoked */
sleep_time_ratio = (float)(stats[lcore_id].sleep_time) /
(float)SCALING_PERIOD;
-
/**
* check whether need to scale down frequency a step if it sleep a lot.
*/
- if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD)
- rte_power_freq_down(lcore_id);
+ if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD) {
+ if (rte_power_freq_down)
+ rte_power_freq_down(lcore_id);
+ }
else if ( (unsigned)(stats[lcore_id].nb_rx_processed /
- stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST)
+ stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST) {
/**
* scale down a step if average packet per iteration less
* than expectation.
*/
- rte_power_freq_down(lcore_id);
+ if (rte_power_freq_down)
+ rte_power_freq_down(lcore_id);
+ }
/**
* initialize another timer according to current frequency to ensure
@@ -707,22 +717,20 @@ l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid,
}
-#define SLEEP_GEAR1_THRESHOLD 100
-#define SLEEP_GEAR2_THRESHOLD 1000
+#define MINIMUM_SLEEP_TIME 1
+#define SUSPEND_THRESHOLD 300
static inline uint32_t
power_idle_heuristic(uint32_t zero_rx_packet_count)
{
- /* If zero count is less than 100, use it as the sleep time in us */
- if (zero_rx_packet_count < SLEEP_GEAR1_THRESHOLD)
- return zero_rx_packet_count;
- /* If zero count is less than 1000, sleep time should be 100 us */
- else if ((zero_rx_packet_count >= SLEEP_GEAR1_THRESHOLD) &&
- (zero_rx_packet_count < SLEEP_GEAR2_THRESHOLD))
- return SLEEP_GEAR1_THRESHOLD;
- /* If zero count is greater than 1000, sleep time should be 1000 us */
- else if (zero_rx_packet_count >= SLEEP_GEAR2_THRESHOLD)
- return SLEEP_GEAR2_THRESHOLD;
+ /* If zero count is less than 100, sleep 1us */
+ if (zero_rx_packet_count < SUSPEND_THRESHOLD)
+ return MINIMUM_SLEEP_TIME;
+ /* If zero count is less than 1000, sleep 100 us which is the minimum latency
+ switching from C3/C6 to C0
+ */
+ else
+ return SUSPEND_THRESHOLD;
return 0;
}
@@ -762,6 +770,35 @@ power_freq_scaleup_heuristic(unsigned lcore_id,
return FREQ_CURRENT;
}
+/**
+ * force polling thread sleep until one-shot rx interrupt triggers
+ * @param port_id
+ * Port id.
+ * @param queue_id
+ * Rx queue id.
+ * @return
+ * 0 on success
+ */
+static int
+sleep_until_rx_interrupt(uint8_t port_id, uint8_t queue_id)
+{
+ /* Enable one-shot rx interrupt */
+ rte_eth_dev_rx_queue_intr_enable(port_id, queue_id);
+
+ RTE_LOG(INFO, L3FWD_POWER,
+ "lcore %u sleeps until interrupt on port%d,rxq%d triggers\n",
+ rte_lcore_id(), port_id, queue_id);
+ rte_eal_wait_rx_intr(port_id, queue_id);
+ RTE_LOG(INFO, L3FWD_POWER,
+ "lcore %u is waked up from rx interrupt on port%d,rxq%d\n",
+ rte_lcore_id(), port_id, queue_id);
+
+ /* Disable one-shot rx interrupt */
+ rte_eth_dev_rx_queue_intr_disable(port_id, queue_id);
+
+ return 0;
+}
+
/* main processing loop */
static int
main_loop(__attribute__((unused)) void *dummy)
@@ -775,7 +812,6 @@ main_loop(__attribute__((unused)) void *dummy)
struct lcore_conf *qconf;
struct lcore_rx_queue *rx_queue;
enum freq_scale_hint_t lcore_scaleup_hint;
-
uint32_t lcore_rx_idle_count = 0;
uint32_t lcore_idle_hint = 0;
@@ -835,6 +871,8 @@ main_loop(__attribute__((unused)) void *dummy)
prev_tsc_power = cur_tsc_power;
}
+
+start_rx:
/*
* Read packet from RX queues
*/
@@ -848,6 +886,7 @@ main_loop(__attribute__((unused)) void *dummy)
nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
MAX_PKT_BURST);
+
stats[lcore_id].nb_rx_processed += nb_rx;
if (unlikely(nb_rx == 0)) {
/**
@@ -910,10 +949,13 @@ main_loop(__attribute__((unused)) void *dummy)
rx_queue->freq_up_hint;
}
- if (lcore_scaleup_hint == FREQ_HIGHEST)
- rte_power_freq_max(lcore_id);
- else if (lcore_scaleup_hint == FREQ_HIGHER)
- rte_power_freq_up(lcore_id);
+ if (lcore_scaleup_hint == FREQ_HIGHEST) {
+ if (rte_power_freq_max)
+ rte_power_freq_max(lcore_id);
+ } else if (lcore_scaleup_hint == FREQ_HIGHER) {
+ if (rte_power_freq_up)
+ rte_power_freq_up(lcore_id);
+ }
} else {
/**
* All Rx queues empty in recent consecutive polls,
@@ -928,21 +970,55 @@ main_loop(__attribute__((unused)) void *dummy)
lcore_idle_hint = rx_queue->idle_hint;
}
- if ( lcore_idle_hint < SLEEP_GEAR1_THRESHOLD)
+ if (lcore_idle_hint < SUSPEND_THRESHOLD)
/**
- * execute "pause" instruction to avoid context
- * switch for short sleep.
- */
+ * execute "pause" instruction to avoid context
+ * switch which generally take hundres of microsecond
+ * for short sleep.
+ */
rte_delay_us(lcore_idle_hint);
- else
- /* long sleep force runing thread to suspend */
- usleep(lcore_idle_hint);
-
+ else {
+ /* suspend untill rx interrupt trigges */
+ sleep_until_rx_interrupt(
+ qconf->rx_queue_list[0].port_id,
+ qconf->rx_queue_list[0].queue_id);
+ /* start receiving packets immediately */
+ goto start_rx;
+ }
stats[lcore_id].sleep_time += lcore_idle_hint;
}
}
}
+/**
+ * It will be called as the callback for specified port after a LSI interrupt
+ * has been fully handled. This callback needs to be implemented carefully as
+ * it will be called in the interrupt host thread which is different from the
+ * application main thread.
+ *
+ * @param port_id
+ * Port id.
+ * @param type
+ * event type.
+ * @param param
+ * Pointer to(address of) the parameters.
+ *
+ * @return
+ * void.
+ */
+
+/*
+static void
+rx_interrupt_event_callback(uint8_t port_id, enum rte_eth_event_type type, void *param)
+{
+ uint64_t rx_queues = *((uint64_t *)param);
+
+ port_id = port_id + 1;
+ if(type == RTE_ETH_EVENT_INTR_RX)
+ port_id = rx_queues;
+}
+*/
+
static int
check_lcore_params(void)
{
@@ -1270,7 +1346,7 @@ setup_hash(int socketid)
char s[64];
/* create ipv4 hash */
- snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid);
+ rte_snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid);
ipv4_l3fwd_hash_params.name = s;
ipv4_l3fwd_hash_params.socket_id = socketid;
ipv4_l3fwd_lookup_struct[socketid] =
@@ -1280,7 +1356,7 @@ setup_hash(int socketid)
"socket %d\n", socketid);
/* create ipv6 hash */
- snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid);
+ rte_snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid);
ipv6_l3fwd_hash_params.name = s;
ipv6_l3fwd_hash_params.socket_id = socketid;
ipv6_l3fwd_lookup_struct[socketid] =
@@ -1476,6 +1552,7 @@ main(int argc, char **argv)
unsigned lcore_id;
uint64_t hz;
uint32_t n_tx_queue, nb_lcores;
+ uint32_t dev_rxq_num, dev_txq_num;
uint8_t portid, nb_rx_queue, queue, socketid;
/* catch SIGINT and restore cpufreq governor to ondemand */
@@ -1525,10 +1602,18 @@ main(int argc, char **argv)
printf("Initializing port %d ... ", portid );
fflush(stdout);
+ rte_eth_dev_info_get(portid, &dev_info);
+ dev_rxq_num = dev_info.max_rx_queues;
+ dev_txq_num = dev_info.max_tx_queues;
+
nb_rx_queue = get_port_n_rx_queues(portid);
+ if (nb_rx_queue > dev_rxq_num)
+ rte_exit(EXIT_FAILURE, "Cannot configure not existed rxq: "
+ "port=%d\n", portid);
+
n_tx_queue = nb_lcores;
- if (n_tx_queue > MAX_TX_QUEUE_PER_PORT)
- n_tx_queue = MAX_TX_QUEUE_PER_PORT;
+ if (n_tx_queue > dev_txq_num)
+ n_tx_queue = dev_txq_num;
printf("Creating queues: nb_rxq=%d nb_txq=%u... ",
nb_rx_queue, (unsigned)n_tx_queue );
ret = rte_eth_dev_configure(portid, nb_rx_queue,
@@ -1552,6 +1637,9 @@ main(int argc, char **argv)
if (rte_lcore_is_enabled(lcore_id) == 0)
continue;
+ if (queueid >= dev_txq_num)
+ continue;
+
if (numa_on)
socketid = \
(uint8_t)rte_lcore_to_socket_id(lcore_id);
@@ -1586,8 +1674,9 @@ main(int argc, char **argv)
/* init power management library */
ret = rte_power_init(lcore_id);
if (ret)
- rte_exit(EXIT_FAILURE, "Power management library "
- "initialization failed on core%u\n", lcore_id);
+ rte_log(RTE_LOG_ERR, RTE_LOGTYPE_POWER,
+ "Power management library initialization "
+ "failed on core%u", lcore_id);
/* init timer structures for each enabled lcore */
rte_timer_init(&power_timers[lcore_id]);
@@ -1635,7 +1724,6 @@ main(int argc, char **argv)
if (ret < 0)
rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, "
"port=%d\n", ret, portid);
-
/*
* If enabled, put device in promiscuous mode.
* This allows IO forwarding mode to forward packets
--
1.8.1.4
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [dpdk-dev] [PATCH v1 5/5] L3fwd-power: enable one-shot rx interrupt and polling/interrupt mode switch
2015-01-28 9:50 ` [dpdk-dev] [PATCH v1 5/5] L3fwd-power: enable one-shot rx interrupt and polling/interrupt mode switch Danny Zhou
@ 2015-01-28 18:34 ` Liang, Cunming
2015-01-29 4:07 ` Zhou, Danny
0 siblings, 1 reply; 16+ messages in thread
From: Liang, Cunming @ 2015-01-28 18:34 UTC (permalink / raw)
To: Zhou, Danny, dev
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Danny Zhou
> Sent: Wednesday, January 28, 2015 2:51 AM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH v1 5/5] L3fwd-power: enable one-shot rx interrupt
> and polling/interrupt mode switch
>
> Signed-off-by: Danny Zhou <danny.zhou@intel.com>
> ---
> examples/l3fwd-power/main.c | 170
> +++++++++++++++++++++++++++++++++-----------
> 1 file changed, 129 insertions(+), 41 deletions(-)
>
> diff --git a/examples/l3fwd-power/main.c b/examples/l3fwd-power/main.c
> index f6b55b9..e6e4f55 100644
> --- a/examples/l3fwd-power/main.c
> +++ b/examples/l3fwd-power/main.c
> @@ -75,12 +75,13 @@
> #include <rte_string_fns.h>
> #include <rte_timer.h>
> #include <rte_power.h>
> +#include <rte_eal.h>
>
> #define RTE_LOGTYPE_L3FWD_POWER RTE_LOGTYPE_USER1
>
> #define MAX_PKT_BURST 32
>
> -#define MIN_ZERO_POLL_COUNT 5
> +#define MIN_ZERO_POLL_COUNT 10
>
> /* around 100ms at 2 Ghz */
> #define TIMER_RESOLUTION_CYCLES 200000000ULL
> @@ -188,6 +189,9 @@ struct lcore_rx_queue {
> #define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS
> #define MAX_RX_QUEUE_PER_PORT 128
>
> +#define MAX_RX_QUEUE_INTERRUPT_PER_PORT 16
> +
> +
> #define MAX_LCORE_PARAMS 1024
> struct lcore_params {
> uint8_t port_id;
> @@ -214,7 +218,7 @@ static uint16_t nb_lcore_params =
> sizeof(lcore_params_array_default) /
>
> static struct rte_eth_conf port_conf = {
> .rxmode = {
> - .mq_mode = ETH_MQ_RX_RSS,
> + .mq_mode = ETH_MQ_RX_RSS,
> .max_rx_pkt_len = ETHER_MAX_LEN,
> .split_hdr_size = 0,
> .header_split = 0, /**< Header Split disabled */
> @@ -226,11 +230,14 @@ static struct rte_eth_conf port_conf = {
> .rx_adv_conf = {
> .rss_conf = {
> .rss_key = NULL,
> - .rss_hf = ETH_RSS_IP,
> + .rss_hf = ETH_RSS_UDP,
> },
> },
> .txmode = {
> - .mq_mode = ETH_DCB_NONE,
> + .mq_mode = ETH_MQ_TX_NONE,
> + },
> + .intr_conf = {
> + .rxq = 1, /**< rxq interrupt feature enabled */
> },
> };
>
> @@ -402,19 +409,22 @@ power_timer_cb(__attribute__((unused)) struct
> rte_timer *tim,
> /* accumulate total execution time in us when callback is invoked */
> sleep_time_ratio = (float)(stats[lcore_id].sleep_time) /
> (float)SCALING_PERIOD;
> -
> /**
> * check whether need to scale down frequency a step if it sleep a lot.
> */
> - if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD)
> - rte_power_freq_down(lcore_id);
> + if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD) {
> + if (rte_power_freq_down)
> + rte_power_freq_down(lcore_id);
> + }
> else if ( (unsigned)(stats[lcore_id].nb_rx_processed /
> - stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST)
> + stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST) {
> /**
> * scale down a step if average packet per iteration less
> * than expectation.
> */
> - rte_power_freq_down(lcore_id);
> + if (rte_power_freq_down)
> + rte_power_freq_down(lcore_id);
> + }
>
> /**
> * initialize another timer according to current frequency to ensure
> @@ -707,22 +717,20 @@ l3fwd_simple_forward(struct rte_mbuf *m, uint8_t
> portid,
>
> }
>
> -#define SLEEP_GEAR1_THRESHOLD 100
> -#define SLEEP_GEAR2_THRESHOLD 1000
> +#define MINIMUM_SLEEP_TIME 1
> +#define SUSPEND_THRESHOLD 300
>
> static inline uint32_t
> power_idle_heuristic(uint32_t zero_rx_packet_count)
> {
> - /* If zero count is less than 100, use it as the sleep time in us */
> - if (zero_rx_packet_count < SLEEP_GEAR1_THRESHOLD)
> - return zero_rx_packet_count;
> - /* If zero count is less than 1000, sleep time should be 100 us */
> - else if ((zero_rx_packet_count >= SLEEP_GEAR1_THRESHOLD) &&
> - (zero_rx_packet_count < SLEEP_GEAR2_THRESHOLD))
> - return SLEEP_GEAR1_THRESHOLD;
> - /* If zero count is greater than 1000, sleep time should be 1000 us */
> - else if (zero_rx_packet_count >= SLEEP_GEAR2_THRESHOLD)
> - return SLEEP_GEAR2_THRESHOLD;
> + /* If zero count is less than 100, sleep 1us */
> + if (zero_rx_packet_count < SUSPEND_THRESHOLD)
> + return MINIMUM_SLEEP_TIME;
> + /* If zero count is less than 1000, sleep 100 us which is the minimum
> latency
> + switching from C3/C6 to C0
> + */
> + else
> + return SUSPEND_THRESHOLD;
>
> return 0;
> }
> @@ -762,6 +770,35 @@ power_freq_scaleup_heuristic(unsigned lcore_id,
> return FREQ_CURRENT;
> }
>
> +/**
> + * force polling thread sleep until one-shot rx interrupt triggers
> + * @param port_id
> + * Port id.
> + * @param queue_id
> + * Rx queue id.
> + * @return
> + * 0 on success
> + */
> +static int
> +sleep_until_rx_interrupt(uint8_t port_id, uint8_t queue_id)
> +{
> + /* Enable one-shot rx interrupt */
> + rte_eth_dev_rx_queue_intr_enable(port_id, queue_id);
> +
> + RTE_LOG(INFO, L3FWD_POWER,
> + "lcore %u sleeps until interrupt on port%d,rxq%d triggers\n",
> + rte_lcore_id(), port_id, queue_id);
> + rte_eal_wait_rx_intr(port_id, queue_id);
> + RTE_LOG(INFO, L3FWD_POWER,
> + "lcore %u is waked up from rx interrupt on port%d,rxq%d\n",
> + rte_lcore_id(), port_id, queue_id);
> +
> + /* Disable one-shot rx interrupt */
> + rte_eth_dev_rx_queue_intr_disable(port_id, queue_id);
> +
> + return 0;
> +}
> +
> /* main processing loop */
> static int
> main_loop(__attribute__((unused)) void *dummy)
> @@ -775,7 +812,6 @@ main_loop(__attribute__((unused)) void *dummy)
> struct lcore_conf *qconf;
> struct lcore_rx_queue *rx_queue;
> enum freq_scale_hint_t lcore_scaleup_hint;
> -
> uint32_t lcore_rx_idle_count = 0;
> uint32_t lcore_idle_hint = 0;
>
> @@ -835,6 +871,8 @@ main_loop(__attribute__((unused)) void *dummy)
> prev_tsc_power = cur_tsc_power;
> }
>
> +
> +start_rx:
> /*
> * Read packet from RX queues
> */
> @@ -848,6 +886,7 @@ main_loop(__attribute__((unused)) void *dummy)
>
> nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
> MAX_PKT_BURST);
> +
> stats[lcore_id].nb_rx_processed += nb_rx;
> if (unlikely(nb_rx == 0)) {
> /**
> @@ -910,10 +949,13 @@ main_loop(__attribute__((unused)) void *dummy)
> rx_queue->freq_up_hint;
> }
>
> - if (lcore_scaleup_hint == FREQ_HIGHEST)
> - rte_power_freq_max(lcore_id);
> - else if (lcore_scaleup_hint == FREQ_HIGHER)
> - rte_power_freq_up(lcore_id);
> + if (lcore_scaleup_hint == FREQ_HIGHEST) {
> + if (rte_power_freq_max)
> + rte_power_freq_max(lcore_id);
> + } else if (lcore_scaleup_hint == FREQ_HIGHER) {
> + if (rte_power_freq_up)
> + rte_power_freq_up(lcore_id);
> + }
> } else {
> /**
> * All Rx queues empty in recent consecutive polls,
> @@ -928,21 +970,55 @@ main_loop(__attribute__((unused)) void *dummy)
> lcore_idle_hint = rx_queue->idle_hint;
> }
>
> - if ( lcore_idle_hint < SLEEP_GEAR1_THRESHOLD)
> + if (lcore_idle_hint < SUSPEND_THRESHOLD)
> /**
> - * execute "pause" instruction to avoid context
> - * switch for short sleep.
> - */
> + * execute "pause" instruction to avoid context
> + * switch which generally take hundres of microsecond
> + * for short sleep.
> + */
> rte_delay_us(lcore_idle_hint);
> - else
> - /* long sleep force runing thread to suspend */
> - usleep(lcore_idle_hint);
> -
> + else {
> + /* suspend untill rx interrupt trigges */
> + sleep_until_rx_interrupt(
> + qconf->rx_queue_list[0].port_id,
> + qconf->rx_queue_list[0].queue_id);
> + /* start receiving packets immediately */
> + goto start_rx;
> + }
> stats[lcore_id].sleep_time += lcore_idle_hint;
> }
> }
> }
>
> +/**
> + * It will be called as the callback for specified port after a LSI interrupt
> + * has been fully handled. This callback needs to be implemented carefully as
> + * it will be called in the interrupt host thread which is different from the
> + * application main thread.
> + *
> + * @param port_id
> + * Port id.
> + * @param type
> + * event type.
> + * @param param
> + * Pointer to(address of) the parameters.
> + *
> + * @return
> + * void.
> + */
> +
> +/*
> +static void
> +rx_interrupt_event_callback(uint8_t port_id, enum rte_eth_event_type type,
> void *param)
> +{
> + uint64_t rx_queues = *((uint64_t *)param);
> +
> + port_id = port_id + 1;
> + if(type == RTE_ETH_EVENT_INTR_RX)
> + port_id = rx_queues;
[LCM] What's bunch of things for ?
> +}
> +*/
> +
> static int
> check_lcore_params(void)
> {
> @@ -1270,7 +1346,7 @@ setup_hash(int socketid)
> char s[64];
>
> /* create ipv4 hash */
> - snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid);
> + rte_snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid);
> ipv4_l3fwd_hash_params.name = s;
> ipv4_l3fwd_hash_params.socket_id = socketid;
> ipv4_l3fwd_lookup_struct[socketid] =
> @@ -1280,7 +1356,7 @@ setup_hash(int socketid)
> "socket %d\n", socketid);
>
> /* create ipv6 hash */
> - snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid);
> + rte_snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid);
> ipv6_l3fwd_hash_params.name = s;
> ipv6_l3fwd_hash_params.socket_id = socketid;
> ipv6_l3fwd_lookup_struct[socketid] =
> @@ -1476,6 +1552,7 @@ main(int argc, char **argv)
> unsigned lcore_id;
> uint64_t hz;
> uint32_t n_tx_queue, nb_lcores;
> + uint32_t dev_rxq_num, dev_txq_num;
> uint8_t portid, nb_rx_queue, queue, socketid;
>
> /* catch SIGINT and restore cpufreq governor to ondemand */
> @@ -1525,10 +1602,18 @@ main(int argc, char **argv)
> printf("Initializing port %d ... ", portid );
> fflush(stdout);
>
> + rte_eth_dev_info_get(portid, &dev_info);
> + dev_rxq_num = dev_info.max_rx_queues;
> + dev_txq_num = dev_info.max_tx_queues;
> +
> nb_rx_queue = get_port_n_rx_queues(portid);
> + if (nb_rx_queue > dev_rxq_num)
> + rte_exit(EXIT_FAILURE, "Cannot configure not existed rxq: "
> + "port=%d\n", portid);
> +
> n_tx_queue = nb_lcores;
> - if (n_tx_queue > MAX_TX_QUEUE_PER_PORT)
> - n_tx_queue = MAX_TX_QUEUE_PER_PORT;
> + if (n_tx_queue > dev_txq_num)
> + n_tx_queue = dev_txq_num;
> printf("Creating queues: nb_rxq=%d nb_txq=%u... ",
> nb_rx_queue, (unsigned)n_tx_queue );
> ret = rte_eth_dev_configure(portid, nb_rx_queue,
> @@ -1552,6 +1637,9 @@ main(int argc, char **argv)
> if (rte_lcore_is_enabled(lcore_id) == 0)
> continue;
>
> + if (queueid >= dev_txq_num)
> + continue;
> +
> if (numa_on)
> socketid = \
> (uint8_t)rte_lcore_to_socket_id(lcore_id);
> @@ -1586,8 +1674,9 @@ main(int argc, char **argv)
> /* init power management library */
> ret = rte_power_init(lcore_id);
> if (ret)
> - rte_exit(EXIT_FAILURE, "Power management library "
> - "initialization failed on core%u\n", lcore_id);
> + rte_log(RTE_LOG_ERR, RTE_LOGTYPE_POWER,
> + "Power management library initialization "
> + "failed on core%u", lcore_id);
>
> /* init timer structures for each enabled lcore */
> rte_timer_init(&power_timers[lcore_id]);
> @@ -1635,7 +1724,6 @@ main(int argc, char **argv)
> if (ret < 0)
> rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, "
> "port=%d\n", ret, portid);
> -
> /*
> * If enabled, put device in promiscuous mode.
> * This allows IO forwarding mode to forward packets
> --
> 1.8.1.4
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [dpdk-dev] [PATCH v1 5/5] L3fwd-power: enable one-shot rx interrupt and polling/interrupt mode switch
2015-01-28 18:34 ` Liang, Cunming
@ 2015-01-29 4:07 ` Zhou, Danny
0 siblings, 0 replies; 16+ messages in thread
From: Zhou, Danny @ 2015-01-29 4:07 UTC (permalink / raw)
To: Liang, Cunming, dev
> -----Original Message-----
> From: Liang, Cunming
> Sent: Thursday, January 29, 2015 2:34 AM
> To: Zhou, Danny; dev@dpdk.org
> Subject: RE: [dpdk-dev] [PATCH v1 5/5] L3fwd-power: enable one-shot rx interrupt and polling/interrupt mode switch
>
>
>
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Danny Zhou
> > Sent: Wednesday, January 28, 2015 2:51 AM
> > To: dev@dpdk.org
> > Subject: [dpdk-dev] [PATCH v1 5/5] L3fwd-power: enable one-shot rx interrupt
> > and polling/interrupt mode switch
> >
> > Signed-off-by: Danny Zhou <danny.zhou@intel.com>
> > ---
> > examples/l3fwd-power/main.c | 170
> > +++++++++++++++++++++++++++++++++-----------
> > 1 file changed, 129 insertions(+), 41 deletions(-)
> >
> > diff --git a/examples/l3fwd-power/main.c b/examples/l3fwd-power/main.c
> > index f6b55b9..e6e4f55 100644
> > --- a/examples/l3fwd-power/main.c
> > +++ b/examples/l3fwd-power/main.c
> > @@ -75,12 +75,13 @@
> > #include <rte_string_fns.h>
> > #include <rte_timer.h>
> > #include <rte_power.h>
> > +#include <rte_eal.h>
> >
> > #define RTE_LOGTYPE_L3FWD_POWER RTE_LOGTYPE_USER1
> >
> > #define MAX_PKT_BURST 32
> >
> > -#define MIN_ZERO_POLL_COUNT 5
> > +#define MIN_ZERO_POLL_COUNT 10
> >
> > /* around 100ms at 2 Ghz */
> > #define TIMER_RESOLUTION_CYCLES 200000000ULL
> > @@ -188,6 +189,9 @@ struct lcore_rx_queue {
> > #define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS
> > #define MAX_RX_QUEUE_PER_PORT 128
> >
> > +#define MAX_RX_QUEUE_INTERRUPT_PER_PORT 16
> > +
> > +
> > #define MAX_LCORE_PARAMS 1024
> > struct lcore_params {
> > uint8_t port_id;
> > @@ -214,7 +218,7 @@ static uint16_t nb_lcore_params =
> > sizeof(lcore_params_array_default) /
> >
> > static struct rte_eth_conf port_conf = {
> > .rxmode = {
> > - .mq_mode = ETH_MQ_RX_RSS,
> > + .mq_mode = ETH_MQ_RX_RSS,
> > .max_rx_pkt_len = ETHER_MAX_LEN,
> > .split_hdr_size = 0,
> > .header_split = 0, /**< Header Split disabled */
> > @@ -226,11 +230,14 @@ static struct rte_eth_conf port_conf = {
> > .rx_adv_conf = {
> > .rss_conf = {
> > .rss_key = NULL,
> > - .rss_hf = ETH_RSS_IP,
> > + .rss_hf = ETH_RSS_UDP,
> > },
> > },
> > .txmode = {
> > - .mq_mode = ETH_DCB_NONE,
> > + .mq_mode = ETH_MQ_TX_NONE,
> > + },
> > + .intr_conf = {
> > + .rxq = 1, /**< rxq interrupt feature enabled */
> > },
> > };
> >
> > @@ -402,19 +409,22 @@ power_timer_cb(__attribute__((unused)) struct
> > rte_timer *tim,
> > /* accumulate total execution time in us when callback is invoked */
> > sleep_time_ratio = (float)(stats[lcore_id].sleep_time) /
> > (float)SCALING_PERIOD;
> > -
> > /**
> > * check whether need to scale down frequency a step if it sleep a lot.
> > */
> > - if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD)
> > - rte_power_freq_down(lcore_id);
> > + if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD) {
> > + if (rte_power_freq_down)
> > + rte_power_freq_down(lcore_id);
> > + }
> > else if ( (unsigned)(stats[lcore_id].nb_rx_processed /
> > - stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST)
> > + stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST) {
> > /**
> > * scale down a step if average packet per iteration less
> > * than expectation.
> > */
> > - rte_power_freq_down(lcore_id);
> > + if (rte_power_freq_down)
> > + rte_power_freq_down(lcore_id);
> > + }
> >
> > /**
> > * initialize another timer according to current frequency to ensure
> > @@ -707,22 +717,20 @@ l3fwd_simple_forward(struct rte_mbuf *m, uint8_t
> > portid,
> >
> > }
> >
> > -#define SLEEP_GEAR1_THRESHOLD 100
> > -#define SLEEP_GEAR2_THRESHOLD 1000
> > +#define MINIMUM_SLEEP_TIME 1
> > +#define SUSPEND_THRESHOLD 300
> >
> > static inline uint32_t
> > power_idle_heuristic(uint32_t zero_rx_packet_count)
> > {
> > - /* If zero count is less than 100, use it as the sleep time in us */
> > - if (zero_rx_packet_count < SLEEP_GEAR1_THRESHOLD)
> > - return zero_rx_packet_count;
> > - /* If zero count is less than 1000, sleep time should be 100 us */
> > - else if ((zero_rx_packet_count >= SLEEP_GEAR1_THRESHOLD) &&
> > - (zero_rx_packet_count < SLEEP_GEAR2_THRESHOLD))
> > - return SLEEP_GEAR1_THRESHOLD;
> > - /* If zero count is greater than 1000, sleep time should be 1000 us */
> > - else if (zero_rx_packet_count >= SLEEP_GEAR2_THRESHOLD)
> > - return SLEEP_GEAR2_THRESHOLD;
> > + /* If zero count is less than 100, sleep 1us */
> > + if (zero_rx_packet_count < SUSPEND_THRESHOLD)
> > + return MINIMUM_SLEEP_TIME;
> > + /* If zero count is less than 1000, sleep 100 us which is the minimum
> > latency
> > + switching from C3/C6 to C0
> > + */
> > + else
> > + return SUSPEND_THRESHOLD;
> >
> > return 0;
> > }
> > @@ -762,6 +770,35 @@ power_freq_scaleup_heuristic(unsigned lcore_id,
> > return FREQ_CURRENT;
> > }
> >
> > +/**
> > + * force polling thread sleep until one-shot rx interrupt triggers
> > + * @param port_id
> > + * Port id.
> > + * @param queue_id
> > + * Rx queue id.
> > + * @return
> > + * 0 on success
> > + */
> > +static int
> > +sleep_until_rx_interrupt(uint8_t port_id, uint8_t queue_id)
> > +{
> > + /* Enable one-shot rx interrupt */
> > + rte_eth_dev_rx_queue_intr_enable(port_id, queue_id);
> > +
> > + RTE_LOG(INFO, L3FWD_POWER,
> > + "lcore %u sleeps until interrupt on port%d,rxq%d triggers\n",
> > + rte_lcore_id(), port_id, queue_id);
> > + rte_eal_wait_rx_intr(port_id, queue_id);
> > + RTE_LOG(INFO, L3FWD_POWER,
> > + "lcore %u is waked up from rx interrupt on port%d,rxq%d\n",
> > + rte_lcore_id(), port_id, queue_id);
> > +
> > + /* Disable one-shot rx interrupt */
> > + rte_eth_dev_rx_queue_intr_disable(port_id, queue_id);
> > +
> > + return 0;
> > +}
> > +
> > /* main processing loop */
> > static int
> > main_loop(__attribute__((unused)) void *dummy)
> > @@ -775,7 +812,6 @@ main_loop(__attribute__((unused)) void *dummy)
> > struct lcore_conf *qconf;
> > struct lcore_rx_queue *rx_queue;
> > enum freq_scale_hint_t lcore_scaleup_hint;
> > -
> > uint32_t lcore_rx_idle_count = 0;
> > uint32_t lcore_idle_hint = 0;
> >
> > @@ -835,6 +871,8 @@ main_loop(__attribute__((unused)) void *dummy)
> > prev_tsc_power = cur_tsc_power;
> > }
> >
> > +
> > +start_rx:
> > /*
> > * Read packet from RX queues
> > */
> > @@ -848,6 +886,7 @@ main_loop(__attribute__((unused)) void *dummy)
> >
> > nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
> > MAX_PKT_BURST);
> > +
> > stats[lcore_id].nb_rx_processed += nb_rx;
> > if (unlikely(nb_rx == 0)) {
> > /**
> > @@ -910,10 +949,13 @@ main_loop(__attribute__((unused)) void *dummy)
> > rx_queue->freq_up_hint;
> > }
> >
> > - if (lcore_scaleup_hint == FREQ_HIGHEST)
> > - rte_power_freq_max(lcore_id);
> > - else if (lcore_scaleup_hint == FREQ_HIGHER)
> > - rte_power_freq_up(lcore_id);
> > + if (lcore_scaleup_hint == FREQ_HIGHEST) {
> > + if (rte_power_freq_max)
> > + rte_power_freq_max(lcore_id);
> > + } else if (lcore_scaleup_hint == FREQ_HIGHER) {
> > + if (rte_power_freq_up)
> > + rte_power_freq_up(lcore_id);
> > + }
> > } else {
> > /**
> > * All Rx queues empty in recent consecutive polls,
> > @@ -928,21 +970,55 @@ main_loop(__attribute__((unused)) void *dummy)
> > lcore_idle_hint = rx_queue->idle_hint;
> > }
> >
> > - if ( lcore_idle_hint < SLEEP_GEAR1_THRESHOLD)
> > + if (lcore_idle_hint < SUSPEND_THRESHOLD)
> > /**
> > - * execute "pause" instruction to avoid context
> > - * switch for short sleep.
> > - */
> > + * execute "pause" instruction to avoid context
> > + * switch which generally take hundres of microsecond
> > + * for short sleep.
> > + */
> > rte_delay_us(lcore_idle_hint);
> > - else
> > - /* long sleep force runing thread to suspend */
> > - usleep(lcore_idle_hint);
> > -
> > + else {
> > + /* suspend untill rx interrupt trigges */
> > + sleep_until_rx_interrupt(
> > + qconf->rx_queue_list[0].port_id,
> > + qconf->rx_queue_list[0].queue_id);
> > + /* start receiving packets immediately */
> > + goto start_rx;
> > + }
> > stats[lcore_id].sleep_time += lcore_idle_hint;
> > }
> > }
> > }
> >
> > +/**
> > + * It will be called as the callback for specified port after a LSI interrupt
> > + * has been fully handled. This callback needs to be implemented carefully as
> > + * it will be called in the interrupt host thread which is different from the
> > + * application main thread.
> > + *
> > + * @param port_id
> > + * Port id.
> > + * @param type
> > + * event type.
> > + * @param param
> > + * Pointer to(address of) the parameters.
> > + *
> > + * @return
> > + * void.
> > + */
> > +
> > +/*
> > +static void
> > +rx_interrupt_event_callback(uint8_t port_id, enum rte_eth_event_type type,
> > void *param)
> > +{
> > + uint64_t rx_queues = *((uint64_t *)param);
> > +
> > + port_id = port_id + 1;
> > + if(type == RTE_ETH_EVENT_INTR_RX)
> > + port_id = rx_queues;
> [LCM] What's bunch of things for ?
Debug related code which will be removed in V2 patch.
>
> > +}
> > +*/
> > +
> > static int
> > check_lcore_params(void)
> > {
> > @@ -1270,7 +1346,7 @@ setup_hash(int socketid)
> > char s[64];
> >
> > /* create ipv4 hash */
> > - snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid);
> > + rte_snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid);
> > ipv4_l3fwd_hash_params.name = s;
> > ipv4_l3fwd_hash_params.socket_id = socketid;
> > ipv4_l3fwd_lookup_struct[socketid] =
> > @@ -1280,7 +1356,7 @@ setup_hash(int socketid)
> > "socket %d\n", socketid);
> >
> > /* create ipv6 hash */
> > - snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid);
> > + rte_snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid);
> > ipv6_l3fwd_hash_params.name = s;
> > ipv6_l3fwd_hash_params.socket_id = socketid;
> > ipv6_l3fwd_lookup_struct[socketid] =
> > @@ -1476,6 +1552,7 @@ main(int argc, char **argv)
> > unsigned lcore_id;
> > uint64_t hz;
> > uint32_t n_tx_queue, nb_lcores;
> > + uint32_t dev_rxq_num, dev_txq_num;
> > uint8_t portid, nb_rx_queue, queue, socketid;
> >
> > /* catch SIGINT and restore cpufreq governor to ondemand */
> > @@ -1525,10 +1602,18 @@ main(int argc, char **argv)
> > printf("Initializing port %d ... ", portid );
> > fflush(stdout);
> >
> > + rte_eth_dev_info_get(portid, &dev_info);
> > + dev_rxq_num = dev_info.max_rx_queues;
> > + dev_txq_num = dev_info.max_tx_queues;
> > +
> > nb_rx_queue = get_port_n_rx_queues(portid);
> > + if (nb_rx_queue > dev_rxq_num)
> > + rte_exit(EXIT_FAILURE, "Cannot configure not existed rxq: "
> > + "port=%d\n", portid);
> > +
> > n_tx_queue = nb_lcores;
> > - if (n_tx_queue > MAX_TX_QUEUE_PER_PORT)
> > - n_tx_queue = MAX_TX_QUEUE_PER_PORT;
> > + if (n_tx_queue > dev_txq_num)
> > + n_tx_queue = dev_txq_num;
> > printf("Creating queues: nb_rxq=%d nb_txq=%u... ",
> > nb_rx_queue, (unsigned)n_tx_queue );
> > ret = rte_eth_dev_configure(portid, nb_rx_queue,
> > @@ -1552,6 +1637,9 @@ main(int argc, char **argv)
> > if (rte_lcore_is_enabled(lcore_id) == 0)
> > continue;
> >
> > + if (queueid >= dev_txq_num)
> > + continue;
> > +
> > if (numa_on)
> > socketid = \
> > (uint8_t)rte_lcore_to_socket_id(lcore_id);
> > @@ -1586,8 +1674,9 @@ main(int argc, char **argv)
> > /* init power management library */
> > ret = rte_power_init(lcore_id);
> > if (ret)
> > - rte_exit(EXIT_FAILURE, "Power management library "
> > - "initialization failed on core%u\n", lcore_id);
> > + rte_log(RTE_LOG_ERR, RTE_LOGTYPE_POWER,
> > + "Power management library initialization "
> > + "failed on core%u", lcore_id);
> >
> > /* init timer structures for each enabled lcore */
> > rte_timer_init(&power_timers[lcore_id]);
> > @@ -1635,7 +1724,6 @@ main(int argc, char **argv)
> > if (ret < 0)
> > rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, "
> > "port=%d\n", ret, portid);
> > -
> > /*
> > * If enabled, put device in promiscuous mode.
> > * This allows IO forwarding mode to forward packets
> > --
> > 1.8.1.4
^ permalink raw reply [flat|nested] 16+ messages in thread