DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH v2] net/i40e: remove VF interrupt handler
@ 2018-06-22  0:44 Qi Zhang
  2018-06-22 15:44 ` Stephen Hemminger
  0 siblings, 1 reply; 6+ messages in thread
From: Qi Zhang @ 2018-06-22  0:44 UTC (permalink / raw)
  To: beilei.xing; +Cc: jingjing.wu, de.yu, dev, Qi Zhang

For i40evf, internal rx interrupt and adminq interrupt share the same
source, that cause a lot cpu cycles be wasted on interrupt handler
on rx path. This is complained by customers which require low latency
(when set I40E_ITR_INTERVAL to small value), but have to be sufferred by
tremendous interrupts handling that eat significant CPU resources.

The patch disable pci interrupt and remove the interrupt handler,
replace it with a low frequency (50ms) interrupt polling daemon
which is implemented by registering a alarm callback periodly, this
save CPU time significently: On a typical x86 server with 2.1GHz CPU,
with low latency configure (32us) we saw CPU usage from top commmand
reduced from 20% to 0% on management core in testpmd).

Also with the new method we can remove compile option: I40E_ITR_INTERVAL
which is used to balance between low latency and low CPU usage previously.
Now we don't need it since we can reach both at same time.

Suggested-by: Jingjing Wu <jingjing.wu@intel.com>
Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
---

v2:
- update doc

 config/common_base                |  2 --
 doc/guides/nics/i40e.rst          |  5 -----
 drivers/net/i40e/i40e_ethdev.c    |  3 +--
 drivers/net/i40e/i40e_ethdev.h    | 22 +++++++++++-----------
 drivers/net/i40e/i40e_ethdev_vf.c | 36 ++++++++++++++----------------------
 5 files changed, 26 insertions(+), 42 deletions(-)

diff --git a/config/common_base b/config/common_base
index 6b0d1cbbb..9e21c6865 100644
--- a/config/common_base
+++ b/config/common_base
@@ -264,8 +264,6 @@ CONFIG_RTE_LIBRTE_I40E_INC_VECTOR=y
 CONFIG_RTE_LIBRTE_I40E_16BYTE_RX_DESC=n
 CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_PF=64
 CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_VM=4
-# interval up to 8160 us, aligned to 2 (or default value)
-CONFIG_RTE_LIBRTE_I40E_ITR_INTERVAL=-1
 
 #
 # Compile burst-oriented FM10K PMD
diff --git a/doc/guides/nics/i40e.rst b/doc/guides/nics/i40e.rst
index 18549bf5a..3fc4ceac7 100644
--- a/doc/guides/nics/i40e.rst
+++ b/doc/guides/nics/i40e.rst
@@ -96,11 +96,6 @@ Please note that enabling debugging options may affect system performance.
 
   Number of queues reserved for each VMDQ Pool.
 
-- ``CONFIG_RTE_LIBRTE_I40E_ITR_INTERVAL`` (default ``-1``)
-
-  Interrupt Throttling interval.
-
-
 Runtime Config Options
 ~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 13c5d3296..c8f9566e0 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -1829,8 +1829,7 @@ __vsi_queues_bind_intr(struct i40e_vsi *vsi, uint16_t msix_vect,
 	/* Write first RX queue to Link list register as the head element */
 	if (vsi->type != I40E_VSI_SRIOV) {
 		uint16_t interval =
-			i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL, 1,
-					       pf->support_multi_driver);
+			i40e_calc_itr_interval(1, pf->support_multi_driver);
 
 		if (msix_vect == I40E_MISC_VEC_ID) {
 			I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0,
diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
index 11c4c76bd..599993dac 100644
--- a/drivers/net/i40e/i40e_ethdev.h
+++ b/drivers/net/i40e/i40e_ethdev.h
@@ -178,7 +178,7 @@ enum i40e_flxpld_layer_idx {
 #define I40E_ITR_INDEX_NONE             3
 #define I40E_QUEUE_ITR_INTERVAL_DEFAULT 32 /* 32 us */
 #define I40E_QUEUE_ITR_INTERVAL_MAX     8160 /* 8160 us */
-#define I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT 8160 /* 8160 us */
+#define I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT 32 /* 32 us */
 /* Special FW support this floating VEB feature */
 #define FLOATING_VEB_SUPPORTED_FW_MAJ 5
 #define FLOATING_VEB_SUPPORTED_FW_MIN 0
@@ -1328,17 +1328,17 @@ i40e_align_floor(int n)
 }
 
 static inline uint16_t
-i40e_calc_itr_interval(int16_t interval, bool is_pf, bool is_multi_drv)
+i40e_calc_itr_interval(bool is_pf, bool is_multi_drv)
 {
-	if (interval < 0 || interval > I40E_QUEUE_ITR_INTERVAL_MAX) {
-		if (is_multi_drv) {
-			interval = I40E_QUEUE_ITR_INTERVAL_MAX;
-		} else {
-			if (is_pf)
-				interval = I40E_QUEUE_ITR_INTERVAL_DEFAULT;
-			else
-				interval = I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT;
-		}
+	uint16_t interval = 0;
+
+	if (is_multi_drv) {
+		interval = I40E_QUEUE_ITR_INTERVAL_MAX;
+	} else {
+		if (is_pf)
+			interval = I40E_QUEUE_ITR_INTERVAL_DEFAULT;
+		else
+			interval = I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT;
 	}
 
 	/* Convert to hardware count, as writing each 1 represents 2 us */
diff --git a/drivers/net/i40e/i40e_ethdev_vf.c b/drivers/net/i40e/i40e_ethdev_vf.c
index 804e44530..ad5c069e8 100644
--- a/drivers/net/i40e/i40e_ethdev_vf.c
+++ b/drivers/net/i40e/i40e_ethdev_vf.c
@@ -44,6 +44,8 @@
 #define I40EVF_BUSY_WAIT_COUNT 50
 #define MAX_RESET_WAIT_CNT     20
 
+#define I40EVF_ALARM_INTERVAL 50000 /* us */
+
 struct i40evf_arq_msg_info {
 	enum virtchnl_ops ops;
 	enum i40e_status_code result;
@@ -1133,7 +1135,7 @@ i40evf_init_vf(struct rte_eth_dev *dev)
 	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 	struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
 	uint16_t interval =
-		i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL, 0, 0);
+		i40e_calc_itr_interval(0, 0);
 
 	vf->adapter = I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 	vf->dev_data = dev->data;
@@ -1370,7 +1372,7 @@ i40evf_handle_aq_msg(struct rte_eth_dev *dev)
  *  void
  */
 static void
-i40evf_dev_interrupt_handler(void *param)
+i40evf_dev_alarm_handler(void *param)
 {
 	struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
 	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
@@ -1399,6 +1401,8 @@ i40evf_dev_interrupt_handler(void *param)
 
 done:
 	i40evf_enable_irq0(hw);
+	rte_eal_alarm_set(I40EVF_ALARM_INTERVAL,
+			  i40evf_dev_alarm_handler, dev);
 }
 
 static int
@@ -1442,12 +1446,8 @@ i40evf_dev_init(struct rte_eth_dev *eth_dev)
 		return -1;
 	}
 
-	/* register callback func to eal lib */
-	rte_intr_callback_register(&pci_dev->intr_handle,
-		i40evf_dev_interrupt_handler, (void *)eth_dev);
-
-	/* enable uio intr after callback register */
-	rte_intr_enable(&pci_dev->intr_handle);
+	rte_eal_alarm_set(I40EVF_ALARM_INTERVAL,
+			  i40evf_dev_alarm_handler, eth_dev);
 
 	/* configure and enable device interrupt */
 	i40evf_enable_irq0(hw);
@@ -1836,7 +1836,7 @@ i40evf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
 	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
 	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 	uint16_t interval =
-		i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL, 0, 0);
+		i40e_calc_itr_interval(0, 0);
 	uint16_t msix_intr;
 
 	msix_intr = intr_handle->intr_vec[queue_id];
@@ -1859,8 +1859,6 @@ i40evf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
 
 	I40EVF_WRITE_FLUSH(hw);
 
-	rte_intr_enable(&pci_dev->intr_handle);
-
 	return 0;
 }
 
@@ -2023,10 +2021,8 @@ i40evf_dev_start(struct rte_eth_dev *dev)
 	 * queue interrupt to other VFIO vectors.
 	 * So clear uio/vfio intr/evevnfd first to avoid failure.
 	 */
-	if (dev->data->dev_conf.intr_conf.rxq != 0) {
-		rte_intr_disable(intr_handle);
+	if (dev->data->dev_conf.intr_conf.rxq != 0)
 		rte_intr_enable(intr_handle);
-	}
 
 	i40evf_enable_queues_intr(dev);
 
@@ -2050,6 +2046,9 @@ i40evf_dev_stop(struct rte_eth_dev *dev)
 
 	PMD_INIT_FUNC_TRACE();
 
+	if (dev->data->dev_conf.intr_conf.rxq != 0)
+		rte_intr_disable(intr_handle);
+
 	if (hw->adapter_stopped == 1)
 		return;
 	i40evf_stop_queues(dev);
@@ -2285,9 +2284,8 @@ static void
 i40evf_dev_close(struct rte_eth_dev *dev)
 {
 	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
-	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
 
+	rte_eal_alarm_cancel(i40evf_dev_alarm_handler, dev);
 	i40evf_dev_stop(dev);
 	i40e_dev_free_queues(dev);
 	/*
@@ -2300,12 +2298,6 @@ i40evf_dev_close(struct rte_eth_dev *dev)
 
 	i40evf_reset_vf(hw);
 	i40e_shutdown_adminq(hw);
-	/* disable uio intr before callback unregister */
-	rte_intr_disable(intr_handle);
-
-	/* unregister callback func from eal lib */
-	rte_intr_callback_unregister(intr_handle,
-				     i40evf_dev_interrupt_handler, dev);
 	i40evf_disable_irq0(hw);
 }
 
-- 
2.13.6

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [dpdk-dev] [PATCH v2] net/i40e: remove VF interrupt handler
  2018-06-22  0:44 [dpdk-dev] [PATCH v2] net/i40e: remove VF interrupt handler Qi Zhang
@ 2018-06-22 15:44 ` Stephen Hemminger
  2018-06-24 10:56   ` Zhang, Qi Z
  0 siblings, 1 reply; 6+ messages in thread
From: Stephen Hemminger @ 2018-06-22 15:44 UTC (permalink / raw)
  To: Qi Zhang; +Cc: beilei.xing, jingjing.wu, de.yu, dev

On Fri, 22 Jun 2018 08:44:14 +0800
Qi Zhang <qi.z.zhang@intel.com> wrote:

> For i40evf, internal rx interrupt and adminq interrupt share the same
> source, that cause a lot cpu cycles be wasted on interrupt handler
> on rx path. This is complained by customers which require low latency
> (when set I40E_ITR_INTERVAL to small value), but have to be sufferred by
> tremendous interrupts handling that eat significant CPU resources.
> 
> The patch disable pci interrupt and remove the interrupt handler,
> replace it with a low frequency (50ms) interrupt polling daemon
> which is implemented by registering a alarm callback periodly, this
> save CPU time significently: On a typical x86 server with 2.1GHz CPU,
> with low latency configure (32us) we saw CPU usage from top commmand
> reduced from 20% to 0% on management core in testpmd).
> 
> Also with the new method we can remove compile option: I40E_ITR_INTERVAL
> which is used to balance between low latency and low CPU usage previously.
> Now we don't need it since we can reach both at same time.
> 
> Suggested-by: Jingjing Wu <jingjing.wu@intel.com>
> Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
> ---
> 
> v2:
> - update doc
> 
>  config/common_base                |  2 --
>  doc/guides/nics/i40e.rst          |  5 -----
>  drivers/net/i40e/i40e_ethdev.c    |  3 +--
>  drivers/net/i40e/i40e_ethdev.h    | 22 +++++++++++-----------
>  drivers/net/i40e/i40e_ethdev_vf.c | 36 ++++++++++++++----------------------
>  5 files changed, 26 insertions(+), 42 deletions(-)
> 
> diff --git a/config/common_base b/config/common_base
> index 6b0d1cbbb..9e21c6865 100644
> --- a/config/common_base
> +++ b/config/common_base
> @@ -264,8 +264,6 @@ CONFIG_RTE_LIBRTE_I40E_INC_VECTOR=y
>  CONFIG_RTE_LIBRTE_I40E_16BYTE_RX_DESC=n
>  CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_PF=64
>  CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_VM=4
> -# interval up to 8160 us, aligned to 2 (or default value)
> -CONFIG_RTE_LIBRTE_I40E_ITR_INTERVAL=-1
>  
>  #
>  # Compile burst-oriented FM10K PMD
> diff --git a/doc/guides/nics/i40e.rst b/doc/guides/nics/i40e.rst
> index 18549bf5a..3fc4ceac7 100644
> --- a/doc/guides/nics/i40e.rst
> +++ b/doc/guides/nics/i40e.rst
> @@ -96,11 +96,6 @@ Please note that enabling debugging options may affect system performance.
>  
>    Number of queues reserved for each VMDQ Pool.
>  
> -- ``CONFIG_RTE_LIBRTE_I40E_ITR_INTERVAL`` (default ``-1``)
> -
> -  Interrupt Throttling interval.
> -
> -
>  Runtime Config Options
>  ~~~~~~~~~~~~~~~~~~~~~~
>  
> diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
> index 13c5d3296..c8f9566e0 100644
> --- a/drivers/net/i40e/i40e_ethdev.c
> +++ b/drivers/net/i40e/i40e_ethdev.c
> @@ -1829,8 +1829,7 @@ __vsi_queues_bind_intr(struct i40e_vsi *vsi, uint16_t msix_vect,
>  	/* Write first RX queue to Link list register as the head element */
>  	if (vsi->type != I40E_VSI_SRIOV) {
>  		uint16_t interval =
> -			i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL, 1,
> -					       pf->support_multi_driver);
> +			i40e_calc_itr_interval(1, pf->support_multi_driver);
>  
>  		if (msix_vect == I40E_MISC_VEC_ID) {
>  			I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0,
> diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
> index 11c4c76bd..599993dac 100644
> --- a/drivers/net/i40e/i40e_ethdev.h
> +++ b/drivers/net/i40e/i40e_ethdev.h
> @@ -178,7 +178,7 @@ enum i40e_flxpld_layer_idx {
>  #define I40E_ITR_INDEX_NONE             3
>  #define I40E_QUEUE_ITR_INTERVAL_DEFAULT 32 /* 32 us */
>  #define I40E_QUEUE_ITR_INTERVAL_MAX     8160 /* 8160 us */
> -#define I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT 8160 /* 8160 us */
> +#define I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT 32 /* 32 us */
>  /* Special FW support this floating VEB feature */
>  #define FLOATING_VEB_SUPPORTED_FW_MAJ 5
>  #define FLOATING_VEB_SUPPORTED_FW_MIN 0
> @@ -1328,17 +1328,17 @@ i40e_align_floor(int n)
>  }
>  
>  static inline uint16_t
> -i40e_calc_itr_interval(int16_t interval, bool is_pf, bool is_multi_drv)
> +i40e_calc_itr_interval(bool is_pf, bool is_multi_drv)
>  {
> -	if (interval < 0 || interval > I40E_QUEUE_ITR_INTERVAL_MAX) {
> -		if (is_multi_drv) {
> -			interval = I40E_QUEUE_ITR_INTERVAL_MAX;
> -		} else {
> -			if (is_pf)
> -				interval = I40E_QUEUE_ITR_INTERVAL_DEFAULT;
> -			else
> -				interval = I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT;
> -		}
> +	uint16_t interval = 0;
> +
> +	if (is_multi_drv) {
> +		interval = I40E_QUEUE_ITR_INTERVAL_MAX;
> +	} else {
> +		if (is_pf)
> +			interval = I40E_QUEUE_ITR_INTERVAL_DEFAULT;
> +		else
> +			interval = I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT;
>  	}
>  
>  	/* Convert to hardware count, as writing each 1 represents 2 us */
> diff --git a/drivers/net/i40e/i40e_ethdev_vf.c b/drivers/net/i40e/i40e_ethdev_vf.c
> index 804e44530..ad5c069e8 100644
> --- a/drivers/net/i40e/i40e_ethdev_vf.c
> +++ b/drivers/net/i40e/i40e_ethdev_vf.c
> @@ -44,6 +44,8 @@
>  #define I40EVF_BUSY_WAIT_COUNT 50
>  #define MAX_RESET_WAIT_CNT     20
>  
> +#define I40EVF_ALARM_INTERVAL 50000 /* us */
> +
>  struct i40evf_arq_msg_info {
>  	enum virtchnl_ops ops;
>  	enum i40e_status_code result;
> @@ -1133,7 +1135,7 @@ i40evf_init_vf(struct rte_eth_dev *dev)
>  	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>  	struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
>  	uint16_t interval =
> -		i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL, 0, 0);
> +		i40e_calc_itr_interval(0, 0);
>  
>  	vf->adapter = I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
>  	vf->dev_data = dev->data;
> @@ -1370,7 +1372,7 @@ i40evf_handle_aq_msg(struct rte_eth_dev *dev)
>   *  void
>   */
>  static void
> -i40evf_dev_interrupt_handler(void *param)
> +i40evf_dev_alarm_handler(void *param)
>  {
>  	struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
>  	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> @@ -1399,6 +1401,8 @@ i40evf_dev_interrupt_handler(void *param)
>  
>  done:
>  	i40evf_enable_irq0(hw);
> +	rte_eal_alarm_set(I40EVF_ALARM_INTERVAL,
> +			  i40evf_dev_alarm_handler, dev);
>  }
>  
>  static int
> @@ -1442,12 +1446,8 @@ i40evf_dev_init(struct rte_eth_dev *eth_dev)
>  		return -1;
>  	}
>  
> -	/* register callback func to eal lib */
> -	rte_intr_callback_register(&pci_dev->intr_handle,
> -		i40evf_dev_interrupt_handler, (void *)eth_dev);
> -
> -	/* enable uio intr after callback register */
> -	rte_intr_enable(&pci_dev->intr_handle);
> +	rte_eal_alarm_set(I40EVF_ALARM_INTERVAL,
> +			  i40evf_dev_alarm_handler, eth_dev);
>  
>  	/* configure and enable device interrupt */
>  	i40evf_enable_irq0(hw);
> @@ -1836,7 +1836,7 @@ i40evf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
>  	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
>  	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>  	uint16_t interval =
> -		i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL, 0, 0);
> +		i40e_calc_itr_interval(0, 0);
>  	uint16_t msix_intr;
>  
>  	msix_intr = intr_handle->intr_vec[queue_id];
> @@ -1859,8 +1859,6 @@ i40evf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
>  
>  	I40EVF_WRITE_FLUSH(hw);
>  
> -	rte_intr_enable(&pci_dev->intr_handle);
> -
>  	return 0;
>  }
>  
> @@ -2023,10 +2021,8 @@ i40evf_dev_start(struct rte_eth_dev *dev)
>  	 * queue interrupt to other VFIO vectors.
>  	 * So clear uio/vfio intr/evevnfd first to avoid failure.
>  	 */
> -	if (dev->data->dev_conf.intr_conf.rxq != 0) {
> -		rte_intr_disable(intr_handle);
> +	if (dev->data->dev_conf.intr_conf.rxq != 0)
>  		rte_intr_enable(intr_handle);
> -	}
>  
>  	i40evf_enable_queues_intr(dev);
>  
> @@ -2050,6 +2046,9 @@ i40evf_dev_stop(struct rte_eth_dev *dev)
>  
>  	PMD_INIT_FUNC_TRACE();
>  
> +	if (dev->data->dev_conf.intr_conf.rxq != 0)
> +		rte_intr_disable(intr_handle);
> +
>  	if (hw->adapter_stopped == 1)
>  		return;
>  	i40evf_stop_queues(dev);
> @@ -2285,9 +2284,8 @@ static void
>  i40evf_dev_close(struct rte_eth_dev *dev)
>  {
>  	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> -	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
> -	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
>  
> +	rte_eal_alarm_cancel(i40evf_dev_alarm_handler, dev);
>  	i40evf_dev_stop(dev);
>  	i40e_dev_free_queues(dev);
>  	/*
> @@ -2300,12 +2298,6 @@ i40evf_dev_close(struct rte_eth_dev *dev)
>  
>  	i40evf_reset_vf(hw);
>  	i40e_shutdown_adminq(hw);
> -	/* disable uio intr before callback unregister */
> -	rte_intr_disable(intr_handle);
> -
> -	/* unregister callback func from eal lib */
> -	rte_intr_callback_unregister(intr_handle,
> -				     i40evf_dev_interrupt_handler, dev);
>  	i40evf_disable_irq0(hw);
>  }
>  

Rather than adding a polling routine internally, why not change the driver to not support
Link State or receive interrupts. Better yet, let the application decide.
Keep the interrupt logic but only enable interrupts if application has requested
LSC or recveive interrupt mode.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [dpdk-dev] [PATCH v2] net/i40e: remove VF interrupt handler
  2018-06-22 15:44 ` Stephen Hemminger
@ 2018-06-24 10:56   ` Zhang, Qi Z
  2018-06-26  9:15     ` Ferruh Yigit
  0 siblings, 1 reply; 6+ messages in thread
From: Zhang, Qi Z @ 2018-06-24 10:56 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Xing, Beilei, Wu, Jingjing, Yu, De, dev

Hi Stephen:

> -----Original Message-----
> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Friday, June 22, 2018 11:44 PM
> To: Zhang, Qi Z <qi.z.zhang@intel.com>
> Cc: Xing, Beilei <beilei.xing@intel.com>; Wu, Jingjing <jingjing.wu@intel.com>;
> Yu, De <de.yu@intel.com>; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v2] net/i40e: remove VF interrupt handler
> 
> On Fri, 22 Jun 2018 08:44:14 +0800
> Qi Zhang <qi.z.zhang@intel.com> wrote:
> 
> > For i40evf, internal rx interrupt and adminq interrupt share the same
> > source, that cause a lot cpu cycles be wasted on interrupt handler on
> > rx path. This is complained by customers which require low latency
> > (when set I40E_ITR_INTERVAL to small value), but have to be sufferred
> > by tremendous interrupts handling that eat significant CPU resources.
> >
> > The patch disable pci interrupt and remove the interrupt handler,
> > replace it with a low frequency (50ms) interrupt polling daemon which
> > is implemented by registering a alarm callback periodly, this save CPU
> > time significently: On a typical x86 server with 2.1GHz CPU, with low
> > latency configure (32us) we saw CPU usage from top commmand reduced
> > from 20% to 0% on management core in testpmd).
> >
> > Also with the new method we can remove compile option:
> > I40E_ITR_INTERVAL which is used to balance between low latency and low
> CPU usage previously.
> > Now we don't need it since we can reach both at same time.
> >
> > Suggested-by: Jingjing Wu <jingjing.wu@intel.com>
> > Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
> > ---
> >
> > v2:
> > - update doc
> >
> >  config/common_base                |  2 --
> >  doc/guides/nics/i40e.rst          |  5 -----
> >  drivers/net/i40e/i40e_ethdev.c    |  3 +--
> >  drivers/net/i40e/i40e_ethdev.h    | 22 +++++++++++-----------
> >  drivers/net/i40e/i40e_ethdev_vf.c | 36
> > ++++++++++++++----------------------
> >  5 files changed, 26 insertions(+), 42 deletions(-)
> >
> > diff --git a/config/common_base b/config/common_base index
> > 6b0d1cbbb..9e21c6865 100644
> > --- a/config/common_base
> > +++ b/config/common_base
> > @@ -264,8 +264,6 @@ CONFIG_RTE_LIBRTE_I40E_INC_VECTOR=y
> >  CONFIG_RTE_LIBRTE_I40E_16BYTE_RX_DESC=n
> >  CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_PF=64
> >  CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_VM=4
> > -# interval up to 8160 us, aligned to 2 (or default value)
> > -CONFIG_RTE_LIBRTE_I40E_ITR_INTERVAL=-1
> >
> >  #
> >  # Compile burst-oriented FM10K PMD
> > diff --git a/doc/guides/nics/i40e.rst b/doc/guides/nics/i40e.rst index
> > 18549bf5a..3fc4ceac7 100644
> > --- a/doc/guides/nics/i40e.rst
> > +++ b/doc/guides/nics/i40e.rst
> > @@ -96,11 +96,6 @@ Please note that enabling debugging options may
> affect system performance.
> >
> >    Number of queues reserved for each VMDQ Pool.
> >
> > -- ``CONFIG_RTE_LIBRTE_I40E_ITR_INTERVAL`` (default ``-1``)
> > -
> > -  Interrupt Throttling interval.
> > -
> > -
> >  Runtime Config Options
> >  ~~~~~~~~~~~~~~~~~~~~~~
> >
> > diff --git a/drivers/net/i40e/i40e_ethdev.c
> > b/drivers/net/i40e/i40e_ethdev.c index 13c5d3296..c8f9566e0 100644
> > --- a/drivers/net/i40e/i40e_ethdev.c
> > +++ b/drivers/net/i40e/i40e_ethdev.c
> > @@ -1829,8 +1829,7 @@ __vsi_queues_bind_intr(struct i40e_vsi *vsi,
> uint16_t msix_vect,
> >  	/* Write first RX queue to Link list register as the head element */
> >  	if (vsi->type != I40E_VSI_SRIOV) {
> >  		uint16_t interval =
> > -			i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL, 1,
> > -					       pf->support_multi_driver);
> > +			i40e_calc_itr_interval(1, pf->support_multi_driver);
> >
> >  		if (msix_vect == I40E_MISC_VEC_ID) {
> >  			I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, diff --git
> > a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
> > index 11c4c76bd..599993dac 100644
> > --- a/drivers/net/i40e/i40e_ethdev.h
> > +++ b/drivers/net/i40e/i40e_ethdev.h
> > @@ -178,7 +178,7 @@ enum i40e_flxpld_layer_idx {
> >  #define I40E_ITR_INDEX_NONE             3
> >  #define I40E_QUEUE_ITR_INTERVAL_DEFAULT 32 /* 32 us */
> >  #define I40E_QUEUE_ITR_INTERVAL_MAX     8160 /* 8160 us */
> > -#define I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT 8160 /* 8160 us */
> > +#define I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT 32 /* 32 us */
> >  /* Special FW support this floating VEB feature */  #define
> > FLOATING_VEB_SUPPORTED_FW_MAJ 5  #define
> FLOATING_VEB_SUPPORTED_FW_MIN
> > 0 @@ -1328,17 +1328,17 @@ i40e_align_floor(int n)  }
> >
> >  static inline uint16_t
> > -i40e_calc_itr_interval(int16_t interval, bool is_pf, bool
> > is_multi_drv)
> > +i40e_calc_itr_interval(bool is_pf, bool is_multi_drv)
> >  {
> > -	if (interval < 0 || interval > I40E_QUEUE_ITR_INTERVAL_MAX) {
> > -		if (is_multi_drv) {
> > -			interval = I40E_QUEUE_ITR_INTERVAL_MAX;
> > -		} else {
> > -			if (is_pf)
> > -				interval = I40E_QUEUE_ITR_INTERVAL_DEFAULT;
> > -			else
> > -				interval = I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT;
> > -		}
> > +	uint16_t interval = 0;
> > +
> > +	if (is_multi_drv) {
> > +		interval = I40E_QUEUE_ITR_INTERVAL_MAX;
> > +	} else {
> > +		if (is_pf)
> > +			interval = I40E_QUEUE_ITR_INTERVAL_DEFAULT;
> > +		else
> > +			interval = I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT;
> >  	}
> >
> >  	/* Convert to hardware count, as writing each 1 represents 2 us */
> > diff --git a/drivers/net/i40e/i40e_ethdev_vf.c
> > b/drivers/net/i40e/i40e_ethdev_vf.c
> > index 804e44530..ad5c069e8 100644
> > --- a/drivers/net/i40e/i40e_ethdev_vf.c
> > +++ b/drivers/net/i40e/i40e_ethdev_vf.c
> > @@ -44,6 +44,8 @@
> >  #define I40EVF_BUSY_WAIT_COUNT 50
> >  #define MAX_RESET_WAIT_CNT     20
> >
> > +#define I40EVF_ALARM_INTERVAL 50000 /* us */
> > +
> >  struct i40evf_arq_msg_info {
> >  	enum virtchnl_ops ops;
> >  	enum i40e_status_code result;
> > @@ -1133,7 +1135,7 @@ i40evf_init_vf(struct rte_eth_dev *dev)
> >  	struct i40e_hw *hw =
> I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> >  	struct i40e_vf *vf =
> I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
> >  	uint16_t interval =
> > -		i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL, 0, 0);
> > +		i40e_calc_itr_interval(0, 0);
> >
> >  	vf->adapter = I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
> >  	vf->dev_data = dev->data;
> > @@ -1370,7 +1372,7 @@ i40evf_handle_aq_msg(struct rte_eth_dev *dev)
> >   *  void
> >   */
> >  static void
> > -i40evf_dev_interrupt_handler(void *param)
> > +i40evf_dev_alarm_handler(void *param)
> >  {
> >  	struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
> >  	struct i40e_hw *hw =
> I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> > @@ -1399,6 +1401,8 @@ i40evf_dev_interrupt_handler(void *param)
> >
> >  done:
> >  	i40evf_enable_irq0(hw);
> > +	rte_eal_alarm_set(I40EVF_ALARM_INTERVAL,
> > +			  i40evf_dev_alarm_handler, dev);
> >  }
> >
> >  static int
> > @@ -1442,12 +1446,8 @@ i40evf_dev_init(struct rte_eth_dev *eth_dev)
> >  		return -1;
> >  	}
> >
> > -	/* register callback func to eal lib */
> > -	rte_intr_callback_register(&pci_dev->intr_handle,
> > -		i40evf_dev_interrupt_handler, (void *)eth_dev);
> > -
> > -	/* enable uio intr after callback register */
> > -	rte_intr_enable(&pci_dev->intr_handle);
> > +	rte_eal_alarm_set(I40EVF_ALARM_INTERVAL,
> > +			  i40evf_dev_alarm_handler, eth_dev);
> >
> >  	/* configure and enable device interrupt */
> >  	i40evf_enable_irq0(hw);
> > @@ -1836,7 +1836,7 @@ i40evf_dev_rx_queue_intr_enable(struct
> rte_eth_dev *dev, uint16_t queue_id)
> >  	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
> >  	struct i40e_hw *hw =
> I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> >  	uint16_t interval =
> > -		i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL, 0, 0);
> > +		i40e_calc_itr_interval(0, 0);
> >  	uint16_t msix_intr;
> >
> >  	msix_intr = intr_handle->intr_vec[queue_id]; @@ -1859,8 +1859,6 @@
> > i40evf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t
> > queue_id)
> >
> >  	I40EVF_WRITE_FLUSH(hw);
> >
> > -	rte_intr_enable(&pci_dev->intr_handle);
> > -
> >  	return 0;
> >  }
> >
> > @@ -2023,10 +2021,8 @@ i40evf_dev_start(struct rte_eth_dev *dev)
> >  	 * queue interrupt to other VFIO vectors.
> >  	 * So clear uio/vfio intr/evevnfd first to avoid failure.
> >  	 */
> > -	if (dev->data->dev_conf.intr_conf.rxq != 0) {
> > -		rte_intr_disable(intr_handle);
> > +	if (dev->data->dev_conf.intr_conf.rxq != 0)
> >  		rte_intr_enable(intr_handle);
> > -	}
> >
> >  	i40evf_enable_queues_intr(dev);
> >
> > @@ -2050,6 +2046,9 @@ i40evf_dev_stop(struct rte_eth_dev *dev)
> >
> >  	PMD_INIT_FUNC_TRACE();
> >
> > +	if (dev->data->dev_conf.intr_conf.rxq != 0)
> > +		rte_intr_disable(intr_handle);
> > +
> >  	if (hw->adapter_stopped == 1)
> >  		return;
> >  	i40evf_stop_queues(dev);
> > @@ -2285,9 +2284,8 @@ static void
> >  i40evf_dev_close(struct rte_eth_dev *dev)  {
> >  	struct i40e_hw *hw =
> I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> > -	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
> > -	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
> >
> > +	rte_eal_alarm_cancel(i40evf_dev_alarm_handler, dev);
> >  	i40evf_dev_stop(dev);
> >  	i40e_dev_free_queues(dev);
> >  	/*
> > @@ -2300,12 +2298,6 @@ i40evf_dev_close(struct rte_eth_dev *dev)
> >
> >  	i40evf_reset_vf(hw);
> >  	i40e_shutdown_adminq(hw);
> > -	/* disable uio intr before callback unregister */
> > -	rte_intr_disable(intr_handle);
> > -
> > -	/* unregister callback func from eal lib */
> > -	rte_intr_callback_unregister(intr_handle,
> > -				     i40evf_dev_interrupt_handler, dev);
> >  	i40evf_disable_irq0(hw);
> >  }
> >
> 
> Rather than adding a polling routine internally, why not change the driver to
> not support Link State or receive interrupts. Better yet, let the application
> decide.
> Keep the interrupt logic but only enable interrupts if application has requested
> LSC or recveive interrupt mode.

The interrupt handler is not only for LSC (actually VF does not support LSC) or rx interrupt mode, 
it is used for PF to VF message through admin queue which is always required.

Regards
Qi

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [dpdk-dev] [PATCH v2] net/i40e: remove VF interrupt handler
  2018-06-24 10:56   ` Zhang, Qi Z
@ 2018-06-26  9:15     ` Ferruh Yigit
  2018-06-26 10:04       ` Zhang, Qi Z
  0 siblings, 1 reply; 6+ messages in thread
From: Ferruh Yigit @ 2018-06-26  9:15 UTC (permalink / raw)
  To: Zhang, Qi Z, Stephen Hemminger; +Cc: Xing, Beilei, Wu, Jingjing, Yu, De, dev

On 6/24/2018 11:56 AM, Zhang, Qi Z wrote:
> Hi Stephen:
> 
>> -----Original Message-----
>> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
>> Sent: Friday, June 22, 2018 11:44 PM
>> To: Zhang, Qi Z <qi.z.zhang@intel.com>
>> Cc: Xing, Beilei <beilei.xing@intel.com>; Wu, Jingjing <jingjing.wu@intel.com>;
>> Yu, De <de.yu@intel.com>; dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH v2] net/i40e: remove VF interrupt handler
>>
>> On Fri, 22 Jun 2018 08:44:14 +0800
>> Qi Zhang <qi.z.zhang@intel.com> wrote:
>>
>>> For i40evf, internal rx interrupt and adminq interrupt share the same
>>> source, that cause a lot cpu cycles be wasted on interrupt handler on
>>> rx path. This is complained by customers which require low latency
>>> (when set I40E_ITR_INTERVAL to small value), but have to be sufferred
>>> by tremendous interrupts handling that eat significant CPU resources.
>>>
>>> The patch disable pci interrupt and remove the interrupt handler,
>>> replace it with a low frequency (50ms) interrupt polling daemon which
>>> is implemented by registering a alarm callback periodly, this save CPU
>>> time significently: On a typical x86 server with 2.1GHz CPU, with low
>>> latency configure (32us) we saw CPU usage from top commmand reduced
>>> from 20% to 0% on management core in testpmd).
>>>
>>> Also with the new method we can remove compile option:
>>> I40E_ITR_INTERVAL which is used to balance between low latency and low
>> CPU usage previously.
>>> Now we don't need it since we can reach both at same time.
>>>
>>> Suggested-by: Jingjing Wu <jingjing.wu@intel.com>
>>> Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
>>> ---
>>>
>>> v2:
>>> - update doc
>>>
>>>  config/common_base                |  2 --
>>>  doc/guides/nics/i40e.rst          |  5 -----
>>>  drivers/net/i40e/i40e_ethdev.c    |  3 +--
>>>  drivers/net/i40e/i40e_ethdev.h    | 22 +++++++++++-----------
>>>  drivers/net/i40e/i40e_ethdev_vf.c | 36
>>> ++++++++++++++----------------------
>>>  5 files changed, 26 insertions(+), 42 deletions(-)
>>>
>>> diff --git a/config/common_base b/config/common_base index
>>> 6b0d1cbbb..9e21c6865 100644
>>> --- a/config/common_base
>>> +++ b/config/common_base
>>> @@ -264,8 +264,6 @@ CONFIG_RTE_LIBRTE_I40E_INC_VECTOR=y
>>>  CONFIG_RTE_LIBRTE_I40E_16BYTE_RX_DESC=n
>>>  CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_PF=64
>>>  CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_VM=4
>>> -# interval up to 8160 us, aligned to 2 (or default value)
>>> -CONFIG_RTE_LIBRTE_I40E_ITR_INTERVAL=-1
>>>
>>>  #
>>>  # Compile burst-oriented FM10K PMD
>>> diff --git a/doc/guides/nics/i40e.rst b/doc/guides/nics/i40e.rst index
>>> 18549bf5a..3fc4ceac7 100644
>>> --- a/doc/guides/nics/i40e.rst
>>> +++ b/doc/guides/nics/i40e.rst
>>> @@ -96,11 +96,6 @@ Please note that enabling debugging options may
>> affect system performance.
>>>
>>>    Number of queues reserved for each VMDQ Pool.
>>>
>>> -- ``CONFIG_RTE_LIBRTE_I40E_ITR_INTERVAL`` (default ``-1``)
>>> -
>>> -  Interrupt Throttling interval.
>>> -
>>> -
>>>  Runtime Config Options
>>>  ~~~~~~~~~~~~~~~~~~~~~~
>>>
>>> diff --git a/drivers/net/i40e/i40e_ethdev.c
>>> b/drivers/net/i40e/i40e_ethdev.c index 13c5d3296..c8f9566e0 100644
>>> --- a/drivers/net/i40e/i40e_ethdev.c
>>> +++ b/drivers/net/i40e/i40e_ethdev.c
>>> @@ -1829,8 +1829,7 @@ __vsi_queues_bind_intr(struct i40e_vsi *vsi,
>> uint16_t msix_vect,
>>>  	/* Write first RX queue to Link list register as the head element */
>>>  	if (vsi->type != I40E_VSI_SRIOV) {
>>>  		uint16_t interval =
>>> -			i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL, 1,
>>> -					       pf->support_multi_driver);
>>> +			i40e_calc_itr_interval(1, pf->support_multi_driver);
>>>
>>>  		if (msix_vect == I40E_MISC_VEC_ID) {
>>>  			I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, diff --git
>>> a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
>>> index 11c4c76bd..599993dac 100644
>>> --- a/drivers/net/i40e/i40e_ethdev.h
>>> +++ b/drivers/net/i40e/i40e_ethdev.h
>>> @@ -178,7 +178,7 @@ enum i40e_flxpld_layer_idx {
>>>  #define I40E_ITR_INDEX_NONE             3
>>>  #define I40E_QUEUE_ITR_INTERVAL_DEFAULT 32 /* 32 us */
>>>  #define I40E_QUEUE_ITR_INTERVAL_MAX     8160 /* 8160 us */
>>> -#define I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT 8160 /* 8160 us */
>>> +#define I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT 32 /* 32 us */
>>>  /* Special FW support this floating VEB feature */  #define
>>> FLOATING_VEB_SUPPORTED_FW_MAJ 5  #define
>> FLOATING_VEB_SUPPORTED_FW_MIN
>>> 0 @@ -1328,17 +1328,17 @@ i40e_align_floor(int n)  }
>>>
>>>  static inline uint16_t
>>> -i40e_calc_itr_interval(int16_t interval, bool is_pf, bool
>>> is_multi_drv)
>>> +i40e_calc_itr_interval(bool is_pf, bool is_multi_drv)
>>>  {
>>> -	if (interval < 0 || interval > I40E_QUEUE_ITR_INTERVAL_MAX) {
>>> -		if (is_multi_drv) {
>>> -			interval = I40E_QUEUE_ITR_INTERVAL_MAX;
>>> -		} else {
>>> -			if (is_pf)
>>> -				interval = I40E_QUEUE_ITR_INTERVAL_DEFAULT;
>>> -			else
>>> -				interval = I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT;
>>> -		}
>>> +	uint16_t interval = 0;
>>> +
>>> +	if (is_multi_drv) {
>>> +		interval = I40E_QUEUE_ITR_INTERVAL_MAX;
>>> +	} else {
>>> +		if (is_pf)
>>> +			interval = I40E_QUEUE_ITR_INTERVAL_DEFAULT;
>>> +		else
>>> +			interval = I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT;
>>>  	}
>>>
>>>  	/* Convert to hardware count, as writing each 1 represents 2 us */
>>> diff --git a/drivers/net/i40e/i40e_ethdev_vf.c
>>> b/drivers/net/i40e/i40e_ethdev_vf.c
>>> index 804e44530..ad5c069e8 100644
>>> --- a/drivers/net/i40e/i40e_ethdev_vf.c
>>> +++ b/drivers/net/i40e/i40e_ethdev_vf.c
>>> @@ -44,6 +44,8 @@
>>>  #define I40EVF_BUSY_WAIT_COUNT 50
>>>  #define MAX_RESET_WAIT_CNT     20
>>>
>>> +#define I40EVF_ALARM_INTERVAL 50000 /* us */
>>> +
>>>  struct i40evf_arq_msg_info {
>>>  	enum virtchnl_ops ops;
>>>  	enum i40e_status_code result;
>>> @@ -1133,7 +1135,7 @@ i40evf_init_vf(struct rte_eth_dev *dev)
>>>  	struct i40e_hw *hw =
>> I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>>>  	struct i40e_vf *vf =
>> I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
>>>  	uint16_t interval =
>>> -		i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL, 0, 0);
>>> +		i40e_calc_itr_interval(0, 0);
>>>
>>>  	vf->adapter = I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
>>>  	vf->dev_data = dev->data;
>>> @@ -1370,7 +1372,7 @@ i40evf_handle_aq_msg(struct rte_eth_dev *dev)
>>>   *  void
>>>   */
>>>  static void
>>> -i40evf_dev_interrupt_handler(void *param)
>>> +i40evf_dev_alarm_handler(void *param)
>>>  {
>>>  	struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
>>>  	struct i40e_hw *hw =
>> I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>>> @@ -1399,6 +1401,8 @@ i40evf_dev_interrupt_handler(void *param)
>>>
>>>  done:
>>>  	i40evf_enable_irq0(hw);
>>> +	rte_eal_alarm_set(I40EVF_ALARM_INTERVAL,
>>> +			  i40evf_dev_alarm_handler, dev);
>>>  }
>>>
>>>  static int
>>> @@ -1442,12 +1446,8 @@ i40evf_dev_init(struct rte_eth_dev *eth_dev)
>>>  		return -1;
>>>  	}
>>>
>>> -	/* register callback func to eal lib */
>>> -	rte_intr_callback_register(&pci_dev->intr_handle,
>>> -		i40evf_dev_interrupt_handler, (void *)eth_dev);
>>> -
>>> -	/* enable uio intr after callback register */
>>> -	rte_intr_enable(&pci_dev->intr_handle);
>>> +	rte_eal_alarm_set(I40EVF_ALARM_INTERVAL,
>>> +			  i40evf_dev_alarm_handler, eth_dev);
>>>
>>>  	/* configure and enable device interrupt */
>>>  	i40evf_enable_irq0(hw);
>>> @@ -1836,7 +1836,7 @@ i40evf_dev_rx_queue_intr_enable(struct
>> rte_eth_dev *dev, uint16_t queue_id)
>>>  	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
>>>  	struct i40e_hw *hw =
>> I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>>>  	uint16_t interval =
>>> -		i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL, 0, 0);
>>> +		i40e_calc_itr_interval(0, 0);
>>>  	uint16_t msix_intr;
>>>
>>>  	msix_intr = intr_handle->intr_vec[queue_id]; @@ -1859,8 +1859,6 @@
>>> i40evf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t
>>> queue_id)
>>>
>>>  	I40EVF_WRITE_FLUSH(hw);
>>>
>>> -	rte_intr_enable(&pci_dev->intr_handle);
>>> -
>>>  	return 0;
>>>  }
>>>
>>> @@ -2023,10 +2021,8 @@ i40evf_dev_start(struct rte_eth_dev *dev)
>>>  	 * queue interrupt to other VFIO vectors.
>>>  	 * So clear uio/vfio intr/evevnfd first to avoid failure.
>>>  	 */
>>> -	if (dev->data->dev_conf.intr_conf.rxq != 0) {
>>> -		rte_intr_disable(intr_handle);
>>> +	if (dev->data->dev_conf.intr_conf.rxq != 0)
>>>  		rte_intr_enable(intr_handle);
>>> -	}
>>>
>>>  	i40evf_enable_queues_intr(dev);
>>>
>>> @@ -2050,6 +2046,9 @@ i40evf_dev_stop(struct rte_eth_dev *dev)
>>>
>>>  	PMD_INIT_FUNC_TRACE();
>>>
>>> +	if (dev->data->dev_conf.intr_conf.rxq != 0)
>>> +		rte_intr_disable(intr_handle);
>>> +
>>>  	if (hw->adapter_stopped == 1)
>>>  		return;
>>>  	i40evf_stop_queues(dev);
>>> @@ -2285,9 +2284,8 @@ static void
>>>  i40evf_dev_close(struct rte_eth_dev *dev)  {
>>>  	struct i40e_hw *hw =
>> I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>>> -	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
>>> -	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
>>>
>>> +	rte_eal_alarm_cancel(i40evf_dev_alarm_handler, dev);
>>>  	i40evf_dev_stop(dev);
>>>  	i40e_dev_free_queues(dev);
>>>  	/*
>>> @@ -2300,12 +2298,6 @@ i40evf_dev_close(struct rte_eth_dev *dev)
>>>
>>>  	i40evf_reset_vf(hw);
>>>  	i40e_shutdown_adminq(hw);
>>> -	/* disable uio intr before callback unregister */
>>> -	rte_intr_disable(intr_handle);
>>> -
>>> -	/* unregister callback func from eal lib */
>>> -	rte_intr_callback_unregister(intr_handle,
>>> -				     i40evf_dev_interrupt_handler, dev);
>>>  	i40evf_disable_irq0(hw);
>>>  }
>>>
>>
>> Rather than adding a polling routine internally, why not change the driver to
>> not support Link State or receive interrupts. Better yet, let the application
>> decide.
>> Keep the interrupt logic but only enable interrupts if application has requested
>> LSC or recveive interrupt mode.
> 
> The interrupt handler is not only for LSC (actually VF does not support LSC) or rx interrupt mode, 
> it is used for PF to VF message through admin queue which is always required.

I guess the question is, is it possible to disable Rx interrupts? And if
possible can user control this enable/disable per interrupt source?

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [dpdk-dev] [PATCH v2] net/i40e: remove VF interrupt handler
  2018-06-26  9:15     ` Ferruh Yigit
@ 2018-06-26 10:04       ` Zhang, Qi Z
  2018-06-26 10:10         ` Ferruh Yigit
  0 siblings, 1 reply; 6+ messages in thread
From: Zhang, Qi Z @ 2018-06-26 10:04 UTC (permalink / raw)
  To: Yigit, Ferruh, Stephen Hemminger; +Cc: Xing, Beilei, Wu, Jingjing, Yu, De, dev



> -----Original Message-----
> From: Yigit, Ferruh
> Sent: Tuesday, June 26, 2018 5:15 PM
> To: Zhang, Qi Z <qi.z.zhang@intel.com>; Stephen Hemminger
> <stephen@networkplumber.org>
> Cc: Xing, Beilei <beilei.xing@intel.com>; Wu, Jingjing <jingjing.wu@intel.com>;
> Yu, De <de.yu@intel.com>; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v2] net/i40e: remove VF interrupt handler
> 
> On 6/24/2018 11:56 AM, Zhang, Qi Z wrote:
> > Hi Stephen:
> >
> >> -----Original Message-----
> >> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> >> Sent: Friday, June 22, 2018 11:44 PM
> >> To: Zhang, Qi Z <qi.z.zhang@intel.com>
> >> Cc: Xing, Beilei <beilei.xing@intel.com>; Wu, Jingjing
> >> <jingjing.wu@intel.com>; Yu, De <de.yu@intel.com>; dev@dpdk.org
> >> Subject: Re: [dpdk-dev] [PATCH v2] net/i40e: remove VF interrupt
> >> handler
> >>
> >> On Fri, 22 Jun 2018 08:44:14 +0800
> >> Qi Zhang <qi.z.zhang@intel.com> wrote:
> >>
> >>> For i40evf, internal rx interrupt and adminq interrupt share the
> >>> same source, that cause a lot cpu cycles be wasted on interrupt
> >>> handler on rx path. This is complained by customers which require
> >>> low latency (when set I40E_ITR_INTERVAL to small value), but have to
> >>> be sufferred by tremendous interrupts handling that eat significant CPU
> resources.
> >>>
> >>> The patch disable pci interrupt and remove the interrupt handler,
> >>> replace it with a low frequency (50ms) interrupt polling daemon
> >>> which is implemented by registering a alarm callback periodly, this
> >>> save CPU time significently: On a typical x86 server with 2.1GHz
> >>> CPU, with low latency configure (32us) we saw CPU usage from top
> >>> commmand reduced from 20% to 0% on management core in testpmd).
> >>>
> >>> Also with the new method we can remove compile option:
> >>> I40E_ITR_INTERVAL which is used to balance between low latency and
> >>> low
> >> CPU usage previously.
> >>> Now we don't need it since we can reach both at same time.
> >>>
> >>> Suggested-by: Jingjing Wu <jingjing.wu@intel.com>
> >>> Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
> >>> ---
> >>>
> >>> v2:
> >>> - update doc
> >>>
> >>>  config/common_base                |  2 --
> >>>  doc/guides/nics/i40e.rst          |  5 -----
> >>>  drivers/net/i40e/i40e_ethdev.c    |  3 +--
> >>>  drivers/net/i40e/i40e_ethdev.h    | 22 +++++++++++-----------
> >>>  drivers/net/i40e/i40e_ethdev_vf.c | 36
> >>> ++++++++++++++----------------------
> >>>  5 files changed, 26 insertions(+), 42 deletions(-)
> >>>
> >>> diff --git a/config/common_base b/config/common_base index
> >>> 6b0d1cbbb..9e21c6865 100644
> >>> --- a/config/common_base
> >>> +++ b/config/common_base
> >>> @@ -264,8 +264,6 @@ CONFIG_RTE_LIBRTE_I40E_INC_VECTOR=y
> >>>  CONFIG_RTE_LIBRTE_I40E_16BYTE_RX_DESC=n
> >>>  CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_PF=64
> >>>  CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_VM=4
> >>> -# interval up to 8160 us, aligned to 2 (or default value)
> >>> -CONFIG_RTE_LIBRTE_I40E_ITR_INTERVAL=-1
> >>>
> >>>  #
> >>>  # Compile burst-oriented FM10K PMD
> >>> diff --git a/doc/guides/nics/i40e.rst b/doc/guides/nics/i40e.rst
> >>> index
> >>> 18549bf5a..3fc4ceac7 100644
> >>> --- a/doc/guides/nics/i40e.rst
> >>> +++ b/doc/guides/nics/i40e.rst
> >>> @@ -96,11 +96,6 @@ Please note that enabling debugging options may
> >> affect system performance.
> >>>
> >>>    Number of queues reserved for each VMDQ Pool.
> >>>
> >>> -- ``CONFIG_RTE_LIBRTE_I40E_ITR_INTERVAL`` (default ``-1``)
> >>> -
> >>> -  Interrupt Throttling interval.
> >>> -
> >>> -
> >>>  Runtime Config Options
> >>>  ~~~~~~~~~~~~~~~~~~~~~~
> >>>
> >>> diff --git a/drivers/net/i40e/i40e_ethdev.c
> >>> b/drivers/net/i40e/i40e_ethdev.c index 13c5d3296..c8f9566e0 100644
> >>> --- a/drivers/net/i40e/i40e_ethdev.c
> >>> +++ b/drivers/net/i40e/i40e_ethdev.c
> >>> @@ -1829,8 +1829,7 @@ __vsi_queues_bind_intr(struct i40e_vsi *vsi,
> >> uint16_t msix_vect,
> >>>  	/* Write first RX queue to Link list register as the head element */
> >>>  	if (vsi->type != I40E_VSI_SRIOV) {
> >>>  		uint16_t interval =
> >>> -			i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL, 1,
> >>> -					       pf->support_multi_driver);
> >>> +			i40e_calc_itr_interval(1, pf->support_multi_driver);
> >>>
> >>>  		if (msix_vect == I40E_MISC_VEC_ID) {
> >>>  			I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, diff --git
> >>> a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
> >>> index 11c4c76bd..599993dac 100644
> >>> --- a/drivers/net/i40e/i40e_ethdev.h
> >>> +++ b/drivers/net/i40e/i40e_ethdev.h
> >>> @@ -178,7 +178,7 @@ enum i40e_flxpld_layer_idx {
> >>>  #define I40E_ITR_INDEX_NONE             3
> >>>  #define I40E_QUEUE_ITR_INTERVAL_DEFAULT 32 /* 32 us */
> >>>  #define I40E_QUEUE_ITR_INTERVAL_MAX     8160 /* 8160 us */
> >>> -#define I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT 8160 /* 8160 us */
> >>> +#define I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT 32 /* 32 us */
> >>>  /* Special FW support this floating VEB feature */  #define
> >>> FLOATING_VEB_SUPPORTED_FW_MAJ 5  #define
> >> FLOATING_VEB_SUPPORTED_FW_MIN
> >>> 0 @@ -1328,17 +1328,17 @@ i40e_align_floor(int n)  }
> >>>
> >>>  static inline uint16_t
> >>> -i40e_calc_itr_interval(int16_t interval, bool is_pf, bool
> >>> is_multi_drv)
> >>> +i40e_calc_itr_interval(bool is_pf, bool is_multi_drv)
> >>>  {
> >>> -	if (interval < 0 || interval > I40E_QUEUE_ITR_INTERVAL_MAX) {
> >>> -		if (is_multi_drv) {
> >>> -			interval = I40E_QUEUE_ITR_INTERVAL_MAX;
> >>> -		} else {
> >>> -			if (is_pf)
> >>> -				interval = I40E_QUEUE_ITR_INTERVAL_DEFAULT;
> >>> -			else
> >>> -				interval = I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT;
> >>> -		}
> >>> +	uint16_t interval = 0;
> >>> +
> >>> +	if (is_multi_drv) {
> >>> +		interval = I40E_QUEUE_ITR_INTERVAL_MAX;
> >>> +	} else {
> >>> +		if (is_pf)
> >>> +			interval = I40E_QUEUE_ITR_INTERVAL_DEFAULT;
> >>> +		else
> >>> +			interval = I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT;
> >>>  	}
> >>>
> >>>  	/* Convert to hardware count, as writing each 1 represents 2 us */
> >>> diff --git a/drivers/net/i40e/i40e_ethdev_vf.c
> >>> b/drivers/net/i40e/i40e_ethdev_vf.c
> >>> index 804e44530..ad5c069e8 100644
> >>> --- a/drivers/net/i40e/i40e_ethdev_vf.c
> >>> +++ b/drivers/net/i40e/i40e_ethdev_vf.c
> >>> @@ -44,6 +44,8 @@
> >>>  #define I40EVF_BUSY_WAIT_COUNT 50
> >>>  #define MAX_RESET_WAIT_CNT     20
> >>>
> >>> +#define I40EVF_ALARM_INTERVAL 50000 /* us */
> >>> +
> >>>  struct i40evf_arq_msg_info {
> >>>  	enum virtchnl_ops ops;
> >>>  	enum i40e_status_code result;
> >>> @@ -1133,7 +1135,7 @@ i40evf_init_vf(struct rte_eth_dev *dev)
> >>>  	struct i40e_hw *hw =
> >> I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> >>>  	struct i40e_vf *vf =
> >> I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
> >>>  	uint16_t interval =
> >>> -		i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL, 0, 0);
> >>> +		i40e_calc_itr_interval(0, 0);
> >>>
> >>>  	vf->adapter =
> I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
> >>>  	vf->dev_data = dev->data;
> >>> @@ -1370,7 +1372,7 @@ i40evf_handle_aq_msg(struct rte_eth_dev *dev)
> >>>   *  void
> >>>   */
> >>>  static void
> >>> -i40evf_dev_interrupt_handler(void *param)
> >>> +i40evf_dev_alarm_handler(void *param)
> >>>  {
> >>>  	struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
> >>>  	struct i40e_hw *hw =
> >> I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> >>> @@ -1399,6 +1401,8 @@ i40evf_dev_interrupt_handler(void *param)
> >>>
> >>>  done:
> >>>  	i40evf_enable_irq0(hw);
> >>> +	rte_eal_alarm_set(I40EVF_ALARM_INTERVAL,
> >>> +			  i40evf_dev_alarm_handler, dev);
> >>>  }
> >>>
> >>>  static int
> >>> @@ -1442,12 +1446,8 @@ i40evf_dev_init(struct rte_eth_dev *eth_dev)
> >>>  		return -1;
> >>>  	}
> >>>
> >>> -	/* register callback func to eal lib */
> >>> -	rte_intr_callback_register(&pci_dev->intr_handle,
> >>> -		i40evf_dev_interrupt_handler, (void *)eth_dev);
> >>> -
> >>> -	/* enable uio intr after callback register */
> >>> -	rte_intr_enable(&pci_dev->intr_handle);
> >>> +	rte_eal_alarm_set(I40EVF_ALARM_INTERVAL,
> >>> +			  i40evf_dev_alarm_handler, eth_dev);
> >>>
> >>>  	/* configure and enable device interrupt */
> >>>  	i40evf_enable_irq0(hw);
> >>> @@ -1836,7 +1836,7 @@ i40evf_dev_rx_queue_intr_enable(struct
> >> rte_eth_dev *dev, uint16_t queue_id)
> >>>  	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
> >>>  	struct i40e_hw *hw =
> >> I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> >>>  	uint16_t interval =
> >>> -		i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL, 0, 0);
> >>> +		i40e_calc_itr_interval(0, 0);
> >>>  	uint16_t msix_intr;
> >>>
> >>>  	msix_intr = intr_handle->intr_vec[queue_id]; @@ -1859,8 +1859,6
> @@
> >>> i40evf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t
> >>> queue_id)
> >>>
> >>>  	I40EVF_WRITE_FLUSH(hw);
> >>>
> >>> -	rte_intr_enable(&pci_dev->intr_handle);
> >>> -
> >>>  	return 0;
> >>>  }
> >>>
> >>> @@ -2023,10 +2021,8 @@ i40evf_dev_start(struct rte_eth_dev *dev)
> >>>  	 * queue interrupt to other VFIO vectors.
> >>>  	 * So clear uio/vfio intr/evevnfd first to avoid failure.
> >>>  	 */
> >>> -	if (dev->data->dev_conf.intr_conf.rxq != 0) {
> >>> -		rte_intr_disable(intr_handle);
> >>> +	if (dev->data->dev_conf.intr_conf.rxq != 0)
> >>>  		rte_intr_enable(intr_handle);
> >>> -	}
> >>>
> >>>  	i40evf_enable_queues_intr(dev);
> >>>
> >>> @@ -2050,6 +2046,9 @@ i40evf_dev_stop(struct rte_eth_dev *dev)
> >>>
> >>>  	PMD_INIT_FUNC_TRACE();
> >>>
> >>> +	if (dev->data->dev_conf.intr_conf.rxq != 0)
> >>> +		rte_intr_disable(intr_handle);
> >>> +
> >>>  	if (hw->adapter_stopped == 1)
> >>>  		return;
> >>>  	i40evf_stop_queues(dev);
> >>> @@ -2285,9 +2284,8 @@ static void
> >>>  i40evf_dev_close(struct rte_eth_dev *dev)  {
> >>>  	struct i40e_hw *hw =
> >> I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> >>> -	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
> >>> -	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
> >>>
> >>> +	rte_eal_alarm_cancel(i40evf_dev_alarm_handler, dev);
> >>>  	i40evf_dev_stop(dev);
> >>>  	i40e_dev_free_queues(dev);
> >>>  	/*
> >>> @@ -2300,12 +2298,6 @@ i40evf_dev_close(struct rte_eth_dev *dev)
> >>>
> >>>  	i40evf_reset_vf(hw);
> >>>  	i40e_shutdown_adminq(hw);
> >>> -	/* disable uio intr before callback unregister */
> >>> -	rte_intr_disable(intr_handle);
> >>> -
> >>> -	/* unregister callback func from eal lib */
> >>> -	rte_intr_callback_unregister(intr_handle,
> >>> -				     i40evf_dev_interrupt_handler, dev);
> >>>  	i40evf_disable_irq0(hw);
> >>>  }
> >>>
> >>
> >> Rather than adding a polling routine internally, why not change the
> >> driver to not support Link State or receive interrupts. Better yet,
> >> let the application decide.
> >> Keep the interrupt logic but only enable interrupts if application
> >> has requested LSC or recveive interrupt mode.
> >
> > The interrupt handler is not only for LSC (actually VF does not
> > support LSC) or rx interrupt mode, it is used for PF to VF message through
> admin queue which is always required.
> 
> I guess the question is, is it possible to disable Rx interrupts? And if possible
> can user control this enable/disable per interrupt source?

The problem is Rx interrupt is shared with admin queue interrupt, they are enable/disable together.
So if we want to get admin queue message from interrupt , we have to suffer the massive interrupt handling.
But admin queue used for pf/vf channel is supposed can't be closed, (though it can be delayed a little bit)

Regards
Qi



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [dpdk-dev] [PATCH v2] net/i40e: remove VF interrupt handler
  2018-06-26 10:04       ` Zhang, Qi Z
@ 2018-06-26 10:10         ` Ferruh Yigit
  0 siblings, 0 replies; 6+ messages in thread
From: Ferruh Yigit @ 2018-06-26 10:10 UTC (permalink / raw)
  To: Zhang, Qi Z, Stephen Hemminger; +Cc: Xing, Beilei, Wu, Jingjing, Yu, De, dev

On 6/26/2018 11:04 AM, Zhang, Qi Z wrote:
> 
> 
>> -----Original Message-----
>> From: Yigit, Ferruh
>> Sent: Tuesday, June 26, 2018 5:15 PM
>> To: Zhang, Qi Z <qi.z.zhang@intel.com>; Stephen Hemminger
>> <stephen@networkplumber.org>
>> Cc: Xing, Beilei <beilei.xing@intel.com>; Wu, Jingjing <jingjing.wu@intel.com>;
>> Yu, De <de.yu@intel.com>; dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH v2] net/i40e: remove VF interrupt handler
>>
>> On 6/24/2018 11:56 AM, Zhang, Qi Z wrote:
>>> Hi Stephen:
>>>
>>>> -----Original Message-----
>>>> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
>>>> Sent: Friday, June 22, 2018 11:44 PM
>>>> To: Zhang, Qi Z <qi.z.zhang@intel.com>
>>>> Cc: Xing, Beilei <beilei.xing@intel.com>; Wu, Jingjing
>>>> <jingjing.wu@intel.com>; Yu, De <de.yu@intel.com>; dev@dpdk.org
>>>> Subject: Re: [dpdk-dev] [PATCH v2] net/i40e: remove VF interrupt
>>>> handler
>>>>
>>>> On Fri, 22 Jun 2018 08:44:14 +0800
>>>> Qi Zhang <qi.z.zhang@intel.com> wrote:
>>>>
>>>>> For i40evf, internal rx interrupt and adminq interrupt share the
>>>>> same source, that cause a lot cpu cycles be wasted on interrupt
>>>>> handler on rx path. This is complained by customers which require
>>>>> low latency (when set I40E_ITR_INTERVAL to small value), but have to
>>>>> be sufferred by tremendous interrupts handling that eat significant CPU
>> resources.
>>>>>
>>>>> The patch disable pci interrupt and remove the interrupt handler,
>>>>> replace it with a low frequency (50ms) interrupt polling daemon
>>>>> which is implemented by registering a alarm callback periodly, this
>>>>> save CPU time significently: On a typical x86 server with 2.1GHz
>>>>> CPU, with low latency configure (32us) we saw CPU usage from top
>>>>> commmand reduced from 20% to 0% on management core in testpmd).
>>>>>
>>>>> Also with the new method we can remove compile option:
>>>>> I40E_ITR_INTERVAL which is used to balance between low latency and
>>>>> low
>>>> CPU usage previously.
>>>>> Now we don't need it since we can reach both at same time.
>>>>>
>>>>> Suggested-by: Jingjing Wu <jingjing.wu@intel.com>
>>>>> Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
>>>>> ---
>>>>>
>>>>> v2:
>>>>> - update doc
>>>>>
>>>>>  config/common_base                |  2 --
>>>>>  doc/guides/nics/i40e.rst          |  5 -----
>>>>>  drivers/net/i40e/i40e_ethdev.c    |  3 +--
>>>>>  drivers/net/i40e/i40e_ethdev.h    | 22 +++++++++++-----------
>>>>>  drivers/net/i40e/i40e_ethdev_vf.c | 36
>>>>> ++++++++++++++----------------------
>>>>>  5 files changed, 26 insertions(+), 42 deletions(-)
>>>>>
>>>>> diff --git a/config/common_base b/config/common_base index
>>>>> 6b0d1cbbb..9e21c6865 100644
>>>>> --- a/config/common_base
>>>>> +++ b/config/common_base
>>>>> @@ -264,8 +264,6 @@ CONFIG_RTE_LIBRTE_I40E_INC_VECTOR=y
>>>>>  CONFIG_RTE_LIBRTE_I40E_16BYTE_RX_DESC=n
>>>>>  CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_PF=64
>>>>>  CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_VM=4
>>>>> -# interval up to 8160 us, aligned to 2 (or default value)
>>>>> -CONFIG_RTE_LIBRTE_I40E_ITR_INTERVAL=-1
>>>>>
>>>>>  #
>>>>>  # Compile burst-oriented FM10K PMD
>>>>> diff --git a/doc/guides/nics/i40e.rst b/doc/guides/nics/i40e.rst
>>>>> index
>>>>> 18549bf5a..3fc4ceac7 100644
>>>>> --- a/doc/guides/nics/i40e.rst
>>>>> +++ b/doc/guides/nics/i40e.rst
>>>>> @@ -96,11 +96,6 @@ Please note that enabling debugging options may
>>>> affect system performance.
>>>>>
>>>>>    Number of queues reserved for each VMDQ Pool.
>>>>>
>>>>> -- ``CONFIG_RTE_LIBRTE_I40E_ITR_INTERVAL`` (default ``-1``)
>>>>> -
>>>>> -  Interrupt Throttling interval.
>>>>> -
>>>>> -
>>>>>  Runtime Config Options
>>>>>  ~~~~~~~~~~~~~~~~~~~~~~
>>>>>
>>>>> diff --git a/drivers/net/i40e/i40e_ethdev.c
>>>>> b/drivers/net/i40e/i40e_ethdev.c index 13c5d3296..c8f9566e0 100644
>>>>> --- a/drivers/net/i40e/i40e_ethdev.c
>>>>> +++ b/drivers/net/i40e/i40e_ethdev.c
>>>>> @@ -1829,8 +1829,7 @@ __vsi_queues_bind_intr(struct i40e_vsi *vsi,
>>>> uint16_t msix_vect,
>>>>>  	/* Write first RX queue to Link list register as the head element */
>>>>>  	if (vsi->type != I40E_VSI_SRIOV) {
>>>>>  		uint16_t interval =
>>>>> -			i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL, 1,
>>>>> -					       pf->support_multi_driver);
>>>>> +			i40e_calc_itr_interval(1, pf->support_multi_driver);
>>>>>
>>>>>  		if (msix_vect == I40E_MISC_VEC_ID) {
>>>>>  			I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, diff --git
>>>>> a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
>>>>> index 11c4c76bd..599993dac 100644
>>>>> --- a/drivers/net/i40e/i40e_ethdev.h
>>>>> +++ b/drivers/net/i40e/i40e_ethdev.h
>>>>> @@ -178,7 +178,7 @@ enum i40e_flxpld_layer_idx {
>>>>>  #define I40E_ITR_INDEX_NONE             3
>>>>>  #define I40E_QUEUE_ITR_INTERVAL_DEFAULT 32 /* 32 us */
>>>>>  #define I40E_QUEUE_ITR_INTERVAL_MAX     8160 /* 8160 us */
>>>>> -#define I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT 8160 /* 8160 us */
>>>>> +#define I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT 32 /* 32 us */
>>>>>  /* Special FW support this floating VEB feature */  #define
>>>>> FLOATING_VEB_SUPPORTED_FW_MAJ 5  #define
>>>> FLOATING_VEB_SUPPORTED_FW_MIN
>>>>> 0 @@ -1328,17 +1328,17 @@ i40e_align_floor(int n)  }
>>>>>
>>>>>  static inline uint16_t
>>>>> -i40e_calc_itr_interval(int16_t interval, bool is_pf, bool
>>>>> is_multi_drv)
>>>>> +i40e_calc_itr_interval(bool is_pf, bool is_multi_drv)
>>>>>  {
>>>>> -	if (interval < 0 || interval > I40E_QUEUE_ITR_INTERVAL_MAX) {
>>>>> -		if (is_multi_drv) {
>>>>> -			interval = I40E_QUEUE_ITR_INTERVAL_MAX;
>>>>> -		} else {
>>>>> -			if (is_pf)
>>>>> -				interval = I40E_QUEUE_ITR_INTERVAL_DEFAULT;
>>>>> -			else
>>>>> -				interval = I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT;
>>>>> -		}
>>>>> +	uint16_t interval = 0;
>>>>> +
>>>>> +	if (is_multi_drv) {
>>>>> +		interval = I40E_QUEUE_ITR_INTERVAL_MAX;
>>>>> +	} else {
>>>>> +		if (is_pf)
>>>>> +			interval = I40E_QUEUE_ITR_INTERVAL_DEFAULT;
>>>>> +		else
>>>>> +			interval = I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT;
>>>>>  	}
>>>>>
>>>>>  	/* Convert to hardware count, as writing each 1 represents 2 us */
>>>>> diff --git a/drivers/net/i40e/i40e_ethdev_vf.c
>>>>> b/drivers/net/i40e/i40e_ethdev_vf.c
>>>>> index 804e44530..ad5c069e8 100644
>>>>> --- a/drivers/net/i40e/i40e_ethdev_vf.c
>>>>> +++ b/drivers/net/i40e/i40e_ethdev_vf.c
>>>>> @@ -44,6 +44,8 @@
>>>>>  #define I40EVF_BUSY_WAIT_COUNT 50
>>>>>  #define MAX_RESET_WAIT_CNT     20
>>>>>
>>>>> +#define I40EVF_ALARM_INTERVAL 50000 /* us */
>>>>> +
>>>>>  struct i40evf_arq_msg_info {
>>>>>  	enum virtchnl_ops ops;
>>>>>  	enum i40e_status_code result;
>>>>> @@ -1133,7 +1135,7 @@ i40evf_init_vf(struct rte_eth_dev *dev)
>>>>>  	struct i40e_hw *hw =
>>>> I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>>>>>  	struct i40e_vf *vf =
>>>> I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
>>>>>  	uint16_t interval =
>>>>> -		i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL, 0, 0);
>>>>> +		i40e_calc_itr_interval(0, 0);
>>>>>
>>>>>  	vf->adapter =
>> I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
>>>>>  	vf->dev_data = dev->data;
>>>>> @@ -1370,7 +1372,7 @@ i40evf_handle_aq_msg(struct rte_eth_dev *dev)
>>>>>   *  void
>>>>>   */
>>>>>  static void
>>>>> -i40evf_dev_interrupt_handler(void *param)
>>>>> +i40evf_dev_alarm_handler(void *param)
>>>>>  {
>>>>>  	struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
>>>>>  	struct i40e_hw *hw =
>>>> I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>>>>> @@ -1399,6 +1401,8 @@ i40evf_dev_interrupt_handler(void *param)
>>>>>
>>>>>  done:
>>>>>  	i40evf_enable_irq0(hw);
>>>>> +	rte_eal_alarm_set(I40EVF_ALARM_INTERVAL,
>>>>> +			  i40evf_dev_alarm_handler, dev);
>>>>>  }
>>>>>
>>>>>  static int
>>>>> @@ -1442,12 +1446,8 @@ i40evf_dev_init(struct rte_eth_dev *eth_dev)
>>>>>  		return -1;
>>>>>  	}
>>>>>
>>>>> -	/* register callback func to eal lib */
>>>>> -	rte_intr_callback_register(&pci_dev->intr_handle,
>>>>> -		i40evf_dev_interrupt_handler, (void *)eth_dev);
>>>>> -
>>>>> -	/* enable uio intr after callback register */
>>>>> -	rte_intr_enable(&pci_dev->intr_handle);
>>>>> +	rte_eal_alarm_set(I40EVF_ALARM_INTERVAL,
>>>>> +			  i40evf_dev_alarm_handler, eth_dev);
>>>>>
>>>>>  	/* configure and enable device interrupt */
>>>>>  	i40evf_enable_irq0(hw);
>>>>> @@ -1836,7 +1836,7 @@ i40evf_dev_rx_queue_intr_enable(struct
>>>> rte_eth_dev *dev, uint16_t queue_id)
>>>>>  	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
>>>>>  	struct i40e_hw *hw =
>>>> I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>>>>>  	uint16_t interval =
>>>>> -		i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL, 0, 0);
>>>>> +		i40e_calc_itr_interval(0, 0);
>>>>>  	uint16_t msix_intr;
>>>>>
>>>>>  	msix_intr = intr_handle->intr_vec[queue_id]; @@ -1859,8 +1859,6
>> @@
>>>>> i40evf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t
>>>>> queue_id)
>>>>>
>>>>>  	I40EVF_WRITE_FLUSH(hw);
>>>>>
>>>>> -	rte_intr_enable(&pci_dev->intr_handle);
>>>>> -
>>>>>  	return 0;
>>>>>  }
>>>>>
>>>>> @@ -2023,10 +2021,8 @@ i40evf_dev_start(struct rte_eth_dev *dev)
>>>>>  	 * queue interrupt to other VFIO vectors.
>>>>>  	 * So clear uio/vfio intr/evevnfd first to avoid failure.
>>>>>  	 */
>>>>> -	if (dev->data->dev_conf.intr_conf.rxq != 0) {
>>>>> -		rte_intr_disable(intr_handle);
>>>>> +	if (dev->data->dev_conf.intr_conf.rxq != 0)
>>>>>  		rte_intr_enable(intr_handle);
>>>>> -	}
>>>>>
>>>>>  	i40evf_enable_queues_intr(dev);
>>>>>
>>>>> @@ -2050,6 +2046,9 @@ i40evf_dev_stop(struct rte_eth_dev *dev)
>>>>>
>>>>>  	PMD_INIT_FUNC_TRACE();
>>>>>
>>>>> +	if (dev->data->dev_conf.intr_conf.rxq != 0)
>>>>> +		rte_intr_disable(intr_handle);
>>>>> +
>>>>>  	if (hw->adapter_stopped == 1)
>>>>>  		return;
>>>>>  	i40evf_stop_queues(dev);
>>>>> @@ -2285,9 +2284,8 @@ static void
>>>>>  i40evf_dev_close(struct rte_eth_dev *dev)  {
>>>>>  	struct i40e_hw *hw =
>>>> I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>>>>> -	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
>>>>> -	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
>>>>>
>>>>> +	rte_eal_alarm_cancel(i40evf_dev_alarm_handler, dev);
>>>>>  	i40evf_dev_stop(dev);
>>>>>  	i40e_dev_free_queues(dev);
>>>>>  	/*
>>>>> @@ -2300,12 +2298,6 @@ i40evf_dev_close(struct rte_eth_dev *dev)
>>>>>
>>>>>  	i40evf_reset_vf(hw);
>>>>>  	i40e_shutdown_adminq(hw);
>>>>> -	/* disable uio intr before callback unregister */
>>>>> -	rte_intr_disable(intr_handle);
>>>>> -
>>>>> -	/* unregister callback func from eal lib */
>>>>> -	rte_intr_callback_unregister(intr_handle,
>>>>> -				     i40evf_dev_interrupt_handler, dev);
>>>>>  	i40evf_disable_irq0(hw);
>>>>>  }
>>>>>
>>>>
>>>> Rather than adding a polling routine internally, why not change the
>>>> driver to not support Link State or receive interrupts. Better yet,
>>>> let the application decide.
>>>> Keep the interrupt logic but only enable interrupts if application
>>>> has requested LSC or recveive interrupt mode.
>>>
>>> The interrupt handler is not only for LSC (actually VF does not
>>> support LSC) or rx interrupt mode, it is used for PF to VF message through
>> admin queue which is always required.
>>
>> I guess the question is, is it possible to disable Rx interrupts? And if possible
>> can user control this enable/disable per interrupt source?
> 
> The problem is Rx interrupt is shared with admin queue interrupt, they are enable/disable together.

OK, thanks, this clarifies.

> So if we want to get admin queue message from interrupt , we have to suffer the massive interrupt handling.
> But admin queue used for pf/vf channel is supposed can't be closed, (though it can be delayed a little bit)
> 
> Regards
> Qi
> 
> 

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2018-06-26 10:10 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-06-22  0:44 [dpdk-dev] [PATCH v2] net/i40e: remove VF interrupt handler Qi Zhang
2018-06-22 15:44 ` Stephen Hemminger
2018-06-24 10:56   ` Zhang, Qi Z
2018-06-26  9:15     ` Ferruh Yigit
2018-06-26 10:04       ` Zhang, Qi Z
2018-06-26 10:10         ` Ferruh Yigit

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).