DPDK patches and discussions
 help / color / mirror / Atom feed
From: "Xing, Beilei" <beilei.xing@intel.com>
To: "Zhang, Qi Z" <qi.z.zhang@intel.com>
Cc: "Wu, Jingjing" <jingjing.wu@intel.com>,
	"Yu, De" <de.yu@intel.com>, "dev@dpdk.org" <dev@dpdk.org>
Subject: Re: [dpdk-dev] [PATCH] net/i40e: remove VF interrupt handler
Date: Wed, 27 Jun 2018 03:48:14 +0000	[thread overview]
Message-ID: <94479800C636CB44BD422CB454846E01321CF9F1@SHSMSX101.ccr.corp.intel.com> (raw)
In-Reply-To: <20180607013156.28763-1-qi.z.zhang@intel.com>



> -----Original Message-----
> From: Zhang, Qi Z
> Sent: Thursday, June 7, 2018 9:32 AM
> To: Xing, Beilei <beilei.xing@intel.com>
> Cc: Wu, Jingjing <jingjing.wu@intel.com>; Yu, De <de.yu@intel.com>;
> dev@dpdk.org; Zhang, Qi Z <qi.z.zhang@intel.com>
> Subject: [PATCH] net/i40e: remove VF interrupt handler
> 
> For i40evf, the internal rx interrupt and the adminq interrupt share the same
> source, which causes a lot of CPU cycles to be wasted in the interrupt handler
> on the rx path. Customers who require low latency (by setting
> I40E_ITR_INTERVAL to a small value) have complained about this, because they
> then suffer from a tremendous number of interrupts whose handling eats
> significant CPU resources.
> 
> This patch disables the pci interrupt and removes the interrupt handler,
> replacing it with a low frequency (50ms) interrupt polling daemon which is
> implemented by periodically registering an alarm callback. This saves CPU time
> significantly: on a typical x86 server with a 2.1GHz CPU and a low latency
> configuration (32us), we saw the CPU usage reported by the top command drop
> from 20% to 0% on the management core in testpmd.
> 
> Also, with the new method we can remove the compile option
> I40E_ITR_INTERVAL, which was previously used to balance low latency against
> low CPU usage.
> We no longer need it since we can achieve both at the same time.
> 
> Suggested-by: Jingjing Wu <jingjing.wu@intel.com>
> Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
> ---
>  config/common_base                |  2 --
>  drivers/net/i40e/i40e_ethdev.c    |  3 +--
>  drivers/net/i40e/i40e_ethdev.h    | 22 +++++++++++-----------
>  drivers/net/i40e/i40e_ethdev_vf.c | 36 ++++++++++++++----------------------
>  4 files changed, 26 insertions(+), 37 deletions(-)
> 
> diff --git a/config/common_base b/config/common_base index
> 6b0d1cbbb..9e21c6865 100644
> --- a/config/common_base
> +++ b/config/common_base
> @@ -264,8 +264,6 @@ CONFIG_RTE_LIBRTE_I40E_INC_VECTOR=y
>  CONFIG_RTE_LIBRTE_I40E_16BYTE_RX_DESC=n
>  CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_PF=64
>  CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_VM=4
> -# interval up to 8160 us, aligned to 2 (or default value)
> -CONFIG_RTE_LIBRTE_I40E_ITR_INTERVAL=-1

It seems "High Performance and per Packet Latency Tradeoff" was missed.

> 
>  #
>  # Compile burst-oriented FM10K PMD
> diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
> index 13c5d3296..c8f9566e0 100644
> --- a/drivers/net/i40e/i40e_ethdev.c
> +++ b/drivers/net/i40e/i40e_ethdev.c
> @@ -1829,8 +1829,7 @@ __vsi_queues_bind_intr(struct i40e_vsi *vsi,
> uint16_t msix_vect,
>  	/* Write first RX queue to Link list register as the head element */
>  	if (vsi->type != I40E_VSI_SRIOV) {
>  		uint16_t interval =
> -
> 	i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL, 1,
> -					       pf->support_multi_driver);
> +			i40e_calc_itr_interval(1, pf->support_multi_driver);
> 
>  		if (msix_vect == I40E_MISC_VEC_ID) {
>  			I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, diff --git
> a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h index
> 11c4c76bd..599993dac 100644
> --- a/drivers/net/i40e/i40e_ethdev.h
> +++ b/drivers/net/i40e/i40e_ethdev.h
> @@ -178,7 +178,7 @@ enum i40e_flxpld_layer_idx {
>  #define I40E_ITR_INDEX_NONE             3
>  #define I40E_QUEUE_ITR_INTERVAL_DEFAULT 32 /* 32 us */
>  #define I40E_QUEUE_ITR_INTERVAL_MAX     8160 /* 8160 us */
> -#define I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT 8160 /* 8160 us */
> +#define I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT 32 /* 32 us */
>  /* Special FW support this floating VEB feature */  #define
> FLOATING_VEB_SUPPORTED_FW_MAJ 5  #define
> FLOATING_VEB_SUPPORTED_FW_MIN 0 @@ -1328,17 +1328,17 @@
> i40e_align_floor(int n)  }
> 
>  static inline uint16_t
> -i40e_calc_itr_interval(int16_t interval, bool is_pf, bool is_multi_drv)
> +i40e_calc_itr_interval(bool is_pf, bool is_multi_drv)
>  {
> -	if (interval < 0 || interval > I40E_QUEUE_ITR_INTERVAL_MAX) {
> -		if (is_multi_drv) {
> -			interval = I40E_QUEUE_ITR_INTERVAL_MAX;
> -		} else {
> -			if (is_pf)
> -				interval =
> I40E_QUEUE_ITR_INTERVAL_DEFAULT;
> -			else
> -				interval =
> I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT;
> -		}
> +	uint16_t interval = 0;
> +
> +	if (is_multi_drv) {
> +		interval = I40E_QUEUE_ITR_INTERVAL_MAX;
> +	} else {
> +		if (is_pf)
> +			interval = I40E_QUEUE_ITR_INTERVAL_DEFAULT;
> +		else
> +			interval = I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT;
>  	}
> 
>  	/* Convert to hardware count, as writing each 1 represents 2 us */
> diff --git a/drivers/net/i40e/i40e_ethdev_vf.c
> b/drivers/net/i40e/i40e_ethdev_vf.c
> index 804e44530..ad5c069e8 100644
> --- a/drivers/net/i40e/i40e_ethdev_vf.c
> +++ b/drivers/net/i40e/i40e_ethdev_vf.c
> @@ -44,6 +44,8 @@
>  #define I40EVF_BUSY_WAIT_COUNT 50
>  #define MAX_RESET_WAIT_CNT     20
> 
> +#define I40EVF_ALARM_INTERVAL 50000 /* us */
> +
>  struct i40evf_arq_msg_info {
>  	enum virtchnl_ops ops;
>  	enum i40e_status_code result;
> @@ -1133,7 +1135,7 @@ i40evf_init_vf(struct rte_eth_dev *dev)
>  	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data-
> >dev_private);
>  	struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data-
> >dev_private);
>  	uint16_t interval =
> -		i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL, 0, 0);
> +		i40e_calc_itr_interval(0, 0);
> 
>  	vf->adapter = I40E_DEV_PRIVATE_TO_ADAPTER(dev->data-
> >dev_private);
>  	vf->dev_data = dev->data;
> @@ -1370,7 +1372,7 @@ i40evf_handle_aq_msg(struct rte_eth_dev *dev)
>   *  void
>   */
>  static void
> -i40evf_dev_interrupt_handler(void *param)
> +i40evf_dev_alarm_handler(void *param)
>  {
>  	struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
>  	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data-
> >dev_private);
> @@ -1399,6 +1401,8 @@ i40evf_dev_interrupt_handler(void *param)
> 
>  done:
>  	i40evf_enable_irq0(hw);
> +	rte_eal_alarm_set(I40EVF_ALARM_INTERVAL,
> +			  i40evf_dev_alarm_handler, dev);
>  }
> 
>  static int
> @@ -1442,12 +1446,8 @@ i40evf_dev_init(struct rte_eth_dev *eth_dev)
>  		return -1;
>  	}
> 
> -	/* register callback func to eal lib */
> -	rte_intr_callback_register(&pci_dev->intr_handle,
> -		i40evf_dev_interrupt_handler, (void *)eth_dev);
> -
> -	/* enable uio intr after callback register */
> -	rte_intr_enable(&pci_dev->intr_handle);
> +	rte_eal_alarm_set(I40EVF_ALARM_INTERVAL,
> +			  i40evf_dev_alarm_handler, eth_dev);
> 
>  	/* configure and enable device interrupt */
>  	i40evf_enable_irq0(hw);
> @@ -1836,7 +1836,7 @@ i40evf_dev_rx_queue_intr_enable(struct
> rte_eth_dev *dev, uint16_t queue_id)
>  	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
>  	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data-
> >dev_private);
>  	uint16_t interval =
> -		i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL, 0, 0);
> +		i40e_calc_itr_interval(0, 0);
>  	uint16_t msix_intr;
> 
>  	msix_intr = intr_handle->intr_vec[queue_id]; @@ -1859,8 +1859,6
> @@ i40evf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t
> queue_id)
> 
>  	I40EVF_WRITE_FLUSH(hw);
> 
> -	rte_intr_enable(&pci_dev->intr_handle);
> -
>  	return 0;
>  }
> 
> @@ -2023,10 +2021,8 @@ i40evf_dev_start(struct rte_eth_dev *dev)
>  	 * queue interrupt to other VFIO vectors.
>  	 * So clear uio/vfio intr/evevnfd first to avoid failure.
>  	 */
> -	if (dev->data->dev_conf.intr_conf.rxq != 0) {
> -		rte_intr_disable(intr_handle);

Do we need to delete "rte_intr_disable" here? If so, should the comments be changed?

> +	if (dev->data->dev_conf.intr_conf.rxq != 0)
>  		rte_intr_enable(intr_handle);
> -	}
> 
>  	i40evf_enable_queues_intr(dev);
> 
> @@ -2050,6 +2046,9 @@ i40evf_dev_stop(struct rte_eth_dev *dev)
> 
>  	PMD_INIT_FUNC_TRACE();
> 
> +	if (dev->data->dev_conf.intr_conf.rxq != 0)
> +		rte_intr_disable(intr_handle);
> +
>  	if (hw->adapter_stopped == 1)
>  		return;
>  	i40evf_stop_queues(dev);
> @@ -2285,9 +2284,8 @@ static void
>  i40evf_dev_close(struct rte_eth_dev *dev)  {
>  	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data-
> >dev_private);
> -	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
> -	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
> 
> +	rte_eal_alarm_cancel(i40evf_dev_alarm_handler, dev);
>  	i40evf_dev_stop(dev);
>  	i40e_dev_free_queues(dev);
>  	/*
> @@ -2300,12 +2298,6 @@ i40evf_dev_close(struct rte_eth_dev *dev)
> 
>  	i40evf_reset_vf(hw);
>  	i40e_shutdown_adminq(hw);
> -	/* disable uio intr before callback unregister */
> -	rte_intr_disable(intr_handle);
> -
> -	/* unregister callback func from eal lib */
> -	rte_intr_callback_unregister(intr_handle,
> -				     i40evf_dev_interrupt_handler, dev);
>  	i40evf_disable_irq0(hw);
>  }
> 
> --
> 2.13.6

  parent reply	other threads:[~2018-06-27  3:48 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-06-07  1:31 Qi Zhang
2018-06-07  7:14 ` Xing, Beilei
2018-06-07 13:16   ` Zhang, Qi Z
2018-06-27  3:48 ` Xing, Beilei [this message]
2018-06-27  3:59   ` Zhang, Qi Z

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=94479800C636CB44BD422CB454846E01321CF9F1@SHSMSX101.ccr.corp.intel.com \
    --to=beilei.xing@intel.com \
    --cc=de.yu@intel.com \
    --cc=dev@dpdk.org \
    --cc=jingjing.wu@intel.com \
    --cc=qi.z.zhang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).