From: "Xing, Beilei" <beilei.xing@intel.com>
To: "Zhang, Qi Z" <qi.z.zhang@intel.com>
Cc: "Wu, Jingjing" <jingjing.wu@intel.com>,
"Yu, De" <de.yu@intel.com>, "dev@dpdk.org" <dev@dpdk.org>
Subject: Re: [dpdk-dev] [PATCH] net/i40e: remove VF interrupt handler
Date: Wed, 27 Jun 2018 03:48:14 +0000 [thread overview]
Message-ID: <94479800C636CB44BD422CB454846E01321CF9F1@SHSMSX101.ccr.corp.intel.com> (raw)
In-Reply-To: <20180607013156.28763-1-qi.z.zhang@intel.com>
> -----Original Message-----
> From: Zhang, Qi Z
> Sent: Thursday, June 7, 2018 9:32 AM
> To: Xing, Beilei <beilei.xing@intel.com>
> Cc: Wu, Jingjing <jingjing.wu@intel.com>; Yu, De <de.yu@intel.com>;
> dev@dpdk.org; Zhang, Qi Z <qi.z.zhang@intel.com>
> Subject: [PATCH] net/i40e: remove VF interrupt handler
>
> For i40evf, the internal rx interrupt and the adminq interrupt share the same
> source, which causes a lot of CPU cycles to be wasted in the interrupt handler
> on the rx path. Customers who require low latency (by setting
> I40E_ITR_INTERVAL to a small value) have complained that they suffer from a
> tremendous number of interrupts, whose handling eats significant CPU resources.
>
> The patch disables the pci interrupt and removes the interrupt handler,
> replacing it with a low frequency (50ms) interrupt polling daemon which is
> implemented by periodically registering an alarm callback. This saves CPU time
> significantly: on a typical x86 server with a 2.1GHz CPU and a low latency
> configuration (32us), we saw CPU usage reported by the top command drop from
> 20% to 0% on the management core in testpmd.
>
> Also, with the new method we can remove the compile option
> I40E_ITR_INTERVAL, which was previously used to balance low latency against
> low CPU usage.
> We no longer need it, since we can achieve both at the same time.
>
> Suggested-by: Jingjing Wu <jingjing.wu@intel.com>
> Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
> ---
> config/common_base | 2 --
> drivers/net/i40e/i40e_ethdev.c | 3 +--
> drivers/net/i40e/i40e_ethdev.h | 22 +++++++++++-----------
> drivers/net/i40e/i40e_ethdev_vf.c | 36 ++++++++++++++----------------------
> 4 files changed, 26 insertions(+), 37 deletions(-)
>
> diff --git a/config/common_base b/config/common_base index
> 6b0d1cbbb..9e21c6865 100644
> --- a/config/common_base
> +++ b/config/common_base
> @@ -264,8 +264,6 @@ CONFIG_RTE_LIBRTE_I40E_INC_VECTOR=y
> CONFIG_RTE_LIBRTE_I40E_16BYTE_RX_DESC=n
> CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_PF=64
> CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_VM=4
> -# interval up to 8160 us, aligned to 2 (or default value)
> -CONFIG_RTE_LIBRTE_I40E_ITR_INTERVAL=-1
It seems "High Performance and per Packet Latency Tradeoff" was missed.
>
> #
> # Compile burst-oriented FM10K PMD
> diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
> index 13c5d3296..c8f9566e0 100644
> --- a/drivers/net/i40e/i40e_ethdev.c
> +++ b/drivers/net/i40e/i40e_ethdev.c
> @@ -1829,8 +1829,7 @@ __vsi_queues_bind_intr(struct i40e_vsi *vsi,
> uint16_t msix_vect,
> /* Write first RX queue to Link list register as the head element */
> if (vsi->type != I40E_VSI_SRIOV) {
> uint16_t interval =
> -
> i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL, 1,
> - pf->support_multi_driver);
> + i40e_calc_itr_interval(1, pf->support_multi_driver);
>
> if (msix_vect == I40E_MISC_VEC_ID) {
> I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, diff --git
> a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h index
> 11c4c76bd..599993dac 100644
> --- a/drivers/net/i40e/i40e_ethdev.h
> +++ b/drivers/net/i40e/i40e_ethdev.h
> @@ -178,7 +178,7 @@ enum i40e_flxpld_layer_idx {
> #define I40E_ITR_INDEX_NONE 3
> #define I40E_QUEUE_ITR_INTERVAL_DEFAULT 32 /* 32 us */
> #define I40E_QUEUE_ITR_INTERVAL_MAX 8160 /* 8160 us */
> -#define I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT 8160 /* 8160 us */
> +#define I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT 32 /* 32 us */
> /* Special FW support this floating VEB feature */ #define
> FLOATING_VEB_SUPPORTED_FW_MAJ 5 #define
> FLOATING_VEB_SUPPORTED_FW_MIN 0 @@ -1328,17 +1328,17 @@
> i40e_align_floor(int n) }
>
> static inline uint16_t
> -i40e_calc_itr_interval(int16_t interval, bool is_pf, bool is_multi_drv)
> +i40e_calc_itr_interval(bool is_pf, bool is_multi_drv)
> {
> - if (interval < 0 || interval > I40E_QUEUE_ITR_INTERVAL_MAX) {
> - if (is_multi_drv) {
> - interval = I40E_QUEUE_ITR_INTERVAL_MAX;
> - } else {
> - if (is_pf)
> - interval =
> I40E_QUEUE_ITR_INTERVAL_DEFAULT;
> - else
> - interval =
> I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT;
> - }
> + uint16_t interval = 0;
> +
> + if (is_multi_drv) {
> + interval = I40E_QUEUE_ITR_INTERVAL_MAX;
> + } else {
> + if (is_pf)
> + interval = I40E_QUEUE_ITR_INTERVAL_DEFAULT;
> + else
> + interval = I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT;
> }
>
> /* Convert to hardware count, as writing each 1 represents 2 us */
> diff --git a/drivers/net/i40e/i40e_ethdev_vf.c
> b/drivers/net/i40e/i40e_ethdev_vf.c
> index 804e44530..ad5c069e8 100644
> --- a/drivers/net/i40e/i40e_ethdev_vf.c
> +++ b/drivers/net/i40e/i40e_ethdev_vf.c
> @@ -44,6 +44,8 @@
> #define I40EVF_BUSY_WAIT_COUNT 50
> #define MAX_RESET_WAIT_CNT 20
>
> +#define I40EVF_ALARM_INTERVAL 50000 /* us */
> +
> struct i40evf_arq_msg_info {
> enum virtchnl_ops ops;
> enum i40e_status_code result;
> @@ -1133,7 +1135,7 @@ i40evf_init_vf(struct rte_eth_dev *dev)
> struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data-
> >dev_private);
> struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data-
> >dev_private);
> uint16_t interval =
> - i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL, 0, 0);
> + i40e_calc_itr_interval(0, 0);
>
> vf->adapter = I40E_DEV_PRIVATE_TO_ADAPTER(dev->data-
> >dev_private);
> vf->dev_data = dev->data;
> @@ -1370,7 +1372,7 @@ i40evf_handle_aq_msg(struct rte_eth_dev *dev)
> * void
> */
> static void
> -i40evf_dev_interrupt_handler(void *param)
> +i40evf_dev_alarm_handler(void *param)
> {
> struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
> struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data-
> >dev_private);
> @@ -1399,6 +1401,8 @@ i40evf_dev_interrupt_handler(void *param)
>
> done:
> i40evf_enable_irq0(hw);
> + rte_eal_alarm_set(I40EVF_ALARM_INTERVAL,
> + i40evf_dev_alarm_handler, dev);
> }
>
> static int
> @@ -1442,12 +1446,8 @@ i40evf_dev_init(struct rte_eth_dev *eth_dev)
> return -1;
> }
>
> - /* register callback func to eal lib */
> - rte_intr_callback_register(&pci_dev->intr_handle,
> - i40evf_dev_interrupt_handler, (void *)eth_dev);
> -
> - /* enable uio intr after callback register */
> - rte_intr_enable(&pci_dev->intr_handle);
> + rte_eal_alarm_set(I40EVF_ALARM_INTERVAL,
> + i40evf_dev_alarm_handler, eth_dev);
>
> /* configure and enable device interrupt */
> i40evf_enable_irq0(hw);
> @@ -1836,7 +1836,7 @@ i40evf_dev_rx_queue_intr_enable(struct
> rte_eth_dev *dev, uint16_t queue_id)
> struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
> struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data-
> >dev_private);
> uint16_t interval =
> - i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL, 0, 0);
> + i40e_calc_itr_interval(0, 0);
> uint16_t msix_intr;
>
> msix_intr = intr_handle->intr_vec[queue_id]; @@ -1859,8 +1859,6
> @@ i40evf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t
> queue_id)
>
> I40EVF_WRITE_FLUSH(hw);
>
> - rte_intr_enable(&pci_dev->intr_handle);
> -
> return 0;
> }
>
> @@ -2023,10 +2021,8 @@ i40evf_dev_start(struct rte_eth_dev *dev)
> * queue interrupt to other VFIO vectors.
> * So clear uio/vfio intr/evevnfd first to avoid failure.
> */
> - if (dev->data->dev_conf.intr_conf.rxq != 0) {
> - rte_intr_disable(intr_handle);
Do we need to delete "rte_intr_disable" here? If so, should the comments be changed?
> + if (dev->data->dev_conf.intr_conf.rxq != 0)
> rte_intr_enable(intr_handle);
> - }
>
> i40evf_enable_queues_intr(dev);
>
> @@ -2050,6 +2046,9 @@ i40evf_dev_stop(struct rte_eth_dev *dev)
>
> PMD_INIT_FUNC_TRACE();
>
> + if (dev->data->dev_conf.intr_conf.rxq != 0)
> + rte_intr_disable(intr_handle);
> +
> if (hw->adapter_stopped == 1)
> return;
> i40evf_stop_queues(dev);
> @@ -2285,9 +2284,8 @@ static void
> i40evf_dev_close(struct rte_eth_dev *dev) {
> struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data-
> >dev_private);
> - struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
> - struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
>
> + rte_eal_alarm_cancel(i40evf_dev_alarm_handler, dev);
> i40evf_dev_stop(dev);
> i40e_dev_free_queues(dev);
> /*
> @@ -2300,12 +2298,6 @@ i40evf_dev_close(struct rte_eth_dev *dev)
>
> i40evf_reset_vf(hw);
> i40e_shutdown_adminq(hw);
> - /* disable uio intr before callback unregister */
> - rte_intr_disable(intr_handle);
> -
> - /* unregister callback func from eal lib */
> - rte_intr_callback_unregister(intr_handle,
> - i40evf_dev_interrupt_handler, dev);
> i40evf_disable_irq0(hw);
> }
>
> --
> 2.13.6
next prev parent reply other threads:[~2018-06-27 3:48 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-06-07 1:31 Qi Zhang
2018-06-07 7:14 ` Xing, Beilei
2018-06-07 13:16 ` Zhang, Qi Z
2018-06-27 3:48 ` Xing, Beilei [this message]
2018-06-27 3:59 ` Zhang, Qi Z
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=94479800C636CB44BD422CB454846E01321CF9F1@SHSMSX101.ccr.corp.intel.com \
--to=beilei.xing@intel.com \
--cc=de.yu@intel.com \
--cc=dev@dpdk.org \
--cc=jingjing.wu@intel.com \
--cc=qi.z.zhang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).