DPDK patches and discussions
 help / color / mirror / Atom feed
From: Michal Krawczyk <mk@semihalf.com>
To: Marcin Wojtas <mw@semihalf.com>,
	Michal Krawczyk <mk@semihalf.com>,
	Guy Tzalik <gtzalik@amazon.com>,
	Evgeny Schemeilin <evgenys@amazon.com>
Cc: dev@dpdk.org, matua@amazon.com
Subject: [dpdk-dev] [PATCH v3 11/27] net/ena: add watchdog and keep alive AENQ handler
Date: Thu,  7 Jun 2018 11:43:06 +0200	[thread overview]
Message-ID: <20180607094322.14312-11-mk@semihalf.com> (raw)
In-Reply-To: <20180607094322.14312-1-mk@semihalf.com>

Keep alive triggers an AENQ interrupt periodically. It allows the driver
to check the health of the device and to trigger a reset event if the
device stops responding.

To check the state of the device, the DPDK application must call
rte_timer_manage() periodically.

Signed-off-by: Michal Krawczyk <mk@semihalf.com>
---
 drivers/net/ena/ena_ethdev.c | 58 ++++++++++++++++++++++++++++++++++++++++++--
 drivers/net/ena/ena_ethdev.h |  9 +++++++
 2 files changed, 65 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c
index 4fae4fd66..796b6fc0a 100644
--- a/drivers/net/ena/ena_ethdev.c
+++ b/drivers/net/ena/ena_ethdev.c
@@ -249,6 +249,7 @@ static int ena_rss_reta_query(struct rte_eth_dev *dev,
 			      uint16_t reta_size);
 static int ena_get_sset_count(struct rte_eth_dev *dev, int sset);
 static void ena_interrupt_handler_rte(void *cb_arg);
+static void ena_timer_wd_callback(struct rte_timer *timer, void *arg);
 
 static const struct eth_dev_ops ena_dev_ops = {
 	.dev_configure        = ena_dev_configure,
@@ -979,6 +980,7 @@ static int ena_start(struct rte_eth_dev *dev)
 {
 	struct ena_adapter *adapter =
 		(struct ena_adapter *)(dev->data->dev_private);
+	uint64_t ticks;
 	int rc = 0;
 
 	rc = ena_check_valid_conf(adapter);
@@ -1002,6 +1004,13 @@ static int ena_start(struct rte_eth_dev *dev)
 
 	ena_stats_restart(dev);
 
+	adapter->timestamp_wd = rte_get_timer_cycles();
+	adapter->keep_alive_timeout = ENA_DEVICE_KALIVE_TIMEOUT;
+
+	ticks = rte_get_timer_hz();
+	rte_timer_reset(&adapter->timer_wd, ticks, PERIODICAL, rte_lcore_id(),
+			ena_timer_wd_callback, adapter);
+
 	adapter->state = ENA_ADAPTER_STATE_RUNNING;
 
 	return 0;
@@ -1012,6 +1021,8 @@ static void ena_stop(struct rte_eth_dev *dev)
 	struct ena_adapter *adapter =
 		(struct ena_adapter *)(dev->data->dev_private);
 
+	rte_timer_stop_sync(&adapter->timer_wd);
+
 	adapter->state = ENA_ADAPTER_STATE_STOPPED;
 }
 
@@ -1358,7 +1369,8 @@ static int ena_device_init(struct ena_com_dev *ena_dev,
 	}
 
 	aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) |
-		      BIT(ENA_ADMIN_NOTIFICATION);
+		      BIT(ENA_ADMIN_NOTIFICATION) |
+		      BIT(ENA_ADMIN_KEEP_ALIVE);
 
 	aenq_groups &= get_feat_ctx->aenq.supported_groups;
 	rc = ena_com_set_aenq_config(ena_dev, aenq_groups);
@@ -1388,6 +1400,26 @@ static void ena_interrupt_handler_rte(void *cb_arg)
 		ena_com_aenq_intr_handler(ena_dev, adapter);
 }
 
+static void ena_timer_wd_callback(__rte_unused struct rte_timer *timer,
+				  void *arg)
+{
+	struct ena_adapter *adapter = (struct ena_adapter *)arg;
+	struct rte_eth_dev *dev = adapter->rte_dev;
+
+	if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT)
+		return;
+
+	/* Within reasonable timing range no memory barriers are needed */
+	if ((rte_get_timer_cycles() - adapter->timestamp_wd) >=
+	    adapter->keep_alive_timeout) {
+		RTE_LOG(ERR, PMD, "The ENA device is not responding - "
+			"performing device reset...");
+		adapter->reset_reason = ENA_REGS_RESET_KEEP_ALIVE_TO;
+		_rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET,
+			NULL);
+	}
+}
+
 static int eth_ena_dev_init(struct rte_eth_dev *eth_dev)
 {
 	struct rte_pci_device *pci_dev;
@@ -1490,6 +1522,10 @@ static int eth_ena_dev_init(struct rte_eth_dev *eth_dev)
 	ena_com_set_admin_polling_mode(ena_dev, false);
 	ena_com_admin_aenq_enable(ena_dev);
 
+	if (adapters_found == 0)
+		rte_timer_subsystem_init();
+	rte_timer_init(&adapter->timer_wd);
+
 	adapters_found++;
 	adapter->state = ENA_ADAPTER_STATE_INIT;
 
@@ -1803,6 +1839,16 @@ static void ena_update_hints(struct ena_adapter *adapter,
 		/* convert to usec */
 		adapter->ena_dev.mmio_read.reg_read_to =
 			hints->mmio_read_timeout * 1000;
+
+	if (hints->driver_watchdog_timeout) {
+		if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT)
+			adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT;
+		else
+			// Convert msecs to ticks
+			adapter->keep_alive_timeout =
+				(hints->driver_watchdog_timeout *
+				rte_get_timer_hz()) / 1000;
+	}
 }
 
 static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
@@ -2022,6 +2068,14 @@ static void ena_notification(void *data,
 	}
 }
 
+static void ena_keep_alive(void *adapter_data,
+			   __rte_unused struct ena_admin_aenq_entry *aenq_e)
+{
+	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
+
+	adapter->timestamp_wd = rte_get_timer_cycles();
+}
+
 /**
  * This handler will called for unknown event group or unimplemented handlers
  **/
@@ -2035,7 +2089,7 @@ static struct ena_aenq_handlers aenq_handlers = {
 	.handlers = {
 		[ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
 		[ENA_ADMIN_NOTIFICATION] = ena_notification,
-		[ENA_ADMIN_KEEP_ALIVE] = unimplemented_aenq_handler
+		[ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive
 	},
 	.unimplemented_handler = unimplemented_aenq_handler
 };
diff --git a/drivers/net/ena/ena_ethdev.h b/drivers/net/ena/ena_ethdev.h
index 79e9e655d..b44cca23e 100644
--- a/drivers/net/ena/ena_ethdev.h
+++ b/drivers/net/ena/ena_ethdev.h
@@ -34,8 +34,10 @@
 #ifndef _ENA_ETHDEV_H_
 #define _ENA_ETHDEV_H_
 
+#include <rte_cycles.h>
 #include <rte_pci.h>
 #include <rte_bus_pci.h>
+#include <rte_timer.h>
 
 #include "ena_com.h"
 
@@ -50,6 +52,9 @@
 
 #define ENA_MMIO_DISABLE_REG_READ	BIT(0)
 
+#define ENA_WD_TIMEOUT_SEC	3
+#define ENA_DEVICE_KALIVE_TIMEOUT (ENA_WD_TIMEOUT_SEC * rte_get_timer_hz())
+
 struct ena_adapter;
 
 enum ena_ring_type {
@@ -185,6 +190,10 @@ struct ena_adapter {
 	bool link_status;
 
 	enum ena_regs_reset_reason_types reset_reason;
+
+	struct rte_timer timer_wd;
+	uint64_t timestamp_wd;
+	uint64_t keep_alive_timeout;
 };
 
 #endif /* _ENA_ETHDEV_H_ */
-- 
2.14.1

  parent reply	other threads:[~2018-06-07  9:43 UTC|newest]

Thread overview: 49+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-06-07  9:42 [dpdk-dev] [PATCH v3 01/27] net/ena: change version number to 1.1.0 Michal Krawczyk
2018-06-07  9:42 ` [dpdk-dev] [PATCH v3 02/27] net/ena: update ena_com to the newer version Michal Krawczyk
2018-06-08 19:43   ` Ferruh Yigit
2018-06-11  9:54     ` [dpdk-dev] [PATCH v4 2/2] " Michal Krawczyk
2018-06-11 10:42       ` Ferruh Yigit
2018-06-11 11:01         ` [dpdk-dev] [PATCH v5 02/27] " Michal Krawczyk
2018-06-07  9:42 ` [dpdk-dev] [PATCH v3 03/27] net/ena: remove support of legacy LLQ Michal Krawczyk
2018-06-07  9:42 ` [dpdk-dev] [PATCH v3 04/27] net/ena: add interrupt handler for admin queue Michal Krawczyk
2018-06-07  9:43 ` [dpdk-dev] [PATCH v3 05/27] net/ena: add stop and uninit routines Michal Krawczyk
2018-06-07  9:43 ` [dpdk-dev] [PATCH v3 06/27] net/ena: add LSC intr support and AENQ handling Michal Krawczyk
2018-06-07  9:43 ` [dpdk-dev] [PATCH v3 07/27] net/ena: handle ENA notification Michal Krawczyk
2018-06-07  9:43 ` [dpdk-dev] [PATCH v3 08/27] net/ena: restart only initialized queues instead of all Michal Krawczyk
2018-06-07  9:43 ` [dpdk-dev] [PATCH v3 09/27] net/ena: add reset routine Michal Krawczyk
2018-06-07  9:43 ` [dpdk-dev] [PATCH v3 10/27] net/ena: add lrte_timer dependency for linking Michal Krawczyk
2018-06-08 19:17   ` Ferruh Yigit
2018-06-11  6:24     ` Michał Krawczyk
2018-06-17 22:37   ` Thomas Monjalon
2018-06-18  6:04     ` Michał Krawczyk
2018-06-18  8:30       ` Thomas Monjalon
2018-06-18  8:32         ` Michał Krawczyk
2018-06-07  9:43 ` Michal Krawczyk [this message]
2018-06-07  9:43 ` [dpdk-dev] [PATCH v3 12/27] net/ena: add checking for admin queue state Michal Krawczyk
2018-06-07  9:43 ` [dpdk-dev] [PATCH v3 13/27] net/ena: make watchdog configurable Michal Krawczyk
2018-06-07  9:43 ` [dpdk-dev] [PATCH v3 14/27] net/ena: add RX out of order completion Michal Krawczyk
2018-06-07  9:43 ` [dpdk-dev] [PATCH v3 15/27] net/ena: linearize Tx mbuf Michal Krawczyk
2018-06-07  9:43 ` [dpdk-dev] [PATCH v3 16/27] net/ena: add info about max number of Tx/Rx descriptors Michal Krawczyk
2018-06-07  9:43 ` [dpdk-dev] [PATCH v3 17/27] net/ena: unimplemented handler error Michal Krawczyk
2018-06-07  9:43 ` [dpdk-dev] [PATCH v3 18/27] net/ena: rework configuration of IO queue numbers Michal Krawczyk
2018-06-07  9:43 ` [dpdk-dev] [PATCH v3 19/27] net/ena: validate Tx req id Michal Krawczyk
2018-06-07  9:43 ` [dpdk-dev] [PATCH v3 20/27] net/ena: add (un)likely statements Michal Krawczyk
2018-06-07  9:43 ` [dpdk-dev] [PATCH v3 21/27] net/ena: adjust error checking and cleaning Michal Krawczyk
2018-06-07  9:43 ` [dpdk-dev] [PATCH v3 22/27] net/ena: update numa node Michal Krawczyk
2018-06-07  9:43 ` [dpdk-dev] [PATCH v3 23/27] net/ena: check pointer before memset Michal Krawczyk
2018-06-07  9:43 ` [dpdk-dev] [PATCH v3 24/27] net/ena: change memory type Michal Krawczyk
2018-06-07  9:43 ` [dpdk-dev] [PATCH v3 25/27] net/ena: fix GENMASK_ULL macro Michal Krawczyk
2018-06-07  9:43 ` [dpdk-dev] [PATCH v3 26/27] net/ena: store handle after memory allocation Michal Krawczyk
2018-06-07  9:43 ` [dpdk-dev] [PATCH v3 27/27] net/ena: set link speed as none Michal Krawczyk
2018-06-08 19:37   ` Ferruh Yigit
2018-06-10  1:35     ` Chas Williams
2018-06-11  8:01       ` Michał Krawczyk
2018-06-11 16:15         ` Chas Williams
2018-06-19  6:32           ` Michał Krawczyk
2018-06-08 19:45 ` [dpdk-dev] [PATCH v3 01/27] net/ena: change version number to 1.1.0 Ferruh Yigit
2018-06-11  8:45   ` Michał Krawczyk
2018-06-11  9:26     ` Ferruh Yigit
2018-06-11  9:33       ` Michał Krawczyk
2018-06-11  9:50         ` Ferruh Yigit
2018-06-11  9:52           ` Michał Krawczyk
2018-06-11 12:34 ` Ferruh Yigit

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180607094322.14312-11-mk@semihalf.com \
    --to=mk@semihalf.com \
    --cc=dev@dpdk.org \
    --cc=evgenys@amazon.com \
    --cc=gtzalik@amazon.com \
    --cc=matua@amazon.com \
    --cc=mw@semihalf.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).