DPDK patches and discussions
 help / color / mirror / Atom feed
From: Matan Azrad <matan@mellanox.com>
To: Adrien Mazarguil <adrien.mazarguil@6wind.com>,
	Thomas Monjalon <thomas@monjalon.net>,
	Gaetan Rivet <gaetan.rivet@6wind.com>
Cc: dev@dpdk.org
Subject: [dpdk-dev] [PATCH v3 6/6] net/failsafe: fix removed device handling
Date: Tue, 19 Dec 2017 17:10:15 +0000	[thread overview]
Message-ID: <1513703415-29145-7-git-send-email-matan@mellanox.com> (raw)
In-Reply-To: <1513703415-29145-1-git-send-email-matan@mellanox.com>

There is time between the physical removal of the device until
sub-device PMDs get a RMV interrupt. At this time DPDK PMDs and
applications still don't know about the removal and may call sub-device
control operation which should return an error.

In previous code this error is reported to the application contrary to
fail-safe principle that the app should not be aware of device removal.

Add an removal check in each relevant control command error flow and
prevent an error report to application when the sub-device is removed.

Fixes: a46f8d5 ("net/failsafe: add fail-safe PMD")
Fixes: b737a1e ("net/failsafe: support flow API")

Signed-off-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/failsafe/failsafe_flow.c    | 18 ++++++++++-------
 drivers/net/failsafe/failsafe_ops.c     | 35 ++++++++++++++++++++++-----------
 drivers/net/failsafe/failsafe_private.h | 12 +++++++++++
 3 files changed, 47 insertions(+), 18 deletions(-)

diff --git a/drivers/net/failsafe/failsafe_flow.c b/drivers/net/failsafe/failsafe_flow.c
index 153ceee..123acb4 100644
--- a/drivers/net/failsafe/failsafe_flow.c
+++ b/drivers/net/failsafe/failsafe_flow.c
@@ -87,7 +87,7 @@
 		DEBUG("Calling rte_flow_validate on sub_device %d", i);
 		ret = rte_flow_validate(PORT_ID(sdev),
 				attr, patterns, actions, error);
-		if (ret) {
+		if (fs_is_error(sdev, ret)) {
 			ERROR("Operation rte_flow_validate failed for sub_device %d"
 			      " with error %d", i, ret);
 			return ret;
@@ -111,7 +111,7 @@
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		flow->flows[i] = rte_flow_create(PORT_ID(sdev),
 				attr, patterns, actions, error);
-		if (flow->flows[i] == NULL) {
+		if (flow->flows[i] == NULL && fs_is_error(sdev, -rte_errno)) {
 			ERROR("Failed to create flow on sub_device %d",
 				i);
 			goto err;
@@ -150,7 +150,7 @@
 			continue;
 		local_ret = rte_flow_destroy(PORT_ID(sdev),
 				flow->flows[i], error);
-		if (local_ret) {
+		if (fs_is_error(sdev, local_ret)) {
 			ERROR("Failed to destroy flow on sub_device %d: %d",
 					i, local_ret);
 			if (ret == 0)
@@ -175,7 +175,7 @@
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_flow_flush on sub_device %d", i);
 		ret = rte_flow_flush(PORT_ID(sdev), error);
-		if (ret) {
+		if (fs_is_error(sdev, ret)) {
 			ERROR("Operation rte_flow_flush failed for sub_device %d"
 			      " with error %d", i, ret);
 			return ret;
@@ -199,8 +199,12 @@
 
 	sdev = TX_SUBDEV(dev);
 	if (sdev != NULL) {
-		return rte_flow_query(PORT_ID(sdev),
-				flow->flows[SUB_ID(sdev)], type, arg, error);
+		int ret = rte_flow_query(PORT_ID(sdev),
+					 flow->flows[SUB_ID(sdev)],
+					 type, arg, error);
+
+		if (fs_is_error(sdev, ret))
+			return ret;
 	}
 	WARN("No active sub_device to query about its flow");
 	return -1;
@@ -223,7 +227,7 @@
 			WARN("flow isolation mode of sub_device %d in incoherent state.",
 				i);
 		ret = rte_flow_isolate(PORT_ID(sdev), set, error);
-		if (ret) {
+		if (fs_is_error(sdev, ret)) {
 			ERROR("Operation rte_flow_isolate failed for sub_device %d"
 			      " with error %d", i, ret);
 			return ret;
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index e16a590..313ea2f 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -121,6 +121,8 @@
 					dev->data->nb_tx_queues,
 					&dev->data->dev_conf);
 		if (ret) {
+			if (!fs_is_error(sdev, ret))
+				continue;
 			ERROR("Could not configure sub_device %d", i);
 			return ret;
 		}
@@ -163,8 +165,11 @@
 			continue;
 		DEBUG("Starting sub_device %d", i);
 		ret = rte_eth_dev_start(PORT_ID(sdev));
-		if (ret)
+		if (ret) {
+			if (!fs_is_error(sdev, ret))
+				continue;
 			return ret;
+		}
 		sdev->state = DEV_STARTED;
 	}
 	if (PRIV(dev)->state < DEV_STARTED)
@@ -196,7 +201,7 @@
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_eth_dev_set_link_up on sub_device %d", i);
 		ret = rte_eth_dev_set_link_up(PORT_ID(sdev));
-		if (ret) {
+		if (fs_is_error(sdev, ret)) {
 			ERROR("Operation rte_eth_dev_set_link_up failed for sub_device %d"
 			      " with error %d", i, ret);
 			return ret;
@@ -215,7 +220,7 @@
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_eth_dev_set_link_down on sub_device %d", i);
 		ret = rte_eth_dev_set_link_down(PORT_ID(sdev));
-		if (ret) {
+		if (fs_is_error(sdev, ret)) {
 			ERROR("Operation rte_eth_dev_set_link_down failed for sub_device %d"
 			      " with error %d", i, ret);
 			return ret;
@@ -300,7 +305,7 @@
 				rx_queue_id,
 				nb_rx_desc, socket_id,
 				rx_conf, mb_pool);
-		if (ret) {
+		if (fs_is_error(sdev, ret)) {
 			ERROR("RX queue setup failed for sub_device %d", i);
 			goto free_rxq;
 		}
@@ -366,7 +371,7 @@
 				tx_queue_id,
 				nb_tx_desc, socket_id,
 				tx_conf);
-		if (ret) {
+		if (fs_is_error(sdev, ret)) {
 			ERROR("TX queue setup failed for sub_device %d", i);
 			goto free_txq;
 		}
@@ -445,7 +450,8 @@
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling link_update on sub_device %d", i);
 		ret = (SUBOPS(sdev, link_update))(ETH(sdev), wait_to_complete);
-		if (ret && ret != -1) {
+		if (ret && ret != -1 && sdev->remove == 0 &&
+		    rte_eth_dev_is_removed(PORT_ID(sdev)) == 0) {
 			ERROR("Link update failed for sub_device %d with error %d",
 			      i, ret);
 			return ret;
@@ -469,6 +475,7 @@
 fs_stats_get(struct rte_eth_dev *dev,
 	     struct rte_eth_stats *stats)
 {
+	struct rte_eth_stats backup;
 	struct sub_device *sdev;
 	uint8_t i;
 	int ret;
@@ -478,14 +485,20 @@
 		struct rte_eth_stats *snapshot = &sdev->stats_snapshot.stats;
 		uint64_t *timestamp = &sdev->stats_snapshot.timestamp;
 
+		rte_memcpy(&backup, snapshot, sizeof(backup));
 		ret = rte_eth_stats_get(PORT_ID(sdev), snapshot);
 		if (ret) {
+			if (!fs_is_error(sdev, ret)) {
+				rte_memcpy(snapshot, &backup, sizeof(backup));
+				goto inc;
+			}
 			ERROR("Operation rte_eth_stats_get failed for sub_device %d with error %d",
 				  i, ret);
 			*timestamp = 0;
 			return ret;
 		}
 		*timestamp = rte_rdtsc();
+inc:
 		failsafe_stats_increment(stats, snapshot);
 	}
 	return 0;
@@ -598,7 +611,7 @@
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_eth_dev_set_mtu on sub_device %d", i);
 		ret = rte_eth_dev_set_mtu(PORT_ID(sdev), mtu);
-		if (ret) {
+		if (fs_is_error(sdev, ret)) {
 			ERROR("Operation rte_eth_dev_set_mtu failed for sub_device %d with error %d",
 			      i, ret);
 			return ret;
@@ -617,7 +630,7 @@
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_eth_dev_vlan_filter on sub_device %d", i);
 		ret = rte_eth_dev_vlan_filter(PORT_ID(sdev), vlan_id, on);
-		if (ret) {
+		if (fs_is_error(sdev, ret)) {
 			ERROR("Operation rte_eth_dev_vlan_filter failed for sub_device %d"
 			      " with error %d", i, ret);
 			return ret;
@@ -651,7 +664,7 @@
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_eth_dev_flow_ctrl_set on sub_device %d", i);
 		ret = rte_eth_dev_flow_ctrl_set(PORT_ID(sdev), fc_conf);
-		if (ret) {
+		if (fs_is_error(sdev, ret)) {
 			ERROR("Operation rte_eth_dev_flow_ctrl_set failed for sub_device %d"
 			      " with error %d", i, ret);
 			return ret;
@@ -688,7 +701,7 @@
 	RTE_ASSERT(index < FAILSAFE_MAX_ETHADDR);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		ret = rte_eth_dev_mac_addr_add(PORT_ID(sdev), mac_addr, vmdq);
-		if (ret) {
+		if (fs_is_error(sdev, ret)) {
 			ERROR("Operation rte_eth_dev_mac_addr_add failed for sub_device %"
 			      PRIu8 " with error %d", i, ret);
 			return ret;
@@ -730,7 +743,7 @@
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_eth_dev_filter_ctrl on sub_device %d", i);
 		ret = rte_eth_dev_filter_ctrl(PORT_ID(sdev), type, op, arg);
-		if (ret) {
+		if (fs_is_error(sdev, ret)) {
 			ERROR("Operation rte_eth_dev_filter_ctrl failed for sub_device %d"
 			      " with error %d", i, ret);
 			return ret;
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index d81cc3c..585b554 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -34,6 +34,7 @@
 #ifndef _RTE_ETH_FAILSAFE_PRIVATE_H_
 #define _RTE_ETH_FAILSAFE_PRIVATE_H_
 
+#include <stdbool.h>
 #include <sys/queue.h>
 
 #include <rte_atomic.h>
@@ -375,4 +376,15 @@ int failsafe_eth_lsc_event_callback(uint16_t port_id,
 	rte_wmb();
 }
 
+/*
+ * Check if error should be reported to the user.
+ */
+static inline bool
+fs_is_error(struct sub_device *sdev, int err)
+{
+	/* A device removal shouldn't be reported as an error. */
+	if (err == 0 || sdev->remove == 1 || err == -EIO)
+		return false;
+	return true;
+}
 #endif /* _RTE_ETH_FAILSAFE_PRIVATE_H_ */
-- 
1.8.3.1

  parent reply	other threads:[~2017-12-19 17:10 UTC|newest]

Thread overview: 98+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-11-02 15:42 [dpdk-dev] [PATCH 0/3] Fail-safe fix removal handling lack Matan Azrad
2017-11-02 15:42 ` [dpdk-dev] [PATCH 1/3] net/failsafe: " Matan Azrad
2017-11-06  8:19   ` Gaëtan Rivet
2017-11-02 15:42 ` [dpdk-dev] [PATCH 2/3] net/mlx4: adjust removal error Matan Azrad
2017-11-03 13:05   ` Adrien Mazarguil
2017-11-05  6:52     ` Matan Azrad
2017-11-06 16:51       ` Adrien Mazarguil
2017-11-02 15:42 ` [dpdk-dev] [PATCH 3/3] net/mlx5: " Matan Azrad
2017-11-03 13:06   ` Adrien Mazarguil
2017-11-05  6:57     ` Matan Azrad
2017-12-13 14:29 ` [dpdk-dev] [PATCH v2 0/4] Fail-safe fix removal handling lack Matan Azrad
2017-12-13 14:29   ` [dpdk-dev] [PATCH v2 1/4] ethdev: add devop to check removal status Matan Azrad
2017-12-13 14:29   ` [dpdk-dev] [PATCH v2 2/4] net/mlx4: support a device removal check operation Matan Azrad
2017-12-13 14:29   ` [dpdk-dev] [PATCH v2 3/4] net/mlx5: " Matan Azrad
2017-12-13 14:29   ` [dpdk-dev] [PATCH v2 4/4] net/failsafe: fix removed device handling Matan Azrad
2017-12-13 15:16     ` Gaëtan Rivet
2017-12-13 15:48       ` Matan Azrad
2017-12-13 16:09         ` Gaëtan Rivet
2017-12-13 17:09           ` Thomas Monjalon
2017-12-14 10:40             ` Matan Azrad
2017-12-13 21:55           ` Gaëtan Rivet
2017-12-14 10:40             ` Matan Azrad
2017-12-14 10:48               ` Gaëtan Rivet
2017-12-14 13:07                 ` Matan Azrad
2017-12-14 13:27                   ` Gaëtan Rivet
2017-12-14 14:43                     ` Matan Azrad
2017-12-19 17:10   ` [dpdk-dev] [PATCH v3 0/6] Fail-safe\ethdev: fix removal handling lack Matan Azrad
2017-12-19 17:10     ` [dpdk-dev] [PATCH v3 1/6] ethdev: add devop to check removal status Matan Azrad
2017-12-19 17:20       ` Stephen Hemminger
2017-12-19 17:24         ` Matan Azrad
2017-12-19 20:51           ` Thomas Monjalon
2017-12-19 22:13             ` Gaëtan Rivet
2017-12-20  8:39               ` Matan Azrad
2018-01-07  9:53       ` Thomas Monjalon
2017-12-19 17:10     ` [dpdk-dev] [PATCH v3 2/6] net/mlx4: support a device removal check operation Matan Azrad
2017-12-19 17:10     ` [dpdk-dev] [PATCH v3 3/6] net/mlx5: " Matan Azrad
2017-12-19 17:10     ` [dpdk-dev] [PATCH v3 4/6] ethdev: adjust APIs removal error report Matan Azrad
2018-01-07  9:56       ` Thomas Monjalon
2017-12-19 17:10     ` [dpdk-dev] [PATCH v3 5/6] ethdev: adjust flow " Matan Azrad
2018-01-07  9:58       ` Thomas Monjalon
2017-12-19 17:10     ` Matan Azrad [this message]
2017-12-19 22:21       ` [dpdk-dev] [PATCH v3 6/6] net/failsafe: fix removed device handling Gaëtan Rivet
2017-12-20 10:58         ` Matan Azrad
2018-01-08 10:57           ` Gaëtan Rivet
2018-01-08 12:55             ` Matan Azrad
2018-01-08 13:46               ` Gaëtan Rivet
2018-01-08 14:00                 ` Matan Azrad
2018-01-08 14:31                   ` Gaëtan Rivet
2018-01-10 12:30     ` [dpdk-dev] [PATCH v4 0/6] Fail-safe\ethdev: fix removal handling lack Matan Azrad
2018-01-10 12:31       ` [dpdk-dev] [PATCH v4 1/6] ethdev: add devop to check removal status Matan Azrad
2018-01-10 12:31       ` [dpdk-dev] [PATCH v4 2/6] net/mlx4: support a device removal check operation Matan Azrad
2018-01-10 12:31       ` [dpdk-dev] [PATCH v4 3/6] net/mlx5: " Matan Azrad
2018-01-10 12:31       ` [dpdk-dev] [PATCH v4 4/6] ethdev: adjust APIs removal error report Matan Azrad
2018-01-10 12:31       ` [dpdk-dev] [PATCH v4 5/6] ethdev: adjust flow " Matan Azrad
2018-01-10 12:31       ` [dpdk-dev] [PATCH v4 6/6] net/failsafe: fix removed device handling Matan Azrad
2018-01-10 12:43         ` Matan Azrad
2018-01-10 13:51           ` Gaëtan Rivet
2018-01-10 13:47         ` Gaëtan Rivet
2018-01-17 20:19       ` [dpdk-dev] [PATCH v5 0/6] Fail-safe\ethdev: fix removal handling lack Matan Azrad
2018-01-17 20:19         ` [dpdk-dev] [PATCH v5 1/6] ethdev: add devop to check removal status Matan Azrad
2018-01-17 20:40           ` Ferruh Yigit
2018-01-17 20:19         ` [dpdk-dev] [PATCH v5 2/6] net/mlx4: support a device removal check operation Matan Azrad
2018-01-17 20:19         ` [dpdk-dev] [PATCH v5 3/6] net/mlx5: " Matan Azrad
2018-01-17 20:19         ` [dpdk-dev] [PATCH v5 4/6] ethdev: adjust APIs removal error report Matan Azrad
2018-01-17 20:19         ` [dpdk-dev] [PATCH v5 5/6] ethdev: adjust flow " Matan Azrad
2018-01-17 20:19         ` [dpdk-dev] [PATCH v5 6/6] net/failsafe: fix removed device handling Matan Azrad
2018-01-18  8:44           ` Gaëtan Rivet
2018-01-18 11:27         ` [dpdk-dev] [PATCH v6 0/6] Fail-safe\ethdev: fix removal handling lack Matan Azrad
2018-01-18 11:27           ` [dpdk-dev] [PATCH v6 1/6] ethdev: add devop to check removal status Matan Azrad
2018-01-18 17:18             ` Ferruh Yigit
2018-01-18 17:57               ` Adrien Mazarguil
2018-01-18 18:02               ` Matan Azrad
2018-01-18 11:27           ` [dpdk-dev] [PATCH v6 2/6] net/mlx4: support a device removal check operation Matan Azrad
2018-01-18 16:59             ` Adrien Mazarguil
2018-01-18 11:27           ` [dpdk-dev] [PATCH v6 3/6] net/mlx5: " Matan Azrad
2018-01-18 16:59             ` Adrien Mazarguil
2018-01-18 11:27           ` [dpdk-dev] [PATCH v6 4/6] ethdev: adjust APIs removal error report Matan Azrad
2018-01-18 17:31             ` Ferruh Yigit
2018-01-18 18:10               ` Matan Azrad
2018-01-19 16:19                 ` Ferruh Yigit
2018-01-19 17:35                   ` Ananyev, Konstantin
2018-01-19 17:54                   ` Thomas Monjalon
2018-01-19 18:13                     ` Ferruh Yigit
2018-01-19 18:16                       ` Thomas Monjalon
2018-01-20 19:04                         ` Matan Azrad
2018-01-20 20:28                           ` Thomas Monjalon
2018-01-20 20:45                             ` Matan Azrad
2018-01-21 20:07                   ` Ferruh Yigit
2018-01-18 11:27           ` [dpdk-dev] [PATCH v6 5/6] ethdev: adjust flow " Matan Azrad
2018-01-18 11:27           ` [dpdk-dev] [PATCH v6 6/6] net/failsafe: fix removed device handling Matan Azrad
2018-01-20 21:12           ` [dpdk-dev] [PATCH v7 0/6] Fail-safe\ethdev: fix removal handling lack Matan Azrad
2018-01-20 21:12             ` [dpdk-dev] [PATCH v7 1/6] ethdev: add devop to check removal status Matan Azrad
2018-01-20 21:12             ` [dpdk-dev] [PATCH v7 2/6] net/mlx4: support a device removal check operation Matan Azrad
2018-01-20 21:12             ` [dpdk-dev] [PATCH v7 3/6] net/mlx5: " Matan Azrad
2018-01-20 21:12             ` [dpdk-dev] [PATCH v7 4/6] ethdev: adjust APIs removal error report Matan Azrad
2018-01-20 21:12             ` [dpdk-dev] [PATCH v7 5/6] ethdev: adjust flow " Matan Azrad
2018-01-20 21:12             ` [dpdk-dev] [PATCH v7 6/6] net/failsafe: fix removed device handling Matan Azrad
2018-01-21 20:28             ` [dpdk-dev] [PATCH v7 0/6] Fail-safe\ethdev: fix removal handling lack Ferruh Yigit

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1513703415-29145-7-git-send-email-matan@mellanox.com \
    --to=matan@mellanox.com \
    --cc=adrien.mazarguil@6wind.com \
    --cc=dev@dpdk.org \
    --cc=gaetan.rivet@6wind.com \
    --cc=thomas@monjalon.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).