DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [v4] net/e1000: i219 unit hang issue fix on reset/close
@ 2019-07-10  0:42 Xiao Zhang
  2019-07-10  4:48 ` Anand H. Krishnan
                   ` (2 more replies)
  0 siblings, 3 replies; 17+ messages in thread
From: Xiao Zhang @ 2019-07-10  0:42 UTC (permalink / raw)
  To: dev; +Cc: wei.zhao1, anandhkrishnan, Xiao Zhang

Unit hang may occur if multiple descriptors are available in the rings
during reset or close. This state can be detected by configure status
by bit 8 in register. If the bit is set and there are pending descriptors
in one of the rings, we must flush them before reset or close.

Signed-off-by: Xiao Zhang <xiao.zhang@intel.com>
---
 drivers/net/e1000/e1000_ethdev.h |   4 ++
 drivers/net/e1000/igb_ethdev.c   |   4 ++
 drivers/net/e1000/igb_rxtx.c     | 105 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 113 insertions(+)

diff --git a/drivers/net/e1000/e1000_ethdev.h b/drivers/net/e1000/e1000_ethdev.h
index 67acb73..349144a 100644
--- a/drivers/net/e1000/e1000_ethdev.h
+++ b/drivers/net/e1000/e1000_ethdev.h
@@ -35,6 +35,9 @@
 #define IGB_MAX_RX_QUEUE_NUM           8
 #define IGB_MAX_RX_QUEUE_NUM_82576     16
 
+#define E1000_I219_MAX_RX_QUEUE_NUM		2
+#define E1000_I219_MAX_TX_QUEUE_NUM		2
+
 #define E1000_SYN_FILTER_ENABLE        0x00000001 /* syn filter enable field */
 #define E1000_SYN_FILTER_QUEUE         0x0000000E /* syn filter queue field */
 #define E1000_SYN_FILTER_QUEUE_SHIFT   1          /* syn filter queue field */
@@ -522,5 +525,6 @@ int igb_action_rss_same(const struct rte_flow_action_rss *comp,
 int igb_config_rss_filter(struct rte_eth_dev *dev,
 			struct igb_rte_flow_rss_conf *conf,
 			bool add);
+void igb_flush_desc_rings(struct rte_eth_dev *dev);
 
 #endif /* _E1000_ETHDEV_H_ */
diff --git a/drivers/net/e1000/igb_ethdev.c b/drivers/net/e1000/igb_ethdev.c
index 3ee28cf..845101b 100644
--- a/drivers/net/e1000/igb_ethdev.c
+++ b/drivers/net/e1000/igb_ethdev.c
@@ -1589,6 +1589,10 @@ eth_igb_close(struct rte_eth_dev *dev)
 	eth_igb_stop(dev);
 	adapter->stopped = 1;
 
+	/* Flush desc rings for i219 */
+	if (hw->mac.type >= e1000_pch_spt)
+		igb_flush_desc_rings(dev);
+
 	e1000_phy_hw_reset(hw);
 	igb_release_manageability(hw);
 	igb_hw_control_release(hw);
diff --git a/drivers/net/e1000/igb_rxtx.c b/drivers/net/e1000/igb_rxtx.c
index c5606de..cad28f3 100644
--- a/drivers/net/e1000/igb_rxtx.c
+++ b/drivers/net/e1000/igb_rxtx.c
@@ -18,6 +18,7 @@
 #include <rte_log.h>
 #include <rte_debug.h>
 #include <rte_pci.h>
+#include <rte_bus_pci.h>
 #include <rte_memory.h>
 #include <rte_memcpy.h>
 #include <rte_memzone.h>
@@ -63,6 +64,10 @@
 #define IGB_TX_OFFLOAD_NOTSUP_MASK \
 		(PKT_TX_OFFLOAD_MASK ^ IGB_TX_OFFLOAD_MASK)
 
+/* PCI offset for querying descriptor ring status*/
+#define PCICFG_DESC_RING_STATUS           0xE4
+#define FLUSH_DESC_REQUIRED               0x100
+
 /**
  * Structure associated with each descriptor of the RX ring of a RX queue.
  */
@@ -2962,3 +2967,103 @@ igb_config_rss_filter(struct rte_eth_dev *dev,
 
 	return 0;
 }
+
+static void e1000_flush_tx_ring(struct rte_eth_dev *dev)
+{
+	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	volatile union e1000_adv_tx_desc *tx_desc;
+	uint32_t tdt, tctl, txd_lower = E1000_TXD_CMD_IFCS;
+	uint16_t size = 512;
+	struct igb_tx_queue *txq;
+	int i;
+
+	if (dev->data->tx_queues == NULL)
+		return;
+	tctl = E1000_READ_REG(hw, E1000_TCTL);
+	E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
+	for (i = 0; i < dev->data->nb_tx_queues &&
+		i < E1000_I219_MAX_TX_QUEUE_NUM; i++) {
+		txq = dev->data->tx_queues[i];
+		tdt = E1000_READ_REG(hw, E1000_TDT(i));
+		if (tdt != txq->tx_tail)
+			return;
+		tx_desc = &txq->tx_ring[txq->tx_tail];
+		tx_desc->read.buffer_addr = txq->tx_ring_phys_addr;
+		tx_desc->read.cmd_type_len = rte_cpu_to_le_32(txd_lower | size);
+		tx_desc->read.olinfo_status = 0;
+
+		rte_wmb();
+		txq->tx_tail++;
+		if (txq->tx_tail == txq->nb_tx_desc)
+			txq->tx_tail = 0;
+		rte_io_wmb();
+		E1000_WRITE_REG(hw, E1000_TDT(i), txq->tx_tail);
+		usec_delay(250);
+	}
+}
+
+static void e1000_flush_rx_ring(struct rte_eth_dev *dev)
+{
+	uint32_t rctl, rxdctl;
+	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	int i;
+
+	rctl = E1000_READ_REG(hw, E1000_RCTL);
+	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
+	E1000_WRITE_FLUSH(hw);
+	usec_delay(150);
+
+	for (i = 0; i < dev->data->nb_rx_queues &&
+		i < E1000_I219_MAX_RX_QUEUE_NUM; i++) {
+		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
+		/* zero the lower 14 bits (prefetch and host thresholds) */
+		rxdctl &= 0xffffc000;
+
+		/* update thresholds: prefetch threshold to 31,
+		 * host threshold to 1 and make sure the granularity
+		 * is "descriptors" and not "cache lines"
+		 */
+		rxdctl |= (0x1F | (1UL << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
+
+		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
+	}
+	/* momentarily enable the RX ring for the changes to take effect */
+	E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
+	E1000_WRITE_FLUSH(hw);
+	usec_delay(150);
+	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
+}
+
+/**
+ * igb_flush_desc_rings - remove all descriptors from the descriptor rings
+ *
+ * In i219, the descriptor rings must be emptied before resetting/closing the
+ * HW. Failure to do this will cause the HW to enter a unit hang state which
+ * can only be released by PCI reset on the device
+ *
+ */
+
+void igb_flush_desc_rings(struct rte_eth_dev *dev)
+{
+	uint32_t fextnvm11, tdlen;
+	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
+	uint32_t hang_state = 0;
+
+	fextnvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
+	E1000_WRITE_REG(hw, E1000_FEXTNVM11,
+			fextnvm11 | E1000_FEXTNVM11_DISABLE_MULR_FIX);
+	tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
+	rte_pci_read_config(pci_dev, &hang_state, sizeof(hang_state),
+				PCICFG_DESC_RING_STATUS);
+
+	/* do nothing if we're not in faulty state, or if the queue is empty */
+	if ((hang_state & FLUSH_DESC_REQUIRED) && tdlen) {
+		/* flush desc ring */
+		e1000_flush_tx_ring(dev);
+		rte_pci_read_config(pci_dev, &hang_state, sizeof(hang_state),
+					PCICFG_DESC_RING_STATUS);
+		if (hang_state & FLUSH_DESC_REQUIRED)
+			e1000_flush_rx_ring(dev);
+	}
+}
-- 
2.7.4


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [dpdk-dev] [v4] net/e1000: i219 unit hang issue fix on reset/close
  2019-07-10  0:42 [dpdk-dev] [v4] net/e1000: i219 unit hang issue fix on reset/close Xiao Zhang
@ 2019-07-10  4:48 ` Anand H. Krishnan
  2019-07-16  1:03   ` Anand H. Krishnan
  2019-07-10  7:37 ` Thomas Monjalon
  2019-07-11  9:51 ` [dpdk-dev] [v5] net/e1000: fix i219 hang " Xiao Zhang
  2 siblings, 1 reply; 17+ messages in thread
From: Anand H. Krishnan @ 2019-07-10  4:48 UTC (permalink / raw)
  To: Xiao Zhang; +Cc: dev, Zhao1, Wei

More comments inline:

On Tue, Jul 9, 2019 at 9:16 PM Xiao Zhang <xiao.zhang@intel.com> wrote:
>
> Unit hang may occur if multiple descriptors are available in the rings
> during reset or close. This state can be detected by configure status
> by bit 8 in register. If the bit is set and there are pending descriptors
> in one of the rings, we must flush them before reset or close.
>
> Signed-off-by: Xiao Zhang <xiao.zhang@intel.com>
> ---
>  drivers/net/e1000/e1000_ethdev.h |   4 ++
>  drivers/net/e1000/igb_ethdev.c   |   4 ++
>  drivers/net/e1000/igb_rxtx.c     | 105 +++++++++++++++++++++++++++++++++++++++
>  3 files changed, 113 insertions(+)
>
> diff --git a/drivers/net/e1000/e1000_ethdev.h b/drivers/net/e1000/e1000_ethdev.h
> index 67acb73..349144a 100644
> --- a/drivers/net/e1000/e1000_ethdev.h
> +++ b/drivers/net/e1000/e1000_ethdev.h
> @@ -35,6 +35,9 @@
>  #define IGB_MAX_RX_QUEUE_NUM           8
>  #define IGB_MAX_RX_QUEUE_NUM_82576     16
>
> +#define E1000_I219_MAX_RX_QUEUE_NUM            2
> +#define E1000_I219_MAX_TX_QUEUE_NUM            2
> +
>  #define E1000_SYN_FILTER_ENABLE        0x00000001 /* syn filter enable field */
>  #define E1000_SYN_FILTER_QUEUE         0x0000000E /* syn filter queue field */
>  #define E1000_SYN_FILTER_QUEUE_SHIFT   1          /* syn filter queue field */
> @@ -522,5 +525,6 @@ int igb_action_rss_same(const struct rte_flow_action_rss *comp,
>  int igb_config_rss_filter(struct rte_eth_dev *dev,
>                         struct igb_rte_flow_rss_conf *conf,
>                         bool add);
> +void igb_flush_desc_rings(struct rte_eth_dev *dev);
>
>  #endif /* _E1000_ETHDEV_H_ */
> diff --git a/drivers/net/e1000/igb_ethdev.c b/drivers/net/e1000/igb_ethdev.c
> index 3ee28cf..845101b 100644
> --- a/drivers/net/e1000/igb_ethdev.c
> +++ b/drivers/net/e1000/igb_ethdev.c
> @@ -1589,6 +1589,10 @@ eth_igb_close(struct rte_eth_dev *dev)
>         eth_igb_stop(dev);
>         adapter->stopped = 1;
>
> +       /* Flush desc rings for i219 */
> +       if (hw->mac.type >= e1000_pch_spt)
> +               igb_flush_desc_rings(dev);
> +

I am a bit confused as to which driver handles i219. I thought it is
the em_ethdev.c.
Also, the place to put this code, I think is more appropriate in eth_em_stop.


>         e1000_phy_hw_reset(hw);
>         igb_release_manageability(hw);
>         igb_hw_control_release(hw);
> diff --git a/drivers/net/e1000/igb_rxtx.c b/drivers/net/e1000/igb_rxtx.c
> index c5606de..cad28f3 100644
> --- a/drivers/net/e1000/igb_rxtx.c
> +++ b/drivers/net/e1000/igb_rxtx.c
> @@ -18,6 +18,7 @@
>  #include <rte_log.h>
>  #include <rte_debug.h>
>  #include <rte_pci.h>
> +#include <rte_bus_pci.h>
>  #include <rte_memory.h>
>  #include <rte_memcpy.h>
>  #include <rte_memzone.h>
> @@ -63,6 +64,10 @@
>  #define IGB_TX_OFFLOAD_NOTSUP_MASK \
>                 (PKT_TX_OFFLOAD_MASK ^ IGB_TX_OFFLOAD_MASK)
>
> +/* PCI offset for querying descriptor ring status*/
> +#define PCICFG_DESC_RING_STATUS           0xE4
> +#define FLUSH_DESC_REQUIRED               0x100
> +
>  /**
>   * Structure associated with each descriptor of the RX ring of a RX queue.
>   */
> @@ -2962,3 +2967,103 @@ igb_config_rss_filter(struct rte_eth_dev *dev,
>
>         return 0;
>  }
> +
> +static void e1000_flush_tx_ring(struct rte_eth_dev *dev)
> +{
> +       struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> +       volatile union e1000_adv_tx_desc *tx_desc;
> +       uint32_t tdt, tctl, txd_lower = E1000_TXD_CMD_IFCS;
> +       uint16_t size = 512;
> +       struct igb_tx_queue *txq;
> +       int i;
> +
> +       if (dev->data->tx_queues == NULL)
> +               return;
> +       tctl = E1000_READ_REG(hw, E1000_TCTL);
> +       E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
> +       for (i = 0; i < dev->data->nb_tx_queues &&
> +               i < E1000_I219_MAX_TX_QUEUE_NUM; i++) {
> +               txq = dev->data->tx_queues[i];
> +               tdt = E1000_READ_REG(hw, E1000_TDT(i));
> +               if (tdt != txq->tx_tail)
> +                       return;
> +               tx_desc = &txq->tx_ring[txq->tx_tail];
> +               tx_desc->read.buffer_addr = txq->tx_ring_phys_addr;

Should you be using rte_cpu_to_le64 here?


> +               tx_desc->read.cmd_type_len = rte_cpu_to_le_32(txd_lower | size);

There is a lower.data  and upper.data that needs to be set, not this
field AFAIR.


> +               tx_desc->read.olinfo_status = 0;
> +
> +               rte_wmb();
> +               txq->tx_tail++;
> +               if (txq->tx_tail == txq->nb_tx_desc)
> +                       txq->tx_tail = 0;
> +               rte_io_wmb();
> +               E1000_WRITE_REG(hw, E1000_TDT(i), txq->tx_tail);

Should you be using E1000_PCI_REG_WRITE_RELAXED. Can you just check
other instances of how this register is written?

Thanks,
Anand


> +               usec_delay(250);
> +       }
> +}
> +
> +static void e1000_flush_rx_ring(struct rte_eth_dev *dev)
> +{
> +       uint32_t rctl, rxdctl;
> +       struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> +       int i;
> +
> +       rctl = E1000_READ_REG(hw, E1000_RCTL);
> +       E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
> +       E1000_WRITE_FLUSH(hw);
> +       usec_delay(150);
> +
> +       for (i = 0; i < dev->data->nb_rx_queues &&
> +               i < E1000_I219_MAX_RX_QUEUE_NUM; i++) {
> +               rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
> +               /* zero the lower 14 bits (prefetch and host thresholds) */
> +               rxdctl &= 0xffffc000;
> +
> +               /* update thresholds: prefetch threshold to 31,
> +                * host threshold to 1 and make sure the granularity
> +                * is "descriptors" and not "cache lines"
> +                */
> +               rxdctl |= (0x1F | (1UL << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
> +
> +               E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
> +       }
> +       /* momentarily enable the RX ring for the changes to take effect */
> +       E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
> +       E1000_WRITE_FLUSH(hw);
> +       usec_delay(150);
> +       E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
> +}
> +
> +/**
> + * igb_flush_desc_rings - remove all descriptors from the descriptor rings
> + *
> + * In i219, the descriptor rings must be emptied before resetting/closing the
> + * HW. Failure to do this will cause the HW to enter a unit hang state which
> + * can only be released by PCI reset on the device
> + *
> + */
> +
> +void igb_flush_desc_rings(struct rte_eth_dev *dev)
> +{
> +       uint32_t fextnvm11, tdlen;
> +       struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> +       struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
> +       uint32_t hang_state = 0;
> +
> +       fextnvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
> +       E1000_WRITE_REG(hw, E1000_FEXTNVM11,
> +                       fextnvm11 | E1000_FEXTNVM11_DISABLE_MULR_FIX);
> +       tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
> +       rte_pci_read_config(pci_dev, &hang_state, sizeof(hang_state),
> +                               PCICFG_DESC_RING_STATUS);
> +
> +       /* do nothing if we're not in faulty state, or if the queue is empty */
> +       if ((hang_state & FLUSH_DESC_REQUIRED) && tdlen) {
> +               /* flush desc ring */
> +               e1000_flush_tx_ring(dev);
> +               rte_pci_read_config(pci_dev, &hang_state, sizeof(hang_state),
> +                                       PCICFG_DESC_RING_STATUS);
> +               if (hang_state & FLUSH_DESC_REQUIRED)
> +                       e1000_flush_rx_ring(dev);
> +       }
> +}
> --
> 2.7.4
>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [dpdk-dev] [v4] net/e1000: i219 unit hang issue fix on reset/close
  2019-07-10  0:42 [dpdk-dev] [v4] net/e1000: i219 unit hang issue fix on reset/close Xiao Zhang
  2019-07-10  4:48 ` Anand H. Krishnan
@ 2019-07-10  7:37 ` Thomas Monjalon
  2019-07-11  9:51 ` [dpdk-dev] [v5] net/e1000: fix i219 hang " Xiao Zhang
  2 siblings, 0 replies; 17+ messages in thread
From: Thomas Monjalon @ 2019-07-10  7:37 UTC (permalink / raw)
  To: Xiao Zhang, qi.z.zhang; +Cc: dev, wei.zhao1, anandhkrishnan

Xiao, Qi,
Please take care of keeping all versions of the patch in the same thread,
by using --in-reply-to with git-send-email.
When a new version is sent, please delegate it to the right maintainer
(Qi in this case) and set the old version as superseded in patchwork.

About the title,
"fix" must be the first word,
"hang issue" can be shortened with just "hang"
("issue" is never needed in a title, especially after "fix").
So it can be "net/e1000: fix i219 hang on reset/close"

Thanks



^ permalink raw reply	[flat|nested] 17+ messages in thread

* [dpdk-dev] [v5] net/e1000: fix i219 hang on reset/close
  2019-07-10  0:42 [dpdk-dev] [v4] net/e1000: i219 unit hang issue fix on reset/close Xiao Zhang
  2019-07-10  4:48 ` Anand H. Krishnan
  2019-07-10  7:37 ` Thomas Monjalon
@ 2019-07-11  9:51 ` Xiao Zhang
  2019-07-22 11:19   ` [dpdk-dev] [v6] " Xiao Zhang
  2 siblings, 1 reply; 17+ messages in thread
From: Xiao Zhang @ 2019-07-11  9:51 UTC (permalink / raw)
  To: dev; +Cc: wei.zhao1, anandhkrishnan, Xiao Zhang

Unit hang may occur if multiple descriptors are available in the rings
during reset or close. This state can be detected by configure status
by bit 8 in register. If the bit is set and there are pending descriptors
in one of the rings, we must flush them before reset or close.

Signed-off-by: Xiao Zhang <xiao.zhang@intel.com>
---
 drivers/net/e1000/e1000_ethdev.h |   4 ++
 drivers/net/e1000/igb_ethdev.c   |   4 ++
 drivers/net/e1000/igb_rxtx.c     | 105 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 113 insertions(+)

diff --git a/drivers/net/e1000/e1000_ethdev.h b/drivers/net/e1000/e1000_ethdev.h
index 67acb73..349144a 100644
--- a/drivers/net/e1000/e1000_ethdev.h
+++ b/drivers/net/e1000/e1000_ethdev.h
@@ -35,6 +35,9 @@
 #define IGB_MAX_RX_QUEUE_NUM           8
 #define IGB_MAX_RX_QUEUE_NUM_82576     16
 
+#define E1000_I219_MAX_RX_QUEUE_NUM		2
+#define E1000_I219_MAX_TX_QUEUE_NUM		2
+
 #define E1000_SYN_FILTER_ENABLE        0x00000001 /* syn filter enable field */
 #define E1000_SYN_FILTER_QUEUE         0x0000000E /* syn filter queue field */
 #define E1000_SYN_FILTER_QUEUE_SHIFT   1          /* syn filter queue field */
@@ -522,5 +525,6 @@ int igb_action_rss_same(const struct rte_flow_action_rss *comp,
 int igb_config_rss_filter(struct rte_eth_dev *dev,
 			struct igb_rte_flow_rss_conf *conf,
 			bool add);
+void igb_flush_desc_rings(struct rte_eth_dev *dev);
 
 #endif /* _E1000_ETHDEV_H_ */
diff --git a/drivers/net/e1000/igb_ethdev.c b/drivers/net/e1000/igb_ethdev.c
index 3ee28cf..845101b 100644
--- a/drivers/net/e1000/igb_ethdev.c
+++ b/drivers/net/e1000/igb_ethdev.c
@@ -1589,6 +1589,10 @@ eth_igb_close(struct rte_eth_dev *dev)
 	eth_igb_stop(dev);
 	adapter->stopped = 1;
 
+	/* Flush desc rings for i219 */
+	if (hw->mac.type >= e1000_pch_spt)
+		igb_flush_desc_rings(dev);
+
 	e1000_phy_hw_reset(hw);
 	igb_release_manageability(hw);
 	igb_hw_control_release(hw);
diff --git a/drivers/net/e1000/igb_rxtx.c b/drivers/net/e1000/igb_rxtx.c
index c5606de..cad28f3 100644
--- a/drivers/net/e1000/igb_rxtx.c
+++ b/drivers/net/e1000/igb_rxtx.c
@@ -18,6 +18,7 @@
 #include <rte_log.h>
 #include <rte_debug.h>
 #include <rte_pci.h>
+#include <rte_bus_pci.h>
 #include <rte_memory.h>
 #include <rte_memcpy.h>
 #include <rte_memzone.h>
@@ -63,6 +64,10 @@
 #define IGB_TX_OFFLOAD_NOTSUP_MASK \
 		(PKT_TX_OFFLOAD_MASK ^ IGB_TX_OFFLOAD_MASK)
 
+/* PCI offset for querying descriptor ring status*/
+#define PCICFG_DESC_RING_STATUS           0xE4
+#define FLUSH_DESC_REQUIRED               0x100
+
 /**
  * Structure associated with each descriptor of the RX ring of a RX queue.
  */
@@ -2962,3 +2967,103 @@ igb_config_rss_filter(struct rte_eth_dev *dev,
 
 	return 0;
 }
+
+static void e1000_flush_tx_ring(struct rte_eth_dev *dev)
+{
+	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	volatile union e1000_adv_tx_desc *tx_desc;
+	uint32_t tdt, tctl, txd_lower = E1000_TXD_CMD_IFCS;
+	uint16_t size = 512;
+	struct igb_tx_queue *txq;
+	int i;
+
+	if (dev->data->tx_queues == NULL)
+		return;
+	tctl = E1000_READ_REG(hw, E1000_TCTL);
+	E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
+	for (i = 0; i < dev->data->nb_tx_queues &&
+		i < E1000_I219_MAX_TX_QUEUE_NUM; i++) {
+		txq = dev->data->tx_queues[i];
+		tdt = E1000_READ_REG(hw, E1000_TDT(i));
+		if (tdt != txq->tx_tail)
+			return;
+		tx_desc = &txq->tx_ring[txq->tx_tail];
+		tx_desc->read.buffer_addr = txq->tx_ring_phys_addr;
+		tx_desc->read.cmd_type_len = rte_cpu_to_le_32(txd_lower | size);
+		tx_desc->read.olinfo_status = 0;
+
+		rte_wmb();
+		txq->tx_tail++;
+		if (txq->tx_tail == txq->nb_tx_desc)
+			txq->tx_tail = 0;
+		rte_io_wmb();
+		E1000_WRITE_REG(hw, E1000_TDT(i), txq->tx_tail);
+		usec_delay(250);
+	}
+}
+
+static void e1000_flush_rx_ring(struct rte_eth_dev *dev)
+{
+	uint32_t rctl, rxdctl;
+	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	int i;
+
+	rctl = E1000_READ_REG(hw, E1000_RCTL);
+	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
+	E1000_WRITE_FLUSH(hw);
+	usec_delay(150);
+
+	for (i = 0; i < dev->data->nb_rx_queues &&
+		i < E1000_I219_MAX_RX_QUEUE_NUM; i++) {
+		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
+		/* zero the lower 14 bits (prefetch and host thresholds) */
+		rxdctl &= 0xffffc000;
+
+		/* update thresholds: prefetch threshold to 31,
+		 * host threshold to 1 and make sure the granularity
+		 * is "descriptors" and not "cache lines"
+		 */
+		rxdctl |= (0x1F | (1UL << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
+
+		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
+	}
+	/* momentarily enable the RX ring for the changes to take effect */
+	E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
+	E1000_WRITE_FLUSH(hw);
+	usec_delay(150);
+	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
+}
+
+/**
+ * igb_flush_desc_rings - remove all descriptors from the descriptor rings
+ *
+ * In i219, the descriptor rings must be emptied before resetting/closing the
+ * HW. Failure to do this will cause the HW to enter a unit hang state which
+ * can only be released by PCI reset on the device
+ *
+ */
+
+void igb_flush_desc_rings(struct rte_eth_dev *dev)
+{
+	uint32_t fextnvm11, tdlen;
+	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
+	uint32_t hang_state = 0;
+
+	fextnvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
+	E1000_WRITE_REG(hw, E1000_FEXTNVM11,
+			fextnvm11 | E1000_FEXTNVM11_DISABLE_MULR_FIX);
+	tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
+	rte_pci_read_config(pci_dev, &hang_state, sizeof(hang_state),
+				PCICFG_DESC_RING_STATUS);
+
+	/* do nothing if we're not in faulty state, or if the queue is empty */
+	if ((hang_state & FLUSH_DESC_REQUIRED) && tdlen) {
+		/* flush desc ring */
+		e1000_flush_tx_ring(dev);
+		rte_pci_read_config(pci_dev, &hang_state, sizeof(hang_state),
+					PCICFG_DESC_RING_STATUS);
+		if (hang_state & FLUSH_DESC_REQUIRED)
+			e1000_flush_rx_ring(dev);
+	}
+}
-- 
2.7.4


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [dpdk-dev] [v4] net/e1000: i219 unit hang issue fix on reset/close
  2019-07-10  4:48 ` Anand H. Krishnan
@ 2019-07-16  1:03   ` Anand H. Krishnan
  2019-07-16  2:58     ` Zhang, Xiao
  0 siblings, 1 reply; 17+ messages in thread
From: Anand H. Krishnan @ 2019-07-16  1:03 UTC (permalink / raw)
  To: Xiao Zhang; +Cc: dev, Zhao1, Wei

Xiao,

I didn't hear back from you on the comments.

Thanks,
Anand

On Wed, Jul 10, 2019 at 10:18 AM Anand H. Krishnan
<anandhkrishnan@gmail.com> wrote:
>
> More comments inline:
>
> On Tue, Jul 9, 2019 at 9:16 PM Xiao Zhang <xiao.zhang@intel.com> wrote:
> >
> > Unit hang may occur if multiple descriptors are available in the rings
> > during reset or close. This state can be detected by configure status
> > by bit 8 in register. If the bit is set and there are pending descriptors
> > in one of the rings, we must flush them before reset or close.
> >
> > Signed-off-by: Xiao Zhang <xiao.zhang@intel.com>
> > ---
> >  drivers/net/e1000/e1000_ethdev.h |   4 ++
> >  drivers/net/e1000/igb_ethdev.c   |   4 ++
> >  drivers/net/e1000/igb_rxtx.c     | 105 +++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 113 insertions(+)
> >
> > diff --git a/drivers/net/e1000/e1000_ethdev.h b/drivers/net/e1000/e1000_ethdev.h
> > index 67acb73..349144a 100644
> > --- a/drivers/net/e1000/e1000_ethdev.h
> > +++ b/drivers/net/e1000/e1000_ethdev.h
> > @@ -35,6 +35,9 @@
> >  #define IGB_MAX_RX_QUEUE_NUM           8
> >  #define IGB_MAX_RX_QUEUE_NUM_82576     16
> >
> > +#define E1000_I219_MAX_RX_QUEUE_NUM            2
> > +#define E1000_I219_MAX_TX_QUEUE_NUM            2
> > +
> >  #define E1000_SYN_FILTER_ENABLE        0x00000001 /* syn filter enable field */
> >  #define E1000_SYN_FILTER_QUEUE         0x0000000E /* syn filter queue field */
> >  #define E1000_SYN_FILTER_QUEUE_SHIFT   1          /* syn filter queue field */
> > @@ -522,5 +525,6 @@ int igb_action_rss_same(const struct rte_flow_action_rss *comp,
> >  int igb_config_rss_filter(struct rte_eth_dev *dev,
> >                         struct igb_rte_flow_rss_conf *conf,
> >                         bool add);
> > +void igb_flush_desc_rings(struct rte_eth_dev *dev);
> >
> >  #endif /* _E1000_ETHDEV_H_ */
> > diff --git a/drivers/net/e1000/igb_ethdev.c b/drivers/net/e1000/igb_ethdev.c
> > index 3ee28cf..845101b 100644
> > --- a/drivers/net/e1000/igb_ethdev.c
> > +++ b/drivers/net/e1000/igb_ethdev.c
> > @@ -1589,6 +1589,10 @@ eth_igb_close(struct rte_eth_dev *dev)
> >         eth_igb_stop(dev);
> >         adapter->stopped = 1;
> >
> > +       /* Flush desc rings for i219 */
> > +       if (hw->mac.type >= e1000_pch_spt)
> > +               igb_flush_desc_rings(dev);
> > +
>
> I am a bit confused as to which driver handles i219. I thought it is
> the em_ethdev.c.
> Also, the place to put this code, I think is more appropriate in eth_em_stop.
>
>
> >         e1000_phy_hw_reset(hw);
> >         igb_release_manageability(hw);
> >         igb_hw_control_release(hw);
> > diff --git a/drivers/net/e1000/igb_rxtx.c b/drivers/net/e1000/igb_rxtx.c
> > index c5606de..cad28f3 100644
> > --- a/drivers/net/e1000/igb_rxtx.c
> > +++ b/drivers/net/e1000/igb_rxtx.c
> > @@ -18,6 +18,7 @@
> >  #include <rte_log.h>
> >  #include <rte_debug.h>
> >  #include <rte_pci.h>
> > +#include <rte_bus_pci.h>
> >  #include <rte_memory.h>
> >  #include <rte_memcpy.h>
> >  #include <rte_memzone.h>
> > @@ -63,6 +64,10 @@
> >  #define IGB_TX_OFFLOAD_NOTSUP_MASK \
> >                 (PKT_TX_OFFLOAD_MASK ^ IGB_TX_OFFLOAD_MASK)
> >
> > +/* PCI offset for querying descriptor ring status*/
> > +#define PCICFG_DESC_RING_STATUS           0xE4
> > +#define FLUSH_DESC_REQUIRED               0x100
> > +
> >  /**
> >   * Structure associated with each descriptor of the RX ring of a RX queue.
> >   */
> > @@ -2962,3 +2967,103 @@ igb_config_rss_filter(struct rte_eth_dev *dev,
> >
> >         return 0;
> >  }
> > +
> > +static void e1000_flush_tx_ring(struct rte_eth_dev *dev)
> > +{
> > +       struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> > +       volatile union e1000_adv_tx_desc *tx_desc;
> > +       uint32_t tdt, tctl, txd_lower = E1000_TXD_CMD_IFCS;
> > +       uint16_t size = 512;
> > +       struct igb_tx_queue *txq;
> > +       int i;
> > +
> > +       if (dev->data->tx_queues == NULL)
> > +               return;
> > +       tctl = E1000_READ_REG(hw, E1000_TCTL);
> > +       E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
> > +       for (i = 0; i < dev->data->nb_tx_queues &&
> > +               i < E1000_I219_MAX_TX_QUEUE_NUM; i++) {
> > +               txq = dev->data->tx_queues[i];
> > +               tdt = E1000_READ_REG(hw, E1000_TDT(i));
> > +               if (tdt != txq->tx_tail)
> > +                       return;
> > +               tx_desc = &txq->tx_ring[txq->tx_tail];
> > +               tx_desc->read.buffer_addr = txq->tx_ring_phys_addr;
>
> Should you be using rte_cpu_to_le64 here?
>
>
> > +               tx_desc->read.cmd_type_len = rte_cpu_to_le_32(txd_lower | size);
>
> There is a lower.data  and upper.data that needs to be set, not this
> field AFAIR.
>
>
> > +               tx_desc->read.olinfo_status = 0;
> > +
> > +               rte_wmb();
> > +               txq->tx_tail++;
> > +               if (txq->tx_tail == txq->nb_tx_desc)
> > +                       txq->tx_tail = 0;
> > +               rte_io_wmb();
> > +               E1000_WRITE_REG(hw, E1000_TDT(i), txq->tx_tail);
>
> Should you be using E1000_PCI_REG_WRITE_RELAXED. Can you just check
> other instances of how this register is written?
>
> Thanks,
> Anand
>
>
> > +               usec_delay(250);
> > +       }
> > +}
> > +
> > +static void e1000_flush_rx_ring(struct rte_eth_dev *dev)
> > +{
> > +       uint32_t rctl, rxdctl;
> > +       struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> > +       int i;
> > +
> > +       rctl = E1000_READ_REG(hw, E1000_RCTL);
> > +       E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
> > +       E1000_WRITE_FLUSH(hw);
> > +       usec_delay(150);
> > +
> > +       for (i = 0; i < dev->data->nb_rx_queues &&
> > +               i < E1000_I219_MAX_RX_QUEUE_NUM; i++) {
> > +               rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
> > +               /* zero the lower 14 bits (prefetch and host thresholds) */
> > +               rxdctl &= 0xffffc000;
> > +
> > +               /* update thresholds: prefetch threshold to 31,
> > +                * host threshold to 1 and make sure the granularity
> > +                * is "descriptors" and not "cache lines"
> > +                */
> > +               rxdctl |= (0x1F | (1UL << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
> > +
> > +               E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
> > +       }
> > +       /* momentarily enable the RX ring for the changes to take effect */
> > +       E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
> > +       E1000_WRITE_FLUSH(hw);
> > +       usec_delay(150);
> > +       E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
> > +}
> > +
> > +/**
> > + * igb_flush_desc_rings - remove all descriptors from the descriptor rings
> > + *
> > + * In i219, the descriptor rings must be emptied before resetting/closing the
> > + * HW. Failure to do this will cause the HW to enter a unit hang state which
> > + * can only be released by PCI reset on the device
> > + *
> > + */
> > +
> > +void igb_flush_desc_rings(struct rte_eth_dev *dev)
> > +{
> > +       uint32_t fextnvm11, tdlen;
> > +       struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> > +       struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
> > +       uint32_t hang_state = 0;
> > +
> > +       fextnvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
> > +       E1000_WRITE_REG(hw, E1000_FEXTNVM11,
> > +                       fextnvm11 | E1000_FEXTNVM11_DISABLE_MULR_FIX);
> > +       tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
> > +       rte_pci_read_config(pci_dev, &hang_state, sizeof(hang_state),
> > +                               PCICFG_DESC_RING_STATUS);
> > +
> > +       /* do nothing if we're not in faulty state, or if the queue is empty */
> > +       if ((hang_state & FLUSH_DESC_REQUIRED) && tdlen) {
> > +               /* flush desc ring */
> > +               e1000_flush_tx_ring(dev);
> > +               rte_pci_read_config(pci_dev, &hang_state, sizeof(hang_state),
> > +                                       PCICFG_DESC_RING_STATUS);
> > +               if (hang_state & FLUSH_DESC_REQUIRED)
> > +                       e1000_flush_rx_ring(dev);
> > +       }
> > +}
> > --
> > 2.7.4
> >

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [dpdk-dev] [v4] net/e1000: i219 unit hang issue fix on reset/close
  2019-07-16  1:03   ` Anand H. Krishnan
@ 2019-07-16  2:58     ` Zhang, Xiao
  0 siblings, 0 replies; 17+ messages in thread
From: Zhang, Xiao @ 2019-07-16  2:58 UTC (permalink / raw)
  To: Anand H. Krishnan; +Cc: dev, Zhao1, Wei

Hi Anand,

Sorry, I didn't get your last email.

Thanks,
Xiao

> -----Original Message-----
> From: Anand H. Krishnan [mailto:anandhkrishnan@gmail.com]
> Sent: Tuesday, July 16, 2019 9:03 AM
> To: Zhang, Xiao <xiao.zhang@intel.com>
> Cc: dev@dpdk.org; Zhao1, Wei <wei.zhao1@intel.com>
> Subject: Re: [v4] net/e1000: i219 unit hang issue fix on reset/close
> 
> Xiao,
> 
> I didn't hear back from you on the comments.
> 
> Thanks,
> Anand
> 
> On Wed, Jul 10, 2019 at 10:18 AM Anand H. Krishnan
> <anandhkrishnan@gmail.com> wrote:
> >
> > More comments inline:
> >
> > On Tue, Jul 9, 2019 at 9:16 PM Xiao Zhang <xiao.zhang@intel.com> wrote:
> > >
> > > Unit hang may occur if multiple descriptors are available in the
> > > rings during reset or close. This state can be detected by configure
> > > status by bit 8 in register. If the bit is set and there are pending
> > > descriptors in one of the rings, we must flush them before reset or close.
> > >
> > > Signed-off-by: Xiao Zhang <xiao.zhang@intel.com>
> > > ---
> > >  drivers/net/e1000/e1000_ethdev.h |   4 ++
> > >  drivers/net/e1000/igb_ethdev.c   |   4 ++
> > >  drivers/net/e1000/igb_rxtx.c     | 105
> +++++++++++++++++++++++++++++++++++++++
> > >  3 files changed, 113 insertions(+)
> > >
> > > diff --git a/drivers/net/e1000/e1000_ethdev.h
> > > b/drivers/net/e1000/e1000_ethdev.h
> > > index 67acb73..349144a 100644
> > > --- a/drivers/net/e1000/e1000_ethdev.h
> > > +++ b/drivers/net/e1000/e1000_ethdev.h
> > > @@ -35,6 +35,9 @@
> > >  #define IGB_MAX_RX_QUEUE_NUM           8
> > >  #define IGB_MAX_RX_QUEUE_NUM_82576     16
> > >
> > > +#define E1000_I219_MAX_RX_QUEUE_NUM            2
> > > +#define E1000_I219_MAX_TX_QUEUE_NUM            2
> > > +
> > >  #define E1000_SYN_FILTER_ENABLE        0x00000001 /* syn filter enable
> field */
> > >  #define E1000_SYN_FILTER_QUEUE         0x0000000E /* syn filter queue
> field */
> > >  #define E1000_SYN_FILTER_QUEUE_SHIFT   1          /* syn filter queue field
> */
> > > @@ -522,5 +525,6 @@ int igb_action_rss_same(const struct
> > > rte_flow_action_rss *comp,  int igb_config_rss_filter(struct rte_eth_dev
> *dev,
> > >                         struct igb_rte_flow_rss_conf *conf,
> > >                         bool add);
> > > +void igb_flush_desc_rings(struct rte_eth_dev *dev);
> > >
> > >  #endif /* _E1000_ETHDEV_H_ */
> > > diff --git a/drivers/net/e1000/igb_ethdev.c
> > > b/drivers/net/e1000/igb_ethdev.c index 3ee28cf..845101b 100644
> > > --- a/drivers/net/e1000/igb_ethdev.c
> > > +++ b/drivers/net/e1000/igb_ethdev.c
> > > @@ -1589,6 +1589,10 @@ eth_igb_close(struct rte_eth_dev *dev)
> > >         eth_igb_stop(dev);
> > >         adapter->stopped = 1;
> > >
> > > +       /* Flush desc rings for i219 */
> > > +       if (hw->mac.type >= e1000_pch_spt)
> > > +               igb_flush_desc_rings(dev);
> > > +
> >
> > I am a bit confused as to which driver handles i219. I thought it is
> > the em_ethdev.c.
> > Also, the place to put this code, I think is more appropriate in eth_em_stop.

Yes, I checked with the card, it used em_ethdev driver and adding this code in stop ops will be better.

> >
> >
> > >         e1000_phy_hw_reset(hw);
> > >         igb_release_manageability(hw);
> > >         igb_hw_control_release(hw);
> > > diff --git a/drivers/net/e1000/igb_rxtx.c
> > > b/drivers/net/e1000/igb_rxtx.c index c5606de..cad28f3 100644
> > > --- a/drivers/net/e1000/igb_rxtx.c
> > > +++ b/drivers/net/e1000/igb_rxtx.c
> > > @@ -18,6 +18,7 @@
> > >  #include <rte_log.h>
> > >  #include <rte_debug.h>
> > >  #include <rte_pci.h>
> > > +#include <rte_bus_pci.h>
> > >  #include <rte_memory.h>
> > >  #include <rte_memcpy.h>
> > >  #include <rte_memzone.h>
> > > @@ -63,6 +64,10 @@
> > >  #define IGB_TX_OFFLOAD_NOTSUP_MASK \
> > >                 (PKT_TX_OFFLOAD_MASK ^ IGB_TX_OFFLOAD_MASK)
> > >
> > > +/* PCI offset for querying descriptor ring status*/
> > > +#define PCICFG_DESC_RING_STATUS           0xE4
> > > +#define FLUSH_DESC_REQUIRED               0x100
> > > +
> > >  /**
> > >   * Structure associated with each descriptor of the RX ring of a RX queue.
> > >   */
> > > @@ -2962,3 +2967,103 @@ igb_config_rss_filter(struct rte_eth_dev
> > > *dev,
> > >
> > >         return 0;
> > >  }
> > > +
> > > +static void e1000_flush_tx_ring(struct rte_eth_dev *dev) {
> > > +       struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data-
> >dev_private);
> > > +       volatile union e1000_adv_tx_desc *tx_desc;
> > > +       uint32_t tdt, tctl, txd_lower = E1000_TXD_CMD_IFCS;
> > > +       uint16_t size = 512;
> > > +       struct igb_tx_queue *txq;
> > > +       int i;
> > > +
> > > +       if (dev->data->tx_queues == NULL)
> > > +               return;
> > > +       tctl = E1000_READ_REG(hw, E1000_TCTL);
> > > +       E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
> > > +       for (i = 0; i < dev->data->nb_tx_queues &&
> > > +               i < E1000_I219_MAX_TX_QUEUE_NUM; i++) {
> > > +               txq = dev->data->tx_queues[i];
> > > +               tdt = E1000_READ_REG(hw, E1000_TDT(i));
> > > +               if (tdt != txq->tx_tail)
> > > +                       return;
> > > +               tx_desc = &txq->tx_ring[txq->tx_tail];
> > > +               tx_desc->read.buffer_addr = txq->tx_ring_phys_addr;
> >
> > Should you be using rte_cpu_to_le64 here?

Will add conversion for this field.

> >
> >
> > > +               tx_desc->read.cmd_type_len =
> > > + rte_cpu_to_le_32(txd_lower | size);
> >
> > There is a lower.data  and upper.data that needs to be set, not this
> > field AFAIR.

As I checked the kernel driver, in the struct e1000_tx_desc, the lower and upper data are cmd/length and status.

> >
> >
> > > +               tx_desc->read.olinfo_status = 0;
> > > +
> > > +               rte_wmb();
> > > +               txq->tx_tail++;
> > > +               if (txq->tx_tail == txq->nb_tx_desc)
> > > +                       txq->tx_tail = 0;
> > > +               rte_io_wmb();
> > > +               E1000_WRITE_REG(hw, E1000_TDT(i), txq->tx_tail);
> >
> > Should you be using E1000_PCI_REG_WRITE_RELAXED. Can you just check
> > other instances of how this register is written?

Will use E1000_PCI_REG_WRITE_RELAXED to write register.

> >
> > Thanks,
> > Anand
> >
> >
> > > +               usec_delay(250);
> > > +       }
> > > +}
> > > +
> > > +static void e1000_flush_rx_ring(struct rte_eth_dev *dev) {
> > > +       uint32_t rctl, rxdctl;
> > > +       struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data-
> >dev_private);
> > > +       int i;
> > > +
> > > +       rctl = E1000_READ_REG(hw, E1000_RCTL);
> > > +       E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
> > > +       E1000_WRITE_FLUSH(hw);
> > > +       usec_delay(150);
> > > +
> > > +       for (i = 0; i < dev->data->nb_rx_queues &&
> > > +               i < E1000_I219_MAX_RX_QUEUE_NUM; i++) {
> > > +               rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
> > > +               /* zero the lower 14 bits (prefetch and host thresholds) */
> > > +               rxdctl &= 0xffffc000;
> > > +
> > > +               /* update thresholds: prefetch threshold to 31,
> > > +                * host threshold to 1 and make sure the granularity
> > > +                * is "descriptors" and not "cache lines"
> > > +                */
> > > +               rxdctl |= (0x1F | (1UL << 8) |
> > > + E1000_RXDCTL_THRESH_UNIT_DESC);
> > > +
> > > +               E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
> > > +       }
> > > +       /* momentarily enable the RX ring for the changes to take effect */
> > > +       E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
> > > +       E1000_WRITE_FLUSH(hw);
> > > +       usec_delay(150);
> > > +       E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); }
> > > +
> > > +/**
> > > + * igb_flush_desc_rings - remove all descriptors from the
> > > +descriptor rings
> > > + *
> > > + * In i219, the descriptor rings must be emptied before
> > > +resetting/closing the
> > > + * HW. Failure to do this will cause the HW to enter a unit hang
> > > +state which
> > > + * can only be released by PCI reset on the device
> > > + *
> > > + */
> > > +
> > > +void igb_flush_desc_rings(struct rte_eth_dev *dev) {
> > > +       uint32_t fextnvm11, tdlen;
> > > +       struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data-
> >dev_private);
> > > +       struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
> > > +       uint32_t hang_state = 0;
> > > +
> > > +       fextnvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
> > > +       E1000_WRITE_REG(hw, E1000_FEXTNVM11,
> > > +                       fextnvm11 | E1000_FEXTNVM11_DISABLE_MULR_FIX);
> > > +       tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
> > > +       rte_pci_read_config(pci_dev, &hang_state, sizeof(hang_state),
> > > +                               PCICFG_DESC_RING_STATUS);
> > > +
> > > +       /* do nothing if we're not in faulty state, or if the queue is empty */
> > > +       if ((hang_state & FLUSH_DESC_REQUIRED) && tdlen) {
> > > +               /* flush desc ring */
> > > +               e1000_flush_tx_ring(dev);
> > > +               rte_pci_read_config(pci_dev, &hang_state, sizeof(hang_state),
> > > +                                       PCICFG_DESC_RING_STATUS);
> > > +               if (hang_state & FLUSH_DESC_REQUIRED)
> > > +                       e1000_flush_rx_ring(dev);
> > > +       }
> > > +}
> > > --
> > > 2.7.4
> > >

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [dpdk-dev] [v6] net/e1000: fix i219 hang on reset/close
  2019-07-22  9:26     ` Ye Xiaolong
@ 2019-07-22  3:18       ` Zhang, Xiao
  0 siblings, 0 replies; 17+ messages in thread
From: Zhang, Xiao @ 2019-07-22  3:18 UTC (permalink / raw)
  To: Ye, Xiaolong; +Cc: dev, Lu, Wenzhuo, Zhao1, Wei, stable



> -----Original Message-----
> From: Ye, Xiaolong
> Sent: Monday, July 22, 2019 5:27 PM
> To: Zhang, Xiao <xiao.zhang@intel.com>
> Cc: dev@dpdk.org; Lu, Wenzhuo <wenzhuo.lu@intel.com>; Zhao1, Wei
> <wei.zhao1@intel.com>; stable@dpdk.org
> Subject: Re: [dpdk-dev] [v6] net/e1000: fix i219 hang on reset/close
> 
> On 07/22, Xiao Zhang wrote:
> >Unit hang may occur if multiple descriptors are available in the rings
> >during reset or close. This state can be detected by configure status
> >by bit 8 in register. If the bit is set and there are pending
> >descriptors in one of the rings, we must flush them before reset or
> >close.
> >
> >Cc: stable@dpdk.org
> 
> May need a Fixes tag here.

Sure, will send another patch with Fixes line included.

Thanks,
Xiao
> 
> Thanks,
> Xiaolong
> 
> >
> >Signed-off-by: Xiao Zhang <xiao.zhang@intel.com>
> >---
> >v6 Change the fix on em driver instead of igb driver and update the
> >register address according to C-Spec.
> >v5 Change the subject.
> >v4 Correct the tail descriptor of tx ring.
> >v3 Add loop to handle all tx and rx queues.
> >v2 Use configuration register instead of NVM7 to get the hang state.
> >---
> > drivers/net/e1000/e1000_ethdev.h |   4 ++
> > drivers/net/e1000/em_ethdev.c    |   5 ++
> > drivers/net/e1000/em_rxtx.c      | 108
> +++++++++++++++++++++++++++++++++++++++
> > 3 files changed, 117 insertions(+)
> >
> >diff --git a/drivers/net/e1000/e1000_ethdev.h
> >b/drivers/net/e1000/e1000_ethdev.h
> >index 67acb73..01ff943 100644
> >--- a/drivers/net/e1000/e1000_ethdev.h
> >+++ b/drivers/net/e1000/e1000_ethdev.h
> >@@ -35,6 +35,9 @@
> > #define IGB_MAX_RX_QUEUE_NUM           8
> > #define IGB_MAX_RX_QUEUE_NUM_82576     16
> >
> >+#define E1000_I219_MAX_RX_QUEUE_NUM		2
> >+#define E1000_I219_MAX_TX_QUEUE_NUM		2
> >+
> > #define E1000_SYN_FILTER_ENABLE        0x00000001 /* syn filter enable
> field */
> > #define E1000_SYN_FILTER_QUEUE         0x0000000E /* syn filter queue
> field */
> > #define E1000_SYN_FILTER_QUEUE_SHIFT   1          /* syn filter queue field
> */
> >@@ -522,5 +525,6 @@ int igb_action_rss_same(const struct
> >rte_flow_action_rss *comp,  int igb_config_rss_filter(struct rte_eth_dev
> *dev,
> > 			struct igb_rte_flow_rss_conf *conf,
> > 			bool add);
> >+void em_flush_desc_rings(struct rte_eth_dev *dev);
> >
> > #endif /* _E1000_ETHDEV_H_ */
> >diff --git a/drivers/net/e1000/em_ethdev.c
> >b/drivers/net/e1000/em_ethdev.c index dc88661..62d3a95 100644
> >--- a/drivers/net/e1000/em_ethdev.c
> >+++ b/drivers/net/e1000/em_ethdev.c
> >@@ -738,6 +738,11 @@ eth_em_stop(struct rte_eth_dev *dev)
> > 	em_lsc_intr_disable(hw);
> >
> > 	e1000_reset_hw(hw);
> >+
> >+	/* Flush desc rings for i219 */
> >+	if (hw->mac.type >= e1000_pch_spt)
> >+		em_flush_desc_rings(dev);
> >+
> > 	if (hw->mac.type >= e1000_82544)
> > 		E1000_WRITE_REG(hw, E1000_WUC, 0);
> >
> >diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c
> >index 708f832..880fc7e 100644
> >--- a/drivers/net/e1000/em_rxtx.c
> >+++ b/drivers/net/e1000/em_rxtx.c
> >@@ -18,6 +18,7 @@
> > #include <rte_log.h>
> > #include <rte_debug.h>
> > #include <rte_pci.h>
> >+#include <rte_bus_pci.h>
> > #include <rte_memory.h>
> > #include <rte_memcpy.h>
> > #include <rte_memzone.h>
> >@@ -59,6 +60,11 @@
> > #define E1000_TX_OFFLOAD_NOTSUP_MASK \
> > 		(PKT_TX_OFFLOAD_MASK ^ E1000_TX_OFFLOAD_MASK)
> >
> >+/* PCI offset for querying configuration status register */
> >+#define PCI_CFG_STATUS_REG                 0x06
> >+#define FLUSH_DESC_REQUIRED               0x100
> >+
> >+
> > /**
> >  * Structure associated with each descriptor of the RX ring of a RX queue.
> >  */
> >@@ -2000,3 +2006,105 @@ em_txq_info_get(struct rte_eth_dev *dev,
> uint16_t queue_id,
> > 	qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
> > 	qinfo->conf.offloads = txq->offloads;  }
> >+
> >+static void e1000_flush_tx_ring(struct rte_eth_dev *dev) {
> >+	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data-
> >dev_private);
> >+	volatile struct e1000_data_desc *tx_desc;
> >+	volatile uint32_t *tdt_reg_addr;
> >+	uint32_t tdt, tctl, txd_lower = E1000_TXD_CMD_IFCS;
> >+	uint16_t size = 512;
> >+	struct em_tx_queue *txq;
> >+	int i;
> >+
> >+	if (dev->data->tx_queues == NULL)
> >+		return;
> >+	tctl = E1000_READ_REG(hw, E1000_TCTL);
> >+	E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
> >+	for (i = 0; i < dev->data->nb_tx_queues &&
> >+		i < E1000_I219_MAX_TX_QUEUE_NUM; i++) {
> >+		txq = dev->data->tx_queues[i];
> >+		tdt = E1000_READ_REG(hw, E1000_TDT(i));
> >+		if (tdt != txq->tx_tail)
> >+			return;
> >+		tx_desc = &txq->tx_ring[txq->tx_tail];
> >+		tx_desc->buffer_addr = rte_cpu_to_le_64(txq-
> >tx_ring_phys_addr);
> >+		tx_desc->lower.data = rte_cpu_to_le_32(txd_lower | size);
> >+		tx_desc->upper.data = 0;
> >+
> >+		rte_wmb();
> >+		txq->tx_tail++;
> >+		if (txq->tx_tail == txq->nb_tx_desc)
> >+			txq->tx_tail = 0;
> >+		rte_io_wmb();
> >+		tdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_TDT(i));
> >+		E1000_PCI_REG_WRITE_RELAXED(tdt_reg_addr, txq-
> >tx_tail);
> >+		usec_delay(250);
> >+	}
> >+}
> >+
> >+static void e1000_flush_rx_ring(struct rte_eth_dev *dev) {
> >+	uint32_t rctl, rxdctl;
> >+	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data-
> >dev_private);
> >+	int i;
> >+
> >+	rctl = E1000_READ_REG(hw, E1000_RCTL);
> >+	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
> >+	E1000_WRITE_FLUSH(hw);
> >+	usec_delay(150);
> >+
> >+	for (i = 0; i < dev->data->nb_rx_queues &&
> >+		i < E1000_I219_MAX_RX_QUEUE_NUM; i++) {
> >+		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
> >+		/* zero the lower 14 bits (prefetch and host thresholds) */
> >+		rxdctl &= 0xffffc000;
> >+
> >+		/* update thresholds: prefetch threshold to 31,
> >+		 * host threshold to 1 and make sure the granularity
> >+		 * is "descriptors" and not "cache lines"
> >+		 */
> >+		rxdctl |= (0x1F | (1UL << 8) |
> E1000_RXDCTL_THRESH_UNIT_DESC);
> >+
> >+		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
> >+	}
> >+	/* momentarily enable the RX ring for the changes to take effect */
> >+	E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
> >+	E1000_WRITE_FLUSH(hw);
> >+	usec_delay(150);
> >+	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); }
> >+
> >+/**
> >+ * em_flush_desc_rings - remove all descriptors from the descriptor
> >+rings
> >+ *
> >+ * In i219, the descriptor rings must be emptied before
> >+resetting/closing the
> >+ * HW. Failure to do this will cause the HW to enter a unit hang state
> >+which
> >+ * can only be released by PCI reset on the device
> >+ *
> >+ */
> >+
> >+void em_flush_desc_rings(struct rte_eth_dev *dev) {
> >+	uint32_t fextnvm11, tdlen;
> >+	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data-
> >dev_private);
> >+	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
> >+	uint16_t hang_state = 0;
> >+
> >+	fextnvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
> >+	E1000_WRITE_REG(hw, E1000_FEXTNVM11,
> >+			fextnvm11 |
> E1000_FEXTNVM11_DISABLE_MULR_FIX);
> >+	tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
> >+	rte_pci_read_config(pci_dev, &hang_state, sizeof(hang_state),
> >+				PCI_CFG_STATUS_REG);
> >+
> >+	/* do nothing if we're not in faulty state, or if the queue is empty */
> >+	if ((hang_state & FLUSH_DESC_REQUIRED) && tdlen) {
> >+		/* flush desc ring */
> >+		e1000_flush_tx_ring(dev);
> >+		rte_pci_read_config(pci_dev, &hang_state,
> sizeof(hang_state),
> >+					PCI_CFG_STATUS_REG);
> >+		if (hang_state & FLUSH_DESC_REQUIRED)
> >+			e1000_flush_rx_ring(dev);
> >+	}
> >+}
> >--
> >2.7.4
> >

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [dpdk-dev] [v6] net/e1000: fix i219 hang on reset/close
  2019-07-22 11:19   ` [dpdk-dev] [v6] " Xiao Zhang
@ 2019-07-22  9:26     ` Ye Xiaolong
  2019-07-22  3:18       ` Zhang, Xiao
  2019-07-22 12:19     ` [dpdk-dev] [v7] " Xiao Zhang
  1 sibling, 1 reply; 17+ messages in thread
From: Ye Xiaolong @ 2019-07-22  9:26 UTC (permalink / raw)
  To: Xiao Zhang; +Cc: dev, wenzhuo.lu, wei.zhao1, stable

On 07/22, Xiao Zhang wrote:
>Unit hang may occur if multiple descriptors are available in the rings
>during reset or close. This state can be detected by configure status
>by bit 8 in register. If the bit is set and there are pending
>descriptors in one of the rings, we must flush them before reset or
>close.
>
>Cc: stable@dpdk.org

May need a Fixes tag here.

Thanks,
Xiaolong

>
>Signed-off-by: Xiao Zhang <xiao.zhang@intel.com>
>---
>v6 Change the fix on em driver instead of igb driver and update the 
>register address according to C-Spec.
>v5 Change the subject.
>v4 Correct the tail descriptor of tx ring.
>v3 Add loop to handle all tx and rx queues.
>v2 Use configuration register instead of NVM7 to get the hang state.
>---
> drivers/net/e1000/e1000_ethdev.h |   4 ++
> drivers/net/e1000/em_ethdev.c    |   5 ++
> drivers/net/e1000/em_rxtx.c      | 108 +++++++++++++++++++++++++++++++++++++++
> 3 files changed, 117 insertions(+)
>
>diff --git a/drivers/net/e1000/e1000_ethdev.h b/drivers/net/e1000/e1000_ethdev.h
>index 67acb73..01ff943 100644
>--- a/drivers/net/e1000/e1000_ethdev.h
>+++ b/drivers/net/e1000/e1000_ethdev.h
>@@ -35,6 +35,9 @@
> #define IGB_MAX_RX_QUEUE_NUM           8
> #define IGB_MAX_RX_QUEUE_NUM_82576     16
> 
>+#define E1000_I219_MAX_RX_QUEUE_NUM		2
>+#define E1000_I219_MAX_TX_QUEUE_NUM		2
>+
> #define E1000_SYN_FILTER_ENABLE        0x00000001 /* syn filter enable field */
> #define E1000_SYN_FILTER_QUEUE         0x0000000E /* syn filter queue field */
> #define E1000_SYN_FILTER_QUEUE_SHIFT   1          /* syn filter queue field */
>@@ -522,5 +525,6 @@ int igb_action_rss_same(const struct rte_flow_action_rss *comp,
> int igb_config_rss_filter(struct rte_eth_dev *dev,
> 			struct igb_rte_flow_rss_conf *conf,
> 			bool add);
>+void em_flush_desc_rings(struct rte_eth_dev *dev);
> 
> #endif /* _E1000_ETHDEV_H_ */
>diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c
>index dc88661..62d3a95 100644
>--- a/drivers/net/e1000/em_ethdev.c
>+++ b/drivers/net/e1000/em_ethdev.c
>@@ -738,6 +738,11 @@ eth_em_stop(struct rte_eth_dev *dev)
> 	em_lsc_intr_disable(hw);
> 
> 	e1000_reset_hw(hw);
>+
>+	/* Flush desc rings for i219 */
>+	if (hw->mac.type >= e1000_pch_spt)
>+		em_flush_desc_rings(dev);
>+
> 	if (hw->mac.type >= e1000_82544)
> 		E1000_WRITE_REG(hw, E1000_WUC, 0);
> 
>diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c
>index 708f832..880fc7e 100644
>--- a/drivers/net/e1000/em_rxtx.c
>+++ b/drivers/net/e1000/em_rxtx.c
>@@ -18,6 +18,7 @@
> #include <rte_log.h>
> #include <rte_debug.h>
> #include <rte_pci.h>
>+#include <rte_bus_pci.h>
> #include <rte_memory.h>
> #include <rte_memcpy.h>
> #include <rte_memzone.h>
>@@ -59,6 +60,11 @@
> #define E1000_TX_OFFLOAD_NOTSUP_MASK \
> 		(PKT_TX_OFFLOAD_MASK ^ E1000_TX_OFFLOAD_MASK)
> 
>+/* PCI offset for querying configuration status register */
>+#define PCI_CFG_STATUS_REG                 0x06
>+#define FLUSH_DESC_REQUIRED               0x100
>+
>+
> /**
>  * Structure associated with each descriptor of the RX ring of a RX queue.
>  */
>@@ -2000,3 +2006,105 @@ em_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
> 	qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
> 	qinfo->conf.offloads = txq->offloads;
> }
>+
>+static void e1000_flush_tx_ring(struct rte_eth_dev *dev)
>+{
>+	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>+	volatile struct e1000_data_desc *tx_desc;
>+	volatile uint32_t *tdt_reg_addr;
>+	uint32_t tdt, tctl, txd_lower = E1000_TXD_CMD_IFCS;
>+	uint16_t size = 512;
>+	struct em_tx_queue *txq;
>+	int i;
>+
>+	if (dev->data->tx_queues == NULL)
>+		return;
>+	tctl = E1000_READ_REG(hw, E1000_TCTL);
>+	E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
>+	for (i = 0; i < dev->data->nb_tx_queues &&
>+		i < E1000_I219_MAX_TX_QUEUE_NUM; i++) {
>+		txq = dev->data->tx_queues[i];
>+		tdt = E1000_READ_REG(hw, E1000_TDT(i));
>+		if (tdt != txq->tx_tail)
>+			return;
>+		tx_desc = &txq->tx_ring[txq->tx_tail];
>+		tx_desc->buffer_addr = rte_cpu_to_le_64(txq->tx_ring_phys_addr);
>+		tx_desc->lower.data = rte_cpu_to_le_32(txd_lower | size);
>+		tx_desc->upper.data = 0;
>+
>+		rte_wmb();
>+		txq->tx_tail++;
>+		if (txq->tx_tail == txq->nb_tx_desc)
>+			txq->tx_tail = 0;
>+		rte_io_wmb();
>+		tdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_TDT(i));
>+		E1000_PCI_REG_WRITE_RELAXED(tdt_reg_addr, txq->tx_tail);
>+		usec_delay(250);
>+	}
>+}
>+
>+static void e1000_flush_rx_ring(struct rte_eth_dev *dev)
>+{
>+	uint32_t rctl, rxdctl;
>+	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>+	int i;
>+
>+	rctl = E1000_READ_REG(hw, E1000_RCTL);
>+	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
>+	E1000_WRITE_FLUSH(hw);
>+	usec_delay(150);
>+
>+	for (i = 0; i < dev->data->nb_rx_queues &&
>+		i < E1000_I219_MAX_RX_QUEUE_NUM; i++) {
>+		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
>+		/* zero the lower 14 bits (prefetch and host thresholds) */
>+		rxdctl &= 0xffffc000;
>+
>+		/* update thresholds: prefetch threshold to 31,
>+		 * host threshold to 1 and make sure the granularity
>+		 * is "descriptors" and not "cache lines"
>+		 */
>+		rxdctl |= (0x1F | (1UL << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
>+
>+		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
>+	}
>+	/* momentarily enable the RX ring for the changes to take effect */
>+	E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
>+	E1000_WRITE_FLUSH(hw);
>+	usec_delay(150);
>+	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
>+}
>+
>+/**
>+ * em_flush_desc_rings - remove all descriptors from the descriptor rings
>+ *
>+ * In i219, the descriptor rings must be emptied before resetting/closing the
>+ * HW. Failure to do this will cause the HW to enter a unit hang state which
>+ * can only be released by PCI reset on the device
>+ *
>+ */
>+
>+void em_flush_desc_rings(struct rte_eth_dev *dev)
>+{
>+	uint32_t fextnvm11, tdlen;
>+	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>+	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
>+	uint16_t hang_state = 0;
>+
>+	fextnvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
>+	E1000_WRITE_REG(hw, E1000_FEXTNVM11,
>+			fextnvm11 | E1000_FEXTNVM11_DISABLE_MULR_FIX);
>+	tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
>+	rte_pci_read_config(pci_dev, &hang_state, sizeof(hang_state),
>+				PCI_CFG_STATUS_REG);
>+
>+	/* do nothing if we're not in faulty state, or if the queue is empty */
>+	if ((hang_state & FLUSH_DESC_REQUIRED) && tdlen) {
>+		/* flush desc ring */
>+		e1000_flush_tx_ring(dev);
>+		rte_pci_read_config(pci_dev, &hang_state, sizeof(hang_state),
>+					PCI_CFG_STATUS_REG);
>+		if (hang_state & FLUSH_DESC_REQUIRED)
>+			e1000_flush_rx_ring(dev);
>+	}
>+}
>-- 
>2.7.4
>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [dpdk-dev] [v6] net/e1000: fix i219 hang on reset/close
  2019-07-11  9:51 ` [dpdk-dev] [v5] net/e1000: fix i219 hang " Xiao Zhang
@ 2019-07-22 11:19   ` Xiao Zhang
  2019-07-22  9:26     ` Ye Xiaolong
  2019-07-22 12:19     ` [dpdk-dev] [v7] " Xiao Zhang
  0 siblings, 2 replies; 17+ messages in thread
From: Xiao Zhang @ 2019-07-22 11:19 UTC (permalink / raw)
  To: dev; +Cc: wenzhuo.lu, wei.zhao1, Xiao Zhang, stable

Unit hang may occur if multiple descriptors are available in the rings
during reset or close. This state can be detected by configure status
by bit 8 in register. If the bit is set and there are pending
descriptors in one of the rings, we must flush them before reset or
close.

Cc: stable@dpdk.org

Signed-off-by: Xiao Zhang <xiao.zhang@intel.com>
---
v6 Change the fix on em driver instead of igb driver and update the 
register address according to C-Spec.
v5 Change the subject.
v4 Correct the tail descriptor of tx ring.
v3 Add loop to handle all tx and rx queues.
v2 Use configuration register instead of NVM7 to get the hang state.
---
 drivers/net/e1000/e1000_ethdev.h |   4 ++
 drivers/net/e1000/em_ethdev.c    |   5 ++
 drivers/net/e1000/em_rxtx.c      | 108 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 117 insertions(+)

diff --git a/drivers/net/e1000/e1000_ethdev.h b/drivers/net/e1000/e1000_ethdev.h
index 67acb73..01ff943 100644
--- a/drivers/net/e1000/e1000_ethdev.h
+++ b/drivers/net/e1000/e1000_ethdev.h
@@ -35,6 +35,9 @@
 #define IGB_MAX_RX_QUEUE_NUM           8
 #define IGB_MAX_RX_QUEUE_NUM_82576     16
 
+#define E1000_I219_MAX_RX_QUEUE_NUM		2
+#define E1000_I219_MAX_TX_QUEUE_NUM		2
+
 #define E1000_SYN_FILTER_ENABLE        0x00000001 /* syn filter enable field */
 #define E1000_SYN_FILTER_QUEUE         0x0000000E /* syn filter queue field */
 #define E1000_SYN_FILTER_QUEUE_SHIFT   1          /* syn filter queue field */
@@ -522,5 +525,6 @@ int igb_action_rss_same(const struct rte_flow_action_rss *comp,
 int igb_config_rss_filter(struct rte_eth_dev *dev,
 			struct igb_rte_flow_rss_conf *conf,
 			bool add);
+void em_flush_desc_rings(struct rte_eth_dev *dev);
 
 #endif /* _E1000_ETHDEV_H_ */
diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c
index dc88661..62d3a95 100644
--- a/drivers/net/e1000/em_ethdev.c
+++ b/drivers/net/e1000/em_ethdev.c
@@ -738,6 +738,11 @@ eth_em_stop(struct rte_eth_dev *dev)
 	em_lsc_intr_disable(hw);
 
 	e1000_reset_hw(hw);
+
+	/* Flush desc rings for i219 */
+	if (hw->mac.type >= e1000_pch_spt)
+		em_flush_desc_rings(dev);
+
 	if (hw->mac.type >= e1000_82544)
 		E1000_WRITE_REG(hw, E1000_WUC, 0);
 
diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c
index 708f832..880fc7e 100644
--- a/drivers/net/e1000/em_rxtx.c
+++ b/drivers/net/e1000/em_rxtx.c
@@ -18,6 +18,7 @@
 #include <rte_log.h>
 #include <rte_debug.h>
 #include <rte_pci.h>
+#include <rte_bus_pci.h>
 #include <rte_memory.h>
 #include <rte_memcpy.h>
 #include <rte_memzone.h>
@@ -59,6 +60,11 @@
 #define E1000_TX_OFFLOAD_NOTSUP_MASK \
 		(PKT_TX_OFFLOAD_MASK ^ E1000_TX_OFFLOAD_MASK)
 
+/* PCI offset for querying configuration status register */
+#define PCI_CFG_STATUS_REG                 0x06
+#define FLUSH_DESC_REQUIRED               0x100
+
+
 /**
  * Structure associated with each descriptor of the RX ring of a RX queue.
  */
@@ -2000,3 +2006,105 @@ em_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
 	qinfo->conf.offloads = txq->offloads;
 }
+
+static void e1000_flush_tx_ring(struct rte_eth_dev *dev)
+{
+	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	volatile struct e1000_data_desc *tx_desc;
+	volatile uint32_t *tdt_reg_addr;
+	uint32_t tdt, tctl, txd_lower = E1000_TXD_CMD_IFCS;
+	uint16_t size = 512;
+	struct em_tx_queue *txq;
+	int i;
+
+	if (dev->data->tx_queues == NULL)
+		return;
+	tctl = E1000_READ_REG(hw, E1000_TCTL);
+	E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
+	for (i = 0; i < dev->data->nb_tx_queues &&
+		i < E1000_I219_MAX_TX_QUEUE_NUM; i++) {
+		txq = dev->data->tx_queues[i];
+		tdt = E1000_READ_REG(hw, E1000_TDT(i));
+		if (tdt != txq->tx_tail)
+			return;
+		tx_desc = &txq->tx_ring[txq->tx_tail];
+		tx_desc->buffer_addr = rte_cpu_to_le_64(txq->tx_ring_phys_addr);
+		tx_desc->lower.data = rte_cpu_to_le_32(txd_lower | size);
+		tx_desc->upper.data = 0;
+
+		rte_wmb();
+		txq->tx_tail++;
+		if (txq->tx_tail == txq->nb_tx_desc)
+			txq->tx_tail = 0;
+		rte_io_wmb();
+		tdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_TDT(i));
+		E1000_PCI_REG_WRITE_RELAXED(tdt_reg_addr, txq->tx_tail);
+		usec_delay(250);
+	}
+}
+
+static void e1000_flush_rx_ring(struct rte_eth_dev *dev)
+{
+	uint32_t rctl, rxdctl;
+	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	int i;
+
+	rctl = E1000_READ_REG(hw, E1000_RCTL);
+	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
+	E1000_WRITE_FLUSH(hw);
+	usec_delay(150);
+
+	for (i = 0; i < dev->data->nb_rx_queues &&
+		i < E1000_I219_MAX_RX_QUEUE_NUM; i++) {
+		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
+		/* zero the lower 14 bits (prefetch and host thresholds) */
+		rxdctl &= 0xffffc000;
+
+		/* update thresholds: prefetch threshold to 31,
+		 * host threshold to 1 and make sure the granularity
+		 * is "descriptors" and not "cache lines"
+		 */
+		rxdctl |= (0x1F | (1UL << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
+
+		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
+	}
+	/* momentarily enable the RX ring for the changes to take effect */
+	E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
+	E1000_WRITE_FLUSH(hw);
+	usec_delay(150);
+	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
+}
+
+/**
+ * em_flush_desc_rings - remove all descriptors from the descriptor rings
+ *
+ * In i219, the descriptor rings must be emptied before resetting/closing the
+ * HW. Failure to do this will cause the HW to enter a unit hang state which
+ * can only be released by PCI reset on the device
+ *
+ */
+
+void em_flush_desc_rings(struct rte_eth_dev *dev)
+{
+	uint32_t fextnvm11, tdlen;
+	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
+	uint16_t hang_state = 0;
+
+	fextnvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
+	E1000_WRITE_REG(hw, E1000_FEXTNVM11,
+			fextnvm11 | E1000_FEXTNVM11_DISABLE_MULR_FIX);
+	tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
+	rte_pci_read_config(pci_dev, &hang_state, sizeof(hang_state),
+				PCI_CFG_STATUS_REG);
+
+	/* do nothing if we're not in faulty state, or if the queue is empty */
+	if ((hang_state & FLUSH_DESC_REQUIRED) && tdlen) {
+		/* flush desc ring */
+		e1000_flush_tx_ring(dev);
+		rte_pci_read_config(pci_dev, &hang_state, sizeof(hang_state),
+					PCI_CFG_STATUS_REG);
+		if (hang_state & FLUSH_DESC_REQUIRED)
+			e1000_flush_rx_ring(dev);
+	}
+}
-- 
2.7.4


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [dpdk-dev] [v7] net/e1000: fix i219 hang on reset/close
  2019-07-22 11:19   ` [dpdk-dev] [v6] " Xiao Zhang
  2019-07-22  9:26     ` Ye Xiaolong
@ 2019-07-22 12:19     ` Xiao Zhang
  2019-07-22 12:34       ` Ye Xiaolong
  2019-07-22 15:11       ` [dpdk-dev] [v8] " Xiao Zhang
  1 sibling, 2 replies; 17+ messages in thread
From: Xiao Zhang @ 2019-07-22 12:19 UTC (permalink / raw)
  To: dev; +Cc: wenzhuo.lu, wei.zhao1, Xiao Zhang, stable

Unit hang may occur if multiple descriptors are available in the rings
during reset or close. This state can be detected by configure status
by bit 8 in register. If the bit is set and there are pending
descriptors in one of the rings, we must flush them before reset or
close.

Fixes: 80580344("e1000: support EM devices (also known as e1000/e1000e)")
Cc: stable@dpdk.org

Signed-off-by: Xiao Zhang <xiao.zhang@intel.com>
---
v7 Add fix line.
v6 Change the fix on em driver instead of igb driver and update the 
register address according to C-Spec.
v5 Change the subject.
v4 Correct the tail descriptor of tx ring.
v3 Add loop to handle all tx and rx queues.
v2 Use configuration register instead of NVM7 to get the hang state.
---
 drivers/net/e1000/e1000_ethdev.h |   4 ++
 drivers/net/e1000/em_ethdev.c    |   5 ++
 drivers/net/e1000/em_rxtx.c      | 108 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 117 insertions(+)

diff --git a/drivers/net/e1000/e1000_ethdev.h b/drivers/net/e1000/e1000_ethdev.h
index 67acb73..01ff943 100644
--- a/drivers/net/e1000/e1000_ethdev.h
+++ b/drivers/net/e1000/e1000_ethdev.h
@@ -35,6 +35,9 @@
 #define IGB_MAX_RX_QUEUE_NUM           8
 #define IGB_MAX_RX_QUEUE_NUM_82576     16
 
+#define E1000_I219_MAX_RX_QUEUE_NUM		2
+#define E1000_I219_MAX_TX_QUEUE_NUM		2
+
 #define E1000_SYN_FILTER_ENABLE        0x00000001 /* syn filter enable field */
 #define E1000_SYN_FILTER_QUEUE         0x0000000E /* syn filter queue field */
 #define E1000_SYN_FILTER_QUEUE_SHIFT   1          /* syn filter queue field */
@@ -522,5 +525,6 @@ int igb_action_rss_same(const struct rte_flow_action_rss *comp,
 int igb_config_rss_filter(struct rte_eth_dev *dev,
 			struct igb_rte_flow_rss_conf *conf,
 			bool add);
+void em_flush_desc_rings(struct rte_eth_dev *dev);
 
 #endif /* _E1000_ETHDEV_H_ */
diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c
index dc88661..62d3a95 100644
--- a/drivers/net/e1000/em_ethdev.c
+++ b/drivers/net/e1000/em_ethdev.c
@@ -738,6 +738,11 @@ eth_em_stop(struct rte_eth_dev *dev)
 	em_lsc_intr_disable(hw);
 
 	e1000_reset_hw(hw);
+
+	/* Flush desc rings for i219 */
+	if (hw->mac.type >= e1000_pch_spt)
+		em_flush_desc_rings(dev);
+
 	if (hw->mac.type >= e1000_82544)
 		E1000_WRITE_REG(hw, E1000_WUC, 0);
 
diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c
index 708f832..96c10cd 100644
--- a/drivers/net/e1000/em_rxtx.c
+++ b/drivers/net/e1000/em_rxtx.c
@@ -18,6 +18,7 @@
 #include <rte_log.h>
 #include <rte_debug.h>
 #include <rte_pci.h>
+#include <rte_bus_pci.h>
 #include <rte_memory.h>
 #include <rte_memcpy.h>
 #include <rte_memzone.h>
@@ -59,6 +60,11 @@
 #define E1000_TX_OFFLOAD_NOTSUP_MASK \
 		(PKT_TX_OFFLOAD_MASK ^ E1000_TX_OFFLOAD_MASK)
 
+/* PCI offset for querying configuration status register */
+#define PCI_CFG_STATUS_REG                 0x06
+#define FLUSH_DESC_REQUIRED               0x100
+
+
 /**
  * Structure associated with each descriptor of the RX ring of a RX queue.
  */
@@ -2000,3 +2006,105 @@ em_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
 	qinfo->conf.offloads = txq->offloads;
 }
+
+static void e1000_flush_tx_ring(struct rte_eth_dev *dev)
+{
+	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	volatile struct e1000_data_desc *tx_desc;
+	volatile uint32_t *tdt_reg_addr;
+	uint32_t tdt, tctl, txd_lower = E1000_TXD_CMD_IFCS;
+	uint16_t size = 512;
+	struct em_tx_queue *txq;
+	int i;
+
+	if (dev->data->tx_queues == NULL)
+		return;
+	tctl = E1000_READ_REG(hw, E1000_TCTL);
+	E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
+	for (i = 0; i < dev->data->nb_tx_queues &&
+		i < E1000_I219_MAX_TX_QUEUE_NUM; i++) {
+		txq = dev->data->tx_queues[i];
+		tdt = E1000_READ_REG(hw, E1000_TDT(i));
+		if (tdt != txq->tx_tail)
+			return;
+		tx_desc = &txq->tx_ring[txq->tx_tail];
+		tx_desc->buffer_addr = rte_cpu_to_le_64(txq->tx_ring_phys_addr);
+		tx_desc->lower.data = rte_cpu_to_le_32(txd_lower | size);
+		tx_desc->upper.data = 0;
+
+		rte_wmb();
+		txq->tx_tail++;
+		if (txq->tx_tail == txq->nb_tx_desc)
+			txq->tx_tail = 0;
+		rte_io_wmb();
+		tdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_TDT(i));
+		E1000_PCI_REG_WRITE_RELAXED(tdt_reg_addr, txq->tx_tail);
+		usec_delay(250);
+	}
+}
+
+static void e1000_flush_rx_ring(struct rte_eth_dev *dev)
+{
+	uint32_t rctl, rxdctl;
+	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	int i;
+
+	rctl = E1000_READ_REG(hw, E1000_RCTL);
+	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
+	E1000_WRITE_FLUSH(hw);
+	usec_delay(150);
+
+	for (i = 0; i < dev->data->nb_rx_queues &&
+		i < E1000_I219_MAX_RX_QUEUE_NUM; i++) {
+		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
+		/* zero the lower 14 bits (prefetch and host thresholds) */
+		rxdctl &= 0xffffc000;
+
+		/* update thresholds: prefetch threshold to 31,
+		 * host threshold to 1 and make sure the granularity
+		 * is "descriptors" and not "cache lines"
+		 */
+		rxdctl |= (0x1F | (1UL << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
+
+		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
+	}
+	/* momentarily enable the RX ring for the changes to take effect */
+	E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
+	E1000_WRITE_FLUSH(hw);
+	usec_delay(150);
+	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
+}
+
+/**
+ * em_flush_desc_rings - remove all descriptors from the descriptor rings
+ *
+ * In i219, the descriptor rings must be emptied before resetting/closing the
+ * HW. Failure to do this will cause the HW to enter a unit hang state which
+ * can only be released by PCI reset on the device
+ *
+ */
+
+void em_flush_desc_rings(struct rte_eth_dev *dev)
+{
+	uint32_t fextnvm11, tdlen;
+	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
+	uint16_t pci_cfg_status = 0;
+
+	fextnvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
+	E1000_WRITE_REG(hw, E1000_FEXTNVM11,
+			fextnvm11 | E1000_FEXTNVM11_DISABLE_MULR_FIX);
+	tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
+	rte_pci_read_config(pci_dev, &pci_cfg_status, sizeof(pci_cfg_status),
+				PCI_CFG_STATUS_REG);
+
+	/* do nothing if we're not in faulty state, or if the queue is empty */
+	if ((pci_cfg_status & FLUSH_DESC_REQUIRED) && tdlen) {
+		/* flush desc ring */
+		e1000_flush_tx_ring(dev);
+		rte_pci_read_config(pci_dev, &pci_cfg_status,
+				sizeof(pci_cfg_status), PCI_CFG_STATUS_REG);
+		if (pci_cfg_status & FLUSH_DESC_REQUIRED)
+			e1000_flush_rx_ring(dev);
+	}
+}
-- 
2.7.4


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [dpdk-dev] [v7] net/e1000: fix i219 hang on reset/close
  2019-07-22 12:19     ` [dpdk-dev] [v7] " Xiao Zhang
@ 2019-07-22 12:34       ` Ye Xiaolong
  2019-07-22 15:11       ` [dpdk-dev] [v8] " Xiao Zhang
  1 sibling, 0 replies; 17+ messages in thread
From: Ye Xiaolong @ 2019-07-22 12:34 UTC (permalink / raw)
  To: Xiao Zhang; +Cc: dev, wenzhuo.lu, wei.zhao1, stable

On 07/22, Xiao Zhang wrote:
>Unit hang may occur if multiple descriptors are available in the rings
>during reset or close. This state can be detected by configure status
>by bit 8 in register. If the bit is set and there are pending
>descriptors in one of the rings, we must flush them before reset or
>close.
>
>Fixes: 80580344("e1000: support EM devices (also known as e1000/e1000e)")
>Cc: stable@dpdk.org
>
>Signed-off-by: Xiao Zhang <xiao.zhang@intel.com>
>---
>v7 Add fix line.
>v6 Change the fix on em driver instead of igb driver and update the 
>register address according to C-Spec.
>v5 Change the subject.
>v4 Correct the tail descriptor of tx ring.
>v3 Add loop to handle all tx and rx queues.
>v2 Use configuration register instead of NVM7 to get the hang state.
>---
> drivers/net/e1000/e1000_ethdev.h |   4 ++
> drivers/net/e1000/em_ethdev.c    |   5 ++
> drivers/net/e1000/em_rxtx.c      | 108 +++++++++++++++++++++++++++++++++++++++
> 3 files changed, 117 insertions(+)
>
>diff --git a/drivers/net/e1000/e1000_ethdev.h b/drivers/net/e1000/e1000_ethdev.h
>index 67acb73..01ff943 100644
>--- a/drivers/net/e1000/e1000_ethdev.h
>+++ b/drivers/net/e1000/e1000_ethdev.h
>@@ -35,6 +35,9 @@
> #define IGB_MAX_RX_QUEUE_NUM           8
> #define IGB_MAX_RX_QUEUE_NUM_82576     16
> 
>+#define E1000_I219_MAX_RX_QUEUE_NUM		2
>+#define E1000_I219_MAX_TX_QUEUE_NUM		2
>+
> #define E1000_SYN_FILTER_ENABLE        0x00000001 /* syn filter enable field */
> #define E1000_SYN_FILTER_QUEUE         0x0000000E /* syn filter queue field */
> #define E1000_SYN_FILTER_QUEUE_SHIFT   1          /* syn filter queue field */
>@@ -522,5 +525,6 @@ int igb_action_rss_same(const struct rte_flow_action_rss *comp,
> int igb_config_rss_filter(struct rte_eth_dev *dev,
> 			struct igb_rte_flow_rss_conf *conf,
> 			bool add);
>+void em_flush_desc_rings(struct rte_eth_dev *dev);
> 
> #endif /* _E1000_ETHDEV_H_ */
>diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c
>index dc88661..62d3a95 100644
>--- a/drivers/net/e1000/em_ethdev.c
>+++ b/drivers/net/e1000/em_ethdev.c
>@@ -738,6 +738,11 @@ eth_em_stop(struct rte_eth_dev *dev)
> 	em_lsc_intr_disable(hw);
> 
> 	e1000_reset_hw(hw);
>+
>+	/* Flush desc rings for i219 */
>+	if (hw->mac.type >= e1000_pch_spt)
>+		em_flush_desc_rings(dev);
>+
> 	if (hw->mac.type >= e1000_82544)
> 		E1000_WRITE_REG(hw, E1000_WUC, 0);
> 
>diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c
>index 708f832..96c10cd 100644
>--- a/drivers/net/e1000/em_rxtx.c
>+++ b/drivers/net/e1000/em_rxtx.c
>@@ -18,6 +18,7 @@
> #include <rte_log.h>
> #include <rte_debug.h>
> #include <rte_pci.h>
>+#include <rte_bus_pci.h>
> #include <rte_memory.h>
> #include <rte_memcpy.h>
> #include <rte_memzone.h>
>@@ -59,6 +60,11 @@
> #define E1000_TX_OFFLOAD_NOTSUP_MASK \
> 		(PKT_TX_OFFLOAD_MASK ^ E1000_TX_OFFLOAD_MASK)
> 
>+/* PCI offset for querying configuration status register */
>+#define PCI_CFG_STATUS_REG                 0x06
>+#define FLUSH_DESC_REQUIRED               0x100
>+
>+
> /**
>  * Structure associated with each descriptor of the RX ring of a RX queue.
>  */
>@@ -2000,3 +2006,105 @@ em_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
> 	qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
> 	qinfo->conf.offloads = txq->offloads;
> }
>+
>+static void e1000_flush_tx_ring(struct rte_eth_dev *dev)

Minor nit, according to dpdk community's coding style, the function type should
be on a line by itself preceding the function, like

static void
e1000_flush_tx_ring(struct rte_eth_dev *dev)

>+{
>+	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>+	volatile struct e1000_data_desc *tx_desc;
>+	volatile uint32_t *tdt_reg_addr;
>+	uint32_t tdt, tctl, txd_lower = E1000_TXD_CMD_IFCS;
>+	uint16_t size = 512;
>+	struct em_tx_queue *txq;
>+	int i;
>+
>+	if (dev->data->tx_queues == NULL)
>+		return;
>+	tctl = E1000_READ_REG(hw, E1000_TCTL);
>+	E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
>+	for (i = 0; i < dev->data->nb_tx_queues &&
>+		i < E1000_I219_MAX_TX_QUEUE_NUM; i++) {
>+		txq = dev->data->tx_queues[i];
>+		tdt = E1000_READ_REG(hw, E1000_TDT(i));
>+		if (tdt != txq->tx_tail)
>+			return;
>+		tx_desc = &txq->tx_ring[txq->tx_tail];
>+		tx_desc->buffer_addr = rte_cpu_to_le_64(txq->tx_ring_phys_addr);
>+		tx_desc->lower.data = rte_cpu_to_le_32(txd_lower | size);
>+		tx_desc->upper.data = 0;
>+
>+		rte_wmb();
>+		txq->tx_tail++;
>+		if (txq->tx_tail == txq->nb_tx_desc)
>+			txq->tx_tail = 0;
>+		rte_io_wmb();
>+		tdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_TDT(i));
>+		E1000_PCI_REG_WRITE_RELAXED(tdt_reg_addr, txq->tx_tail);
>+		usec_delay(250);
>+	}
>+}
>+
>+static void e1000_flush_rx_ring(struct rte_eth_dev *dev)

Ditto.

>+{
>+	uint32_t rctl, rxdctl;
>+	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>+	int i;
>+
>+	rctl = E1000_READ_REG(hw, E1000_RCTL);
>+	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
>+	E1000_WRITE_FLUSH(hw);
>+	usec_delay(150);
>+
>+	for (i = 0; i < dev->data->nb_rx_queues &&
>+		i < E1000_I219_MAX_RX_QUEUE_NUM; i++) {
>+		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
>+		/* zero the lower 14 bits (prefetch and host thresholds) */
>+		rxdctl &= 0xffffc000;
>+
>+		/* update thresholds: prefetch threshold to 31,
>+		 * host threshold to 1 and make sure the granularity
>+		 * is "descriptors" and not "cache lines"
>+		 */
>+		rxdctl |= (0x1F | (1UL << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
>+
>+		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
>+	}
>+	/* momentarily enable the RX ring for the changes to take effect */
>+	E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
>+	E1000_WRITE_FLUSH(hw);
>+	usec_delay(150);
>+	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
>+}
>+
>+/**
>+ * em_flush_desc_rings - remove all descriptors from the descriptor rings
>+ *
>+ * In i219, the descriptor rings must be emptied before resetting/closing the
>+ * HW. Failure to do this will cause the HW to enter a unit hang state which
>+ * can only be released by PCI reset on the device
>+ *
>+ */
>+
>+void em_flush_desc_rings(struct rte_eth_dev *dev)

Ditto.


For the rest part, 
Reviewed-by: Xiaolong Ye <xiaolong.ye@intel.com>

>+{
>+	uint32_t fextnvm11, tdlen;
>+	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>+	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
>+	uint16_t pci_cfg_status = 0;
>+
>+	fextnvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
>+	E1000_WRITE_REG(hw, E1000_FEXTNVM11,
>+			fextnvm11 | E1000_FEXTNVM11_DISABLE_MULR_FIX);
>+	tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
>+	rte_pci_read_config(pci_dev, &pci_cfg_status, sizeof(pci_cfg_status),
>+				PCI_CFG_STATUS_REG);
>+
>+	/* do nothing if we're not in faulty state, or if the queue is empty */
>+	if ((pci_cfg_status & FLUSH_DESC_REQUIRED) && tdlen) {
>+		/* flush desc ring */
>+		e1000_flush_tx_ring(dev);
>+		rte_pci_read_config(pci_dev, &pci_cfg_status,
>+				sizeof(pci_cfg_status), PCI_CFG_STATUS_REG);
>+		if (pci_cfg_status & FLUSH_DESC_REQUIRED)
>+			e1000_flush_rx_ring(dev);
>+	}
>+}
>-- 
>2.7.4
>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [dpdk-dev] [v8] net/e1000: fix i219 hang on reset/close
  2019-07-22 12:19     ` [dpdk-dev] [v7] " Xiao Zhang
  2019-07-22 12:34       ` Ye Xiaolong
@ 2019-07-22 15:11       ` Xiao Zhang
  2019-07-24  0:33         ` Zhang, Qi Z
                           ` (2 more replies)
  1 sibling, 3 replies; 17+ messages in thread
From: Xiao Zhang @ 2019-07-22 15:11 UTC (permalink / raw)
  To: dev; +Cc: wenzhuo.lu, wei.zhao1, xiaolong.ye, Xiao Zhang, stable

Unit hang may occur if multiple descriptors are available in the rings
during reset or close. This state can be detected by configure status
by bit 8 in register. If the bit is set and there are pending
descriptors in one of the rings, we must flush them before reset or
close.

Fixes: 80580344("e1000: support EM devices (also known as e1000/e1000e)")
Cc: stable@dpdk.org

Signed-off-by: Xiao Zhang <xiao.zhang@intel.com>
---
v8 Modify to follow code style of dpdk community.
v7 Add fix line.
v6 Change the fix on em driver instead of igb driver and update the 
register address according to C-Spec.
v5 Change the subject.
v4 Correct the tail descriptor of tx ring.
v3 Add loop to handle all tx and rx queues.
v2 Use configuration register instead of NVM7 to get the hang state.
---
 drivers/net/e1000/e1000_ethdev.h |   4 ++
 drivers/net/e1000/em_ethdev.c    |   5 ++
 drivers/net/e1000/em_rxtx.c      | 111 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 120 insertions(+)

diff --git a/drivers/net/e1000/e1000_ethdev.h b/drivers/net/e1000/e1000_ethdev.h
index 67acb73..01ff943 100644
--- a/drivers/net/e1000/e1000_ethdev.h
+++ b/drivers/net/e1000/e1000_ethdev.h
@@ -35,6 +35,9 @@
 #define IGB_MAX_RX_QUEUE_NUM           8
 #define IGB_MAX_RX_QUEUE_NUM_82576     16
 
+#define E1000_I219_MAX_RX_QUEUE_NUM		2
+#define E1000_I219_MAX_TX_QUEUE_NUM		2
+
 #define E1000_SYN_FILTER_ENABLE        0x00000001 /* syn filter enable field */
 #define E1000_SYN_FILTER_QUEUE         0x0000000E /* syn filter queue field */
 #define E1000_SYN_FILTER_QUEUE_SHIFT   1          /* syn filter queue field */
@@ -522,5 +525,6 @@ int igb_action_rss_same(const struct rte_flow_action_rss *comp,
 int igb_config_rss_filter(struct rte_eth_dev *dev,
 			struct igb_rte_flow_rss_conf *conf,
 			bool add);
+void em_flush_desc_rings(struct rte_eth_dev *dev);
 
 #endif /* _E1000_ETHDEV_H_ */
diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c
index dc88661..62d3a95 100644
--- a/drivers/net/e1000/em_ethdev.c
+++ b/drivers/net/e1000/em_ethdev.c
@@ -738,6 +738,11 @@ eth_em_stop(struct rte_eth_dev *dev)
 	em_lsc_intr_disable(hw);
 
 	e1000_reset_hw(hw);
+
+	/* Flush desc rings for i219 */
+	if (hw->mac.type >= e1000_pch_spt)
+		em_flush_desc_rings(dev);
+
 	if (hw->mac.type >= e1000_82544)
 		E1000_WRITE_REG(hw, E1000_WUC, 0);
 
diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c
index 708f832..55d8a67 100644
--- a/drivers/net/e1000/em_rxtx.c
+++ b/drivers/net/e1000/em_rxtx.c
@@ -18,6 +18,7 @@
 #include <rte_log.h>
 #include <rte_debug.h>
 #include <rte_pci.h>
+#include <rte_bus_pci.h>
 #include <rte_memory.h>
 #include <rte_memcpy.h>
 #include <rte_memzone.h>
@@ -59,6 +60,11 @@
 #define E1000_TX_OFFLOAD_NOTSUP_MASK \
 		(PKT_TX_OFFLOAD_MASK ^ E1000_TX_OFFLOAD_MASK)
 
+/* PCI offset for querying configuration status register */
+#define PCI_CFG_STATUS_REG                 0x06
+#define FLUSH_DESC_REQUIRED               0x100
+
+
 /**
  * Structure associated with each descriptor of the RX ring of a RX queue.
  */
@@ -2000,3 +2006,108 @@ em_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
 	qinfo->conf.offloads = txq->offloads;
 }
+
+static void
+e1000_flush_tx_ring(struct rte_eth_dev *dev)
+{
+	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	volatile struct e1000_data_desc *tx_desc;
+	volatile uint32_t *tdt_reg_addr;
+	uint32_t tdt, tctl, txd_lower = E1000_TXD_CMD_IFCS;
+	uint16_t size = 512;
+	struct em_tx_queue *txq;
+	int i;
+
+	if (dev->data->tx_queues == NULL)
+		return;
+	tctl = E1000_READ_REG(hw, E1000_TCTL);
+	E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
+	for (i = 0; i < dev->data->nb_tx_queues &&
+		i < E1000_I219_MAX_TX_QUEUE_NUM; i++) {
+		txq = dev->data->tx_queues[i];
+		tdt = E1000_READ_REG(hw, E1000_TDT(i));
+		if (tdt != txq->tx_tail)
+			return;
+		tx_desc = &txq->tx_ring[txq->tx_tail];
+		tx_desc->buffer_addr = rte_cpu_to_le_64(txq->tx_ring_phys_addr);
+		tx_desc->lower.data = rte_cpu_to_le_32(txd_lower | size);
+		tx_desc->upper.data = 0;
+
+		rte_wmb();
+		txq->tx_tail++;
+		if (txq->tx_tail == txq->nb_tx_desc)
+			txq->tx_tail = 0;
+		rte_io_wmb();
+		tdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_TDT(i));
+		E1000_PCI_REG_WRITE_RELAXED(tdt_reg_addr, txq->tx_tail);
+		usec_delay(250);
+	}
+}
+
+static void
+e1000_flush_rx_ring(struct rte_eth_dev *dev)
+{
+	uint32_t rctl, rxdctl;
+	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	int i;
+
+	rctl = E1000_READ_REG(hw, E1000_RCTL);
+	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
+	E1000_WRITE_FLUSH(hw);
+	usec_delay(150);
+
+	for (i = 0; i < dev->data->nb_rx_queues &&
+		i < E1000_I219_MAX_RX_QUEUE_NUM; i++) {
+		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
+		/* zero the lower 14 bits (prefetch and host thresholds) */
+		rxdctl &= 0xffffc000;
+
+		/* update thresholds: prefetch threshold to 31,
+		 * host threshold to 1 and make sure the granularity
+		 * is "descriptors" and not "cache lines"
+		 */
+		rxdctl |= (0x1F | (1UL << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
+
+		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
+	}
+	/* momentarily enable the RX ring for the changes to take effect */
+	E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
+	E1000_WRITE_FLUSH(hw);
+	usec_delay(150);
+	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
+}
+
+/**
+ * em_flush_desc_rings - remove all descriptors from the descriptor rings
+ *
+ * In i219, the descriptor rings must be emptied before resetting/closing the
+ * HW. Failure to do this will cause the HW to enter a unit hang state which
+ * can only be released by PCI reset on the device
+ *
+ */
+
+void
+em_flush_desc_rings(struct rte_eth_dev *dev)
+{
+	uint32_t fextnvm11, tdlen;
+	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
+	uint16_t pci_cfg_status = 0;
+
+	fextnvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
+	E1000_WRITE_REG(hw, E1000_FEXTNVM11,
+			fextnvm11 | E1000_FEXTNVM11_DISABLE_MULR_FIX);
+	tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
+	rte_pci_read_config(pci_dev, &pci_cfg_status, sizeof(pci_cfg_status),
+				PCI_CFG_STATUS_REG);
+
+	/* do nothing if we're not in faulty state, or if the queue is empty */
+	if ((pci_cfg_status & FLUSH_DESC_REQUIRED) && tdlen) {
+		/* flush desc ring */
+		e1000_flush_tx_ring(dev);
+		rte_pci_read_config(pci_dev, &pci_cfg_status,
+				sizeof(pci_cfg_status), PCI_CFG_STATUS_REG);
+		if (pci_cfg_status & FLUSH_DESC_REQUIRED)
+			e1000_flush_rx_ring(dev);
+	}
+}
-- 
2.7.4


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [dpdk-dev] [v8] net/e1000: fix i219 hang on reset/close
  2019-07-22 15:11       ` [dpdk-dev] [v8] " Xiao Zhang
@ 2019-07-24  0:33         ` Zhang, Qi Z
  2019-09-04 12:22         ` Kevin Traynor
  2019-09-05  6:06         ` Gavin Hu (Arm Technology China)
  2 siblings, 0 replies; 17+ messages in thread
From: Zhang, Qi Z @ 2019-07-24  0:33 UTC (permalink / raw)
  To: Zhang, Xiao, dev
  Cc: Lu, Wenzhuo, Zhao1, Wei, Ye, Xiaolong, Zhang, Xiao, stable



> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Xiao Zhang
> Sent: Monday, July 22, 2019 11:12 PM
> To: dev@dpdk.org
> Cc: Lu, Wenzhuo <wenzhuo.lu@intel.com>; Zhao1, Wei
> <wei.zhao1@intel.com>; Ye, Xiaolong <xiaolong.ye@intel.com>; Zhang, Xiao
> <xiao.zhang@intel.com>; stable@dpdk.org
> Subject: [dpdk-dev] [v8] net/e1000: fix i219 hang on reset/close
> 
> Unit hang may occur if multiple descriptors are available in the rings during
> reset or close. This state can be detected by configure status by bit 8 in
> register. If the bit is set and there are pending descriptors in one of the rings,
> we must flush them before reset or close.
> 
> Fixes: 80580344("e1000: support EM devices (also known as e1000/e1000e)")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Xiao Zhang <xiao.zhang@intel.com>

Reviewed-by: Xiaolong Ye <xiaolong.ye@intel.com>

Applied to dpdk-next-net-intel

Thanks
Qi

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [dpdk-dev] [v8] net/e1000: fix i219 hang on reset/close
  2019-07-22 15:11       ` [dpdk-dev] [v8] " Xiao Zhang
  2019-07-24  0:33         ` Zhang, Qi Z
@ 2019-09-04 12:22         ` Kevin Traynor
  2019-09-05  1:33           ` Zhang, Xiao
  2019-09-05  6:06         ` Gavin Hu (Arm Technology China)
  2 siblings, 1 reply; 17+ messages in thread
From: Kevin Traynor @ 2019-09-04 12:22 UTC (permalink / raw)
  To: Xiao Zhang, dev; +Cc: wenzhuo.lu, wei.zhao1, xiaolong.ye, stable

On 22/07/2019 16:11, Xiao Zhang wrote:
> Unit hang may occur if multiple descriptors are available in the rings
> during reset or close. This state can be detected by configure status
> by bit 8 in register. If the bit is set and there are pending
> descriptors in one of the rings, we must flush them before reset or
> close.
> 
> Fixes: 80580344("e1000: support EM devices (also known as e1000/e1000e)")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Xiao Zhang <xiao.zhang@intel.com>
> ---
> v8 Modify to follow code style of dpdk community.
> v7 Add fix line.
> v6 Change the fix on em driver instead of igb driver and update the 
> register address according to C-Spec.
> v5 Change the subject.
> v4 Correct the tail descriptor of tx ring.
> v3 Add loop to handle all tx and rx queues.
> v2 Use configuration register instead of NVM7 to get the hang state.
> ---

Hi Wenzhuo, as e1000 maintainer can you review and ack this patch for
stable ?

>  drivers/net/e1000/e1000_ethdev.h |   4 ++
>  drivers/net/e1000/em_ethdev.c    |   5 ++
>  drivers/net/e1000/em_rxtx.c      | 111 +++++++++++++++++++++++++++++++++++++++
>  3 files changed, 120 insertions(+)
> 
> diff --git a/drivers/net/e1000/e1000_ethdev.h b/drivers/net/e1000/e1000_ethdev.h
> index 67acb73..01ff943 100644
> --- a/drivers/net/e1000/e1000_ethdev.h
> +++ b/drivers/net/e1000/e1000_ethdev.h
> @@ -35,6 +35,9 @@
>  #define IGB_MAX_RX_QUEUE_NUM           8
>  #define IGB_MAX_RX_QUEUE_NUM_82576     16
>  
> +#define E1000_I219_MAX_RX_QUEUE_NUM		2
> +#define E1000_I219_MAX_TX_QUEUE_NUM		2
> +
>  #define E1000_SYN_FILTER_ENABLE        0x00000001 /* syn filter enable field */
>  #define E1000_SYN_FILTER_QUEUE         0x0000000E /* syn filter queue field */
>  #define E1000_SYN_FILTER_QUEUE_SHIFT   1          /* syn filter queue field */
> @@ -522,5 +525,6 @@ int igb_action_rss_same(const struct rte_flow_action_rss *comp,
>  int igb_config_rss_filter(struct rte_eth_dev *dev,
>  			struct igb_rte_flow_rss_conf *conf,
>  			bool add);
> +void em_flush_desc_rings(struct rte_eth_dev *dev);
>  
>  #endif /* _E1000_ETHDEV_H_ */
> diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c
> index dc88661..62d3a95 100644
> --- a/drivers/net/e1000/em_ethdev.c
> +++ b/drivers/net/e1000/em_ethdev.c
> @@ -738,6 +738,11 @@ eth_em_stop(struct rte_eth_dev *dev)
>  	em_lsc_intr_disable(hw);
>  
>  	e1000_reset_hw(hw);
> +
> +	/* Flush desc rings for i219 */
> +	if (hw->mac.type >= e1000_pch_spt)

It means it is called for mac types below - is it right?

	e1000_pch_spt,
	e1000_pch_cnp,
	e1000_82575,
	e1000_82576,
	e1000_82580,
	e1000_i350,
	e1000_i354,
	e1000_i210,
	e1000_i211,
	e1000_vfadapt,
	e1000_vfadapt_i350,

> +		em_flush_desc_rings(dev);
> +
>  	if (hw->mac.type >= e1000_82544)
>  		E1000_WRITE_REG(hw, E1000_WUC, 0);
>  
> diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c
> index 708f832..55d8a67 100644
> --- a/drivers/net/e1000/em_rxtx.c
> +++ b/drivers/net/e1000/em_rxtx.c
> @@ -18,6 +18,7 @@
>  #include <rte_log.h>
>  #include <rte_debug.h>
>  #include <rte_pci.h>
> +#include <rte_bus_pci.h>
>  #include <rte_memory.h>
>  #include <rte_memcpy.h>
>  #include <rte_memzone.h>
> @@ -59,6 +60,11 @@
>  #define E1000_TX_OFFLOAD_NOTSUP_MASK \
>  		(PKT_TX_OFFLOAD_MASK ^ E1000_TX_OFFLOAD_MASK)
>  
> +/* PCI offset for querying configuration status register */
> +#define PCI_CFG_STATUS_REG                 0x06
> +#define FLUSH_DESC_REQUIRED               0x100
> +
> +
>  /**
>   * Structure associated with each descriptor of the RX ring of a RX queue.
>   */
> @@ -2000,3 +2006,108 @@ em_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
>  	qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
>  	qinfo->conf.offloads = txq->offloads;
>  }
> +
> +static void
> +e1000_flush_tx_ring(struct rte_eth_dev *dev)
> +{
> +	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> +	volatile struct e1000_data_desc *tx_desc;
> +	volatile uint32_t *tdt_reg_addr;
> +	uint32_t tdt, tctl, txd_lower = E1000_TXD_CMD_IFCS;
> +	uint16_t size = 512;
> +	struct em_tx_queue *txq;
> +	int i;
> +
> +	if (dev->data->tx_queues == NULL)
> +		return;
> +	tctl = E1000_READ_REG(hw, E1000_TCTL);
> +	E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
> +	for (i = 0; i < dev->data->nb_tx_queues &&
> +		i < E1000_I219_MAX_TX_QUEUE_NUM; i++) {
> +		txq = dev->data->tx_queues[i];
> +		tdt = E1000_READ_REG(hw, E1000_TDT(i));
> +		if (tdt != txq->tx_tail)
> +			return;
> +		tx_desc = &txq->tx_ring[txq->tx_tail];
> +		tx_desc->buffer_addr = rte_cpu_to_le_64(txq->tx_ring_phys_addr);
> +		tx_desc->lower.data = rte_cpu_to_le_32(txd_lower | size);
> +		tx_desc->upper.data = 0;
> +
> +		rte_wmb();
> +		txq->tx_tail++;
> +		if (txq->tx_tail == txq->nb_tx_desc)
> +			txq->tx_tail = 0;
> +		rte_io_wmb();
> +		tdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_TDT(i));
> +		E1000_PCI_REG_WRITE_RELAXED(tdt_reg_addr, txq->tx_tail);
> +		usec_delay(250);
> +	}
> +}
> +
> +static void
> +e1000_flush_rx_ring(struct rte_eth_dev *dev)
> +{
> +	uint32_t rctl, rxdctl;
> +	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> +	int i;
> +
> +	rctl = E1000_READ_REG(hw, E1000_RCTL);
> +	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
> +	E1000_WRITE_FLUSH(hw);
> +	usec_delay(150);
> +
> +	for (i = 0; i < dev->data->nb_rx_queues &&
> +		i < E1000_I219_MAX_RX_QUEUE_NUM; i++) {
> +		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
> +		/* zero the lower 14 bits (prefetch and host thresholds) */
> +		rxdctl &= 0xffffc000;
> +
> +		/* update thresholds: prefetch threshold to 31,
> +		 * host threshold to 1 and make sure the granularity
> +		 * is "descriptors" and not "cache lines"
> +		 */
> +		rxdctl |= (0x1F | (1UL << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
> +
> +		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
> +	}
> +	/* momentarily enable the RX ring for the changes to take effect */
> +	E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
> +	E1000_WRITE_FLUSH(hw);
> +	usec_delay(150);
> +	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
> +}
> +
> +/**
> + * em_flush_desc_rings - remove all descriptors from the descriptor rings
> + *
> + * In i219, the descriptor rings must be emptied before resetting/closing the
> + * HW. Failure to do this will cause the HW to enter a unit hang state which
> + * can only be released by PCI reset on the device
> + *
> + */
> +
> +void
> +em_flush_desc_rings(struct rte_eth_dev *dev)
> +{
> +	uint32_t fextnvm11, tdlen;
> +	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> +	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
> +	uint16_t pci_cfg_status = 0;
> +
> +	fextnvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
> +	E1000_WRITE_REG(hw, E1000_FEXTNVM11,
> +			fextnvm11 | E1000_FEXTNVM11_DISABLE_MULR_FIX);
> +	tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
> +	rte_pci_read_config(pci_dev, &pci_cfg_status, sizeof(pci_cfg_status),
> +				PCI_CFG_STATUS_REG);
> +
> +	/* do nothing if we're not in faulty state, or if the queue is empty */
> +	if ((pci_cfg_status & FLUSH_DESC_REQUIRED) && tdlen) {
> +		/* flush desc ring */
> +		e1000_flush_tx_ring(dev);
> +		rte_pci_read_config(pci_dev, &pci_cfg_status,
> +				sizeof(pci_cfg_status), PCI_CFG_STATUS_REG);
> +		if (pci_cfg_status & FLUSH_DESC_REQUIRED)
> +			e1000_flush_rx_ring(dev);
> +	}
> +}
> 


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [dpdk-dev] [v8] net/e1000: fix i219 hang on reset/close
  2019-09-04 12:22         ` Kevin Traynor
@ 2019-09-05  1:33           ` Zhang, Xiao
  0 siblings, 0 replies; 17+ messages in thread
From: Zhang, Xiao @ 2019-09-05  1:33 UTC (permalink / raw)
  To: Kevin Traynor, dev; +Cc: Lu, Wenzhuo, Zhao1, Wei, Ye, Xiaolong, stable



> -----Original Message-----
> From: Kevin Traynor [mailto:ktraynor@redhat.com]
> Sent: Wednesday, September 4, 2019 8:23 PM
> To: Zhang, Xiao <xiao.zhang@intel.com>; dev@dpdk.org
> Cc: Lu, Wenzhuo <wenzhuo.lu@intel.com>; Zhao1, Wei <wei.zhao1@intel.com>;
> Ye, Xiaolong <xiaolong.ye@intel.com>; stable@dpdk.org
> Subject: Re: [dpdk-dev] [v8] net/e1000: fix i219 hang on reset/close
> 
> On 22/07/2019 16:11, Xiao Zhang wrote:
> > Unit hang may occur if multiple descriptors are available in the rings
> > during reset or close. This state can be detected by configure status
> > by bit 8 in register. If the bit is set and there are pending
> > descriptors in one of the rings, we must flush them before reset or
> > close.
> >
> > Fixes: 80580344("e1000: support EM devices (also known as
> > e1000/e1000e)")
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Xiao Zhang <xiao.zhang@intel.com>
> > ---
> > v8 Modify to follow code style of dpdk community.
> > v7 Add fix line.
> > v6 Change the fix on em driver instead of igb driver and update the
> > register address according to C-Spec.
> > v5 Change the subject.
> > v4 Correct the tail descriptor of tx ring.
> > v3 Add loop to handle all tx and rx queues.
> > v2 Use configuration register instead of NVM7 to get the hang state.
> > ---
> 
> Hi Wenzhuo, as e1000 maintainer can you review and ack this patch for stable ?
> 
> >  drivers/net/e1000/e1000_ethdev.h |   4 ++
> >  drivers/net/e1000/em_ethdev.c    |   5 ++
> >  drivers/net/e1000/em_rxtx.c      | 111
> +++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 120 insertions(+)
> >
> > diff --git a/drivers/net/e1000/e1000_ethdev.h
> > b/drivers/net/e1000/e1000_ethdev.h
> > index 67acb73..01ff943 100644
> > --- a/drivers/net/e1000/e1000_ethdev.h
> > +++ b/drivers/net/e1000/e1000_ethdev.h
> > @@ -35,6 +35,9 @@
> >  #define IGB_MAX_RX_QUEUE_NUM           8
> >  #define IGB_MAX_RX_QUEUE_NUM_82576     16
> >
> > +#define E1000_I219_MAX_RX_QUEUE_NUM		2
> > +#define E1000_I219_MAX_TX_QUEUE_NUM		2
> > +
> >  #define E1000_SYN_FILTER_ENABLE        0x00000001 /* syn filter enable field
> */
> >  #define E1000_SYN_FILTER_QUEUE         0x0000000E /* syn filter queue field
> */
> >  #define E1000_SYN_FILTER_QUEUE_SHIFT   1          /* syn filter queue field */
> > @@ -522,5 +525,6 @@ int igb_action_rss_same(const struct
> > rte_flow_action_rss *comp,  int igb_config_rss_filter(struct rte_eth_dev *dev,
> >  			struct igb_rte_flow_rss_conf *conf,
> >  			bool add);
> > +void em_flush_desc_rings(struct rte_eth_dev *dev);
> >
> >  #endif /* _E1000_ETHDEV_H_ */
> > diff --git a/drivers/net/e1000/em_ethdev.c
> > b/drivers/net/e1000/em_ethdev.c index dc88661..62d3a95 100644
> > --- a/drivers/net/e1000/em_ethdev.c
> > +++ b/drivers/net/e1000/em_ethdev.c
> > @@ -738,6 +738,11 @@ eth_em_stop(struct rte_eth_dev *dev)
> >  	em_lsc_intr_disable(hw);
> >
> >  	e1000_reset_hw(hw);
> > +
> > +	/* Flush desc rings for i219 */
> > +	if (hw->mac.type >= e1000_pch_spt)
> 
> It means it is called for mac types below - is it right?
> 
> 	e1000_pch_spt,
> 	e1000_pch_cnp,
> 	e1000_82575,
> 	e1000_82576,
> 	e1000_82580,
> 	e1000_i350,
> 	e1000_i354,
> 	e1000_i210,
> 	e1000_i211,
> 	e1000_vfadapt,
> 	e1000_vfadapt_i350,

This should for types e1000_pch_spt and e1000_pch_cnp, will correct it.

> 
> > +		em_flush_desc_rings(dev);
> > +
> >  	if (hw->mac.type >= e1000_82544)
> >  		E1000_WRITE_REG(hw, E1000_WUC, 0);
> >
> > diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c
> > index 708f832..55d8a67 100644
> > --- a/drivers/net/e1000/em_rxtx.c
> > +++ b/drivers/net/e1000/em_rxtx.c
> > @@ -18,6 +18,7 @@
> >  #include <rte_log.h>
> >  #include <rte_debug.h>
> >  #include <rte_pci.h>
> > +#include <rte_bus_pci.h>
> >  #include <rte_memory.h>
> >  #include <rte_memcpy.h>
> >  #include <rte_memzone.h>
> > @@ -59,6 +60,11 @@
> >  #define E1000_TX_OFFLOAD_NOTSUP_MASK \
> >  		(PKT_TX_OFFLOAD_MASK ^ E1000_TX_OFFLOAD_MASK)
> >
> > +/* PCI offset for querying configuration status register */
> > +#define PCI_CFG_STATUS_REG                 0x06
> > +#define FLUSH_DESC_REQUIRED               0x100
> > +
> > +
> >  /**
> >   * Structure associated with each descriptor of the RX ring of a RX queue.
> >   */
> > @@ -2000,3 +2006,108 @@ em_txq_info_get(struct rte_eth_dev *dev,
> uint16_t queue_id,
> >  	qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
> >  	qinfo->conf.offloads = txq->offloads;  }
> > +
> > +static void
> > +e1000_flush_tx_ring(struct rte_eth_dev *dev) {
> > +	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data-
> >dev_private);
> > +	volatile struct e1000_data_desc *tx_desc;
> > +	volatile uint32_t *tdt_reg_addr;
> > +	uint32_t tdt, tctl, txd_lower = E1000_TXD_CMD_IFCS;
> > +	uint16_t size = 512;
> > +	struct em_tx_queue *txq;
> > +	int i;
> > +
> > +	if (dev->data->tx_queues == NULL)
> > +		return;
> > +	tctl = E1000_READ_REG(hw, E1000_TCTL);
> > +	E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
> > +	for (i = 0; i < dev->data->nb_tx_queues &&
> > +		i < E1000_I219_MAX_TX_QUEUE_NUM; i++) {
> > +		txq = dev->data->tx_queues[i];
> > +		tdt = E1000_READ_REG(hw, E1000_TDT(i));
> > +		if (tdt != txq->tx_tail)
> > +			return;
> > +		tx_desc = &txq->tx_ring[txq->tx_tail];
> > +		tx_desc->buffer_addr = rte_cpu_to_le_64(txq-
> >tx_ring_phys_addr);
> > +		tx_desc->lower.data = rte_cpu_to_le_32(txd_lower | size);
> > +		tx_desc->upper.data = 0;
> > +
> > +		rte_wmb();
> > +		txq->tx_tail++;
> > +		if (txq->tx_tail == txq->nb_tx_desc)
> > +			txq->tx_tail = 0;
> > +		rte_io_wmb();
> > +		tdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_TDT(i));
> > +		E1000_PCI_REG_WRITE_RELAXED(tdt_reg_addr, txq->tx_tail);
> > +		usec_delay(250);
> > +	}
> > +}
> > +
> > +static void
> > +e1000_flush_rx_ring(struct rte_eth_dev *dev) {
> > +	uint32_t rctl, rxdctl;
> > +	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data-
> >dev_private);
> > +	int i;
> > +
> > +	rctl = E1000_READ_REG(hw, E1000_RCTL);
> > +	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
> > +	E1000_WRITE_FLUSH(hw);
> > +	usec_delay(150);
> > +
> > +	for (i = 0; i < dev->data->nb_rx_queues &&
> > +		i < E1000_I219_MAX_RX_QUEUE_NUM; i++) {
> > +		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
> > +		/* zero the lower 14 bits (prefetch and host thresholds) */
> > +		rxdctl &= 0xffffc000;
> > +
> > +		/* update thresholds: prefetch threshold to 31,
> > +		 * host threshold to 1 and make sure the granularity
> > +		 * is "descriptors" and not "cache lines"
> > +		 */
> > +		rxdctl |= (0x1F | (1UL << 8) |
> E1000_RXDCTL_THRESH_UNIT_DESC);
> > +
> > +		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
> > +	}
> > +	/* momentarily enable the RX ring for the changes to take effect */
> > +	E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
> > +	E1000_WRITE_FLUSH(hw);
> > +	usec_delay(150);
> > +	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); }
> > +
> > +/**
> > + * em_flush_desc_rings - remove all descriptors from the descriptor
> > +rings
> > + *
> > + * In i219, the descriptor rings must be emptied before
> > +resetting/closing the
> > + * HW. Failure to do this will cause the HW to enter a unit hang
> > +state which
> > + * can only be released by PCI reset on the device
> > + *
> > + */
> > +
> > +void
> > +em_flush_desc_rings(struct rte_eth_dev *dev) {
> > +	uint32_t fextnvm11, tdlen;
> > +	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data-
> >dev_private);
> > +	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
> > +	uint16_t pci_cfg_status = 0;
> > +
> > +	fextnvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
> > +	E1000_WRITE_REG(hw, E1000_FEXTNVM11,
> > +			fextnvm11 | E1000_FEXTNVM11_DISABLE_MULR_FIX);
> > +	tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
> > +	rte_pci_read_config(pci_dev, &pci_cfg_status, sizeof(pci_cfg_status),
> > +				PCI_CFG_STATUS_REG);
> > +
> > +	/* do nothing if we're not in faulty state, or if the queue is empty */
> > +	if ((pci_cfg_status & FLUSH_DESC_REQUIRED) && tdlen) {
> > +		/* flush desc ring */
> > +		e1000_flush_tx_ring(dev);
> > +		rte_pci_read_config(pci_dev, &pci_cfg_status,
> > +				sizeof(pci_cfg_status), PCI_CFG_STATUS_REG);
> > +		if (pci_cfg_status & FLUSH_DESC_REQUIRED)
> > +			e1000_flush_rx_ring(dev);
> > +	}
> > +}
> >


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [dpdk-dev] [v8] net/e1000: fix i219 hang on reset/close
  2019-07-22 15:11       ` [dpdk-dev] [v8] " Xiao Zhang
  2019-07-24  0:33         ` Zhang, Qi Z
  2019-09-04 12:22         ` Kevin Traynor
@ 2019-09-05  6:06         ` Gavin Hu (Arm Technology China)
  2019-09-06  4:30           ` Zhang, Xiao
  2 siblings, 1 reply; 17+ messages in thread
From: Gavin Hu (Arm Technology China) @ 2019-09-05  6:06 UTC (permalink / raw)
  To: Xiao Zhang, dev; +Cc: wenzhuo.lu, wei.zhao1, xiaolong.ye, stable

Hi Xiao,

> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Xiao Zhang
> Sent: Monday, July 22, 2019 11:12 PM
> To: dev@dpdk.org
> Cc: wenzhuo.lu@intel.com; wei.zhao1@intel.com; xiaolong.ye@intel.com;
> Xiao Zhang <xiao.zhang@intel.com>; stable@dpdk.org
> Subject: [dpdk-dev] [v8] net/e1000: fix i219 hang on reset/close
>
> Unit hang may occur if multiple descriptors are available in the rings
> during reset or close. This state can be detected by configure status
> by bit 8 in register. If the bit is set and there are pending
> descriptors in one of the rings, we must flush them before reset or
> close.
>
> Fixes: 80580344("e1000: support EM devices (also known as e1000/e1000e)")
> Cc: stable@dpdk.org
>
> Signed-off-by: Xiao Zhang <xiao.zhang@intel.com>
> ---
> v8 Modify to follow code style of dpdk community.
> v7 Add fix line.
> v6 Change the fix on em driver instead of igb driver and update the
> register address according to C-Spec.
> v5 Change the subject.
> v4 Correct the tail descriptor of tx ring.
> v3 Add loop to handle all tx and rx queues.
> v2 Use configuration register instead of NVM7 to get the hang state.
> ---
>  drivers/net/e1000/e1000_ethdev.h |   4 ++
>  drivers/net/e1000/em_ethdev.c    |   5 ++
>  drivers/net/e1000/em_rxtx.c      | 111
> +++++++++++++++++++++++++++++++++++++++
>  3 files changed, 120 insertions(+)
>
> diff --git a/drivers/net/e1000/e1000_ethdev.h
> b/drivers/net/e1000/e1000_ethdev.h
> index 67acb73..01ff943 100644
> --- a/drivers/net/e1000/e1000_ethdev.h
> +++ b/drivers/net/e1000/e1000_ethdev.h
> @@ -35,6 +35,9 @@
>  #define IGB_MAX_RX_QUEUE_NUM           8
>  #define IGB_MAX_RX_QUEUE_NUM_82576     16
>
> +#define E1000_I219_MAX_RX_QUEUE_NUM          2
> +#define E1000_I219_MAX_TX_QUEUE_NUM          2
> +
>  #define E1000_SYN_FILTER_ENABLE        0x00000001 /* syn filter enable field
> */
>  #define E1000_SYN_FILTER_QUEUE         0x0000000E /* syn filter queue field
> */
>  #define E1000_SYN_FILTER_QUEUE_SHIFT   1          /* syn filter queue field */
> @@ -522,5 +525,6 @@ int igb_action_rss_same(const struct
> rte_flow_action_rss *comp,
>  int igb_config_rss_filter(struct rte_eth_dev *dev,
>                       struct igb_rte_flow_rss_conf *conf,
>                       bool add);
> +void em_flush_desc_rings(struct rte_eth_dev *dev);
>
>  #endif /* _E1000_ETHDEV_H_ */
> diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c
> index dc88661..62d3a95 100644
> --- a/drivers/net/e1000/em_ethdev.c
> +++ b/drivers/net/e1000/em_ethdev.c
> @@ -738,6 +738,11 @@ eth_em_stop(struct rte_eth_dev *dev)
>       em_lsc_intr_disable(hw);
>
>       e1000_reset_hw(hw);
> +
> +     /* Flush desc rings for i219 */
> +     if (hw->mac.type >= e1000_pch_spt)
> +             em_flush_desc_rings(dev);
> +
>       if (hw->mac.type >= e1000_82544)
>               E1000_WRITE_REG(hw, E1000_WUC, 0);
>
> diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c
> index 708f832..55d8a67 100644
> --- a/drivers/net/e1000/em_rxtx.c
> +++ b/drivers/net/e1000/em_rxtx.c
> @@ -18,6 +18,7 @@
>  #include <rte_log.h>
>  #include <rte_debug.h>
>  #include <rte_pci.h>
> +#include <rte_bus_pci.h>
>  #include <rte_memory.h>
>  #include <rte_memcpy.h>
>  #include <rte_memzone.h>
> @@ -59,6 +60,11 @@
>  #define E1000_TX_OFFLOAD_NOTSUP_MASK \
>               (PKT_TX_OFFLOAD_MASK ^ E1000_TX_OFFLOAD_MASK)
>
> +/* PCI offset for querying configuration status register */
> +#define PCI_CFG_STATUS_REG                 0x06
> +#define FLUSH_DESC_REQUIRED               0x100
> +
> +
>  /**
>   * Structure associated with each descriptor of the RX ring of a RX queue.
>   */
> @@ -2000,3 +2006,108 @@ em_txq_info_get(struct rte_eth_dev *dev,
> uint16_t queue_id,
>       qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
>       qinfo->conf.offloads = txq->offloads;
>  }
> +
> +static void
> +e1000_flush_tx_ring(struct rte_eth_dev *dev)
> +{
> +     struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data-
> >dev_private);
> +     volatile struct e1000_data_desc *tx_desc;
> +     volatile uint32_t *tdt_reg_addr;
> +     uint32_t tdt, tctl, txd_lower = E1000_TXD_CMD_IFCS;
> +     uint16_t size = 512;
> +     struct em_tx_queue *txq;
> +     int i;
> +
> +     if (dev->data->tx_queues == NULL)
> +             return;
> +     tctl = E1000_READ_REG(hw, E1000_TCTL);
> +     E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
> +     for (i = 0; i < dev->data->nb_tx_queues &&
> +             i < E1000_I219_MAX_TX_QUEUE_NUM; i++) {
> +             txq = dev->data->tx_queues[i];
> +             tdt = E1000_READ_REG(hw, E1000_TDT(i));
> +             if (tdt != txq->tx_tail)
> +                     return;
> +             tx_desc = &txq->tx_ring[txq->tx_tail];
> +             tx_desc->buffer_addr = rte_cpu_to_le_64(txq-
> >tx_ring_phys_addr);
> +             tx_desc->lower.data = rte_cpu_to_le_32(txd_lower | size);
> +             tx_desc->upper.data = 0;
> +
> +             rte_wmb();
Tx_desc is CIO memory, should rte_cio_wmb be used here?

> +             txq->tx_tail++;
> +             if (txq->tx_tail == txq->nb_tx_desc)
> +                     txq->tx_tail = 0;
> +             rte_io_wmb();
This line can be merged into the following line and use E1000_PCI_REG_WRITE API instead, which has an rte_io_wmb included.
This not only looks better but also better for understanding.
> +             tdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_TDT(i));
> +             E1000_PCI_REG_WRITE_RELAXED(tdt_reg_addr, txq->tx_tail);
> +             usec_delay(250);
> +     }
> +}
> +
> +static void
> +e1000_flush_rx_ring(struct rte_eth_dev *dev)
> +{
> +     uint32_t rctl, rxdctl;
> +     struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data-
> >dev_private);
> +     int i;
> +
> +     rctl = E1000_READ_REG(hw, E1000_RCTL);
> +     E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
> +     E1000_WRITE_FLUSH(hw);
> +     usec_delay(150);
> +
> +     for (i = 0; i < dev->data->nb_rx_queues &&
> +             i < E1000_I219_MAX_RX_QUEUE_NUM; i++) {
> +             rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
> +             /* zero the lower 14 bits (prefetch and host thresholds) */
> +             rxdctl &= 0xffffc000;
> +
> +             /* update thresholds: prefetch threshold to 31,
> +              * host threshold to 1 and make sure the granularity
> +              * is "descriptors" and not "cache lines"
> +              */
> +             rxdctl |= (0x1F | (1UL << 8) |
> E1000_RXDCTL_THRESH_UNIT_DESC);
> +
> +             E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
> +     }
> +     /* momentarily enable the RX ring for the changes to take effect */
> +     E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
> +     E1000_WRITE_FLUSH(hw);
> +     usec_delay(150);
> +     E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
> +}
> +
> +/**
> + * em_flush_desc_rings - remove all descriptors from the descriptor rings
> + *
> + * In i219, the descriptor rings must be emptied before resetting/closing the
> + * HW. Failure to do this will cause the HW to enter a unit hang state which
> + * can only be released by PCI reset on the device
> + *
> + */
> +
> +void
> +em_flush_desc_rings(struct rte_eth_dev *dev)
> +{
> +     uint32_t fextnvm11, tdlen;
> +     struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data-
> >dev_private);
> +     struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
> +     uint16_t pci_cfg_status = 0;
> +
> +     fextnvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
> +     E1000_WRITE_REG(hw, E1000_FEXTNVM11,
> +                     fextnvm11 |
> E1000_FEXTNVM11_DISABLE_MULR_FIX);
> +     tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
> +     rte_pci_read_config(pci_dev, &pci_cfg_status, sizeof(pci_cfg_status),
> +                             PCI_CFG_STATUS_REG);
> +
> +     /* do nothing if we're not in faulty state, or if the queue is empty */
> +     if ((pci_cfg_status & FLUSH_DESC_REQUIRED) && tdlen) {
> +             /* flush desc ring */
> +             e1000_flush_tx_ring(dev);
> +             rte_pci_read_config(pci_dev, &pci_cfg_status,
> +                             sizeof(pci_cfg_status),
> PCI_CFG_STATUS_REG);
> +             if (pci_cfg_status & FLUSH_DESC_REQUIRED)
> +                     e1000_flush_rx_ring(dev);
> +     }
> +}
> --
> 2.7.4

IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [dpdk-dev] [v8] net/e1000: fix i219 hang on reset/close
  2019-09-05  6:06         ` Gavin Hu (Arm Technology China)
@ 2019-09-06  4:30           ` Zhang, Xiao
  0 siblings, 0 replies; 17+ messages in thread
From: Zhang, Xiao @ 2019-09-06  4:30 UTC (permalink / raw)
  To: Gavin Hu (Arm Technology China), dev
  Cc: Lu, Wenzhuo, Zhao1, Wei, Ye, Xiaolong, stable

Hi Gavin,

> -----Original Message-----
> From: Gavin Hu (Arm Technology China) [mailto:Gavin.Hu@arm.com]
> Sent: Thursday, September 5, 2019 2:07 PM
> To: Zhang, Xiao <xiao.zhang@intel.com>; dev@dpdk.org
> Cc: Lu, Wenzhuo <wenzhuo.lu@intel.com>; Zhao1, Wei <wei.zhao1@intel.com>;
> Ye, Xiaolong <xiaolong.ye@intel.com>; stable@dpdk.org
> Subject: RE: [dpdk-dev] [v8] net/e1000: fix i219 hang on reset/close
> 
> Hi Xiao,
> 
> > -----Original Message-----
> > From: dev <dev-bounces@dpdk.org> On Behalf Of Xiao Zhang
> > Sent: Monday, July 22, 2019 11:12 PM
> > To: dev@dpdk.org
> > Cc: wenzhuo.lu@intel.com; wei.zhao1@intel.com; xiaolong.ye@intel.com;
> > Xiao Zhang <xiao.zhang@intel.com>; stable@dpdk.org
> > Subject: [dpdk-dev] [v8] net/e1000: fix i219 hang on reset/close
> >
> > Unit hang may occur if multiple descriptors are available in the rings
> > during reset or close. This state can be detected by configure status
> > by bit 8 in register. If the bit is set and there are pending
> > descriptors in one of the rings, we must flush them before reset or
> > close.
> >
> > Fixes: 80580344("e1000: support EM devices (also known as
> > e1000/e1000e)")
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Xiao Zhang <xiao.zhang@intel.com>
> > ---
> > v8 Modify to follow code style of dpdk community.
> > v7 Add fix line.
> > v6 Change the fix on em driver instead of igb driver and update the
> > register address according to C-Spec.
> > v5 Change the subject.
> > v4 Correct the tail descriptor of tx ring.
> > v3 Add loop to handle all tx and rx queues.
> > v2 Use configuration register instead of NVM7 to get the hang state.
> > ---
> >  drivers/net/e1000/e1000_ethdev.h |   4 ++
> >  drivers/net/e1000/em_ethdev.c    |   5 ++
> >  drivers/net/e1000/em_rxtx.c      | 111
> > +++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 120 insertions(+)
> >
> > diff --git a/drivers/net/e1000/e1000_ethdev.h
> > b/drivers/net/e1000/e1000_ethdev.h
> > index 67acb73..01ff943 100644
> > --- a/drivers/net/e1000/e1000_ethdev.h
> > +++ b/drivers/net/e1000/e1000_ethdev.h
> > @@ -35,6 +35,9 @@
> >  #define IGB_MAX_RX_QUEUE_NUM           8
> >  #define IGB_MAX_RX_QUEUE_NUM_82576     16
> >
> > +#define E1000_I219_MAX_RX_QUEUE_NUM          2
> > +#define E1000_I219_MAX_TX_QUEUE_NUM          2
> > +
> >  #define E1000_SYN_FILTER_ENABLE        0x00000001 /* syn filter enable field
> > */
> >  #define E1000_SYN_FILTER_QUEUE         0x0000000E /* syn filter queue field
> > */
> >  #define E1000_SYN_FILTER_QUEUE_SHIFT   1          /* syn filter queue field */
> > @@ -522,5 +525,6 @@ int igb_action_rss_same(const struct
> > rte_flow_action_rss *comp,  int igb_config_rss_filter(struct
> > rte_eth_dev *dev,
> >                       struct igb_rte_flow_rss_conf *conf,
> >                       bool add);
> > +void em_flush_desc_rings(struct rte_eth_dev *dev);
> >
> >  #endif /* _E1000_ETHDEV_H_ */
> > diff --git a/drivers/net/e1000/em_ethdev.c
> > b/drivers/net/e1000/em_ethdev.c index dc88661..62d3a95 100644
> > --- a/drivers/net/e1000/em_ethdev.c
> > +++ b/drivers/net/e1000/em_ethdev.c
> > @@ -738,6 +738,11 @@ eth_em_stop(struct rte_eth_dev *dev)
> >       em_lsc_intr_disable(hw);
> >
> >       e1000_reset_hw(hw);
> > +
> > +     /* Flush desc rings for i219 */
> > +     if (hw->mac.type >= e1000_pch_spt)
> > +             em_flush_desc_rings(dev);
> > +
> >       if (hw->mac.type >= e1000_82544)
> >               E1000_WRITE_REG(hw, E1000_WUC, 0);
> >
> > diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c
> > index 708f832..55d8a67 100644
> > --- a/drivers/net/e1000/em_rxtx.c
> > +++ b/drivers/net/e1000/em_rxtx.c
> > @@ -18,6 +18,7 @@
> >  #include <rte_log.h>
> >  #include <rte_debug.h>
> >  #include <rte_pci.h>
> > +#include <rte_bus_pci.h>
> >  #include <rte_memory.h>
> >  #include <rte_memcpy.h>
> >  #include <rte_memzone.h>
> > @@ -59,6 +60,11 @@
> >  #define E1000_TX_OFFLOAD_NOTSUP_MASK \
> >               (PKT_TX_OFFLOAD_MASK ^ E1000_TX_OFFLOAD_MASK)
> >
> > +/* PCI offset for querying configuration status register */
> > +#define PCI_CFG_STATUS_REG                 0x06
> > +#define FLUSH_DESC_REQUIRED               0x100
> > +
> > +
> >  /**
> >   * Structure associated with each descriptor of the RX ring of a RX queue.
> >   */
> > @@ -2000,3 +2006,108 @@ em_txq_info_get(struct rte_eth_dev *dev,
> > uint16_t queue_id,
> >       qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
> >       qinfo->conf.offloads = txq->offloads;  }
> > +
> > +static void
> > +e1000_flush_tx_ring(struct rte_eth_dev *dev) {
> > +     struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data-
> > >dev_private);
> > +     volatile struct e1000_data_desc *tx_desc;
> > +     volatile uint32_t *tdt_reg_addr;
> > +     uint32_t tdt, tctl, txd_lower = E1000_TXD_CMD_IFCS;
> > +     uint16_t size = 512;
> > +     struct em_tx_queue *txq;
> > +     int i;
> > +
> > +     if (dev->data->tx_queues == NULL)
> > +             return;
> > +     tctl = E1000_READ_REG(hw, E1000_TCTL);
> > +     E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
> > +     for (i = 0; i < dev->data->nb_tx_queues &&
> > +             i < E1000_I219_MAX_TX_QUEUE_NUM; i++) {
> > +             txq = dev->data->tx_queues[i];
> > +             tdt = E1000_READ_REG(hw, E1000_TDT(i));
> > +             if (tdt != txq->tx_tail)
> > +                     return;
> > +             tx_desc = &txq->tx_ring[txq->tx_tail];
> > +             tx_desc->buffer_addr = rte_cpu_to_le_64(txq-
> > >tx_ring_phys_addr);
> > +             tx_desc->lower.data = rte_cpu_to_le_32(txd_lower | size);
> > +             tx_desc->upper.data = 0;
> > +
> > +             rte_wmb();
> Tx_desc is CIO memory, should rte_cio_wmb be used here?
> 

Yes, will replay with rte_cio_wmb.

> > +             txq->tx_tail++;
> > +             if (txq->tx_tail == txq->nb_tx_desc)
> > +                     txq->tx_tail = 0;
> > +             rte_io_wmb();
> This line can be merged into the following line and use E1000_PCI_REG_WRITE
> API instead, which has an rte_io_wmb included.
> This not only looks better but also better for understanding.

Yes it's better to remove rte_io_wmb() and replace E1000_PCI_WRITE_RELAXED by
E1000_PCI_REG_WRITE.

> > +             tdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_TDT(i));
> > +             E1000_PCI_REG_WRITE_RELAXED(tdt_reg_addr, txq->tx_tail);
> > +             usec_delay(250);
> > +     }
> > +}
> > +
> > +static void
> > +e1000_flush_rx_ring(struct rte_eth_dev *dev) {
> > +     uint32_t rctl, rxdctl;
> > +     struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data-
> > >dev_private);
> > +     int i;
> > +
> > +     rctl = E1000_READ_REG(hw, E1000_RCTL);
> > +     E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
> > +     E1000_WRITE_FLUSH(hw);
> > +     usec_delay(150);
> > +
> > +     for (i = 0; i < dev->data->nb_rx_queues &&
> > +             i < E1000_I219_MAX_RX_QUEUE_NUM; i++) {
> > +             rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
> > +             /* zero the lower 14 bits (prefetch and host thresholds) */
> > +             rxdctl &= 0xffffc000;
> > +
> > +             /* update thresholds: prefetch threshold to 31,
> > +              * host threshold to 1 and make sure the granularity
> > +              * is "descriptors" and not "cache lines"
> > +              */
> > +             rxdctl |= (0x1F | (1UL << 8) |
> > E1000_RXDCTL_THRESH_UNIT_DESC);
> > +
> > +             E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
> > +     }
> > +     /* momentarily enable the RX ring for the changes to take effect */
> > +     E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
> > +     E1000_WRITE_FLUSH(hw);
> > +     usec_delay(150);
> > +     E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); }
> > +
> > +/**
> > + * em_flush_desc_rings - remove all descriptors from the descriptor
> > +rings
> > + *
> > + * In i219, the descriptor rings must be emptied before
> > +resetting/closing the
> > + * HW. Failure to do this will cause the HW to enter a unit hang
> > +state which
> > + * can only be released by PCI reset on the device
> > + *
> > + */
> > +
> > +void
> > +em_flush_desc_rings(struct rte_eth_dev *dev) {
> > +     uint32_t fextnvm11, tdlen;
> > +     struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data-
> > >dev_private);
> > +     struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
> > +     uint16_t pci_cfg_status = 0;
> > +
> > +     fextnvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
> > +     E1000_WRITE_REG(hw, E1000_FEXTNVM11,
> > +                     fextnvm11 |
> > E1000_FEXTNVM11_DISABLE_MULR_FIX);
> > +     tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
> > +     rte_pci_read_config(pci_dev, &pci_cfg_status, sizeof(pci_cfg_status),
> > +                             PCI_CFG_STATUS_REG);
> > +
> > +     /* do nothing if we're not in faulty state, or if the queue is empty */
> > +     if ((pci_cfg_status & FLUSH_DESC_REQUIRED) && tdlen) {
> > +             /* flush desc ring */
> > +             e1000_flush_tx_ring(dev);
> > +             rte_pci_read_config(pci_dev, &pci_cfg_status,
> > +                             sizeof(pci_cfg_status),
> > PCI_CFG_STATUS_REG);
> > +             if (pci_cfg_status & FLUSH_DESC_REQUIRED)
> > +                     e1000_flush_rx_ring(dev);
> > +     }
> > +}
> > --
> > 2.7.4
> 
> IMPORTANT NOTICE: The contents of this email and any attachments are
> confidential and may also be privileged. If you are not the intended recipient,
> please notify the sender immediately and do not disclose the contents to any
> other person, use it for any purpose, or store or copy the information in any
> medium. Thank you.

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2019-09-06  4:30 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-07-10  0:42 [dpdk-dev] [v4] net/e1000: i219 unit hang issue fix on reset/close Xiao Zhang
2019-07-10  4:48 ` Anand H. Krishnan
2019-07-16  1:03   ` Anand H. Krishnan
2019-07-16  2:58     ` Zhang, Xiao
2019-07-10  7:37 ` Thomas Monjalon
2019-07-11  9:51 ` [dpdk-dev] [v5] net/e1000: fix i219 hang " Xiao Zhang
2019-07-22 11:19   ` [dpdk-dev] [v6] " Xiao Zhang
2019-07-22  9:26     ` Ye Xiaolong
2019-07-22  3:18       ` Zhang, Xiao
2019-07-22 12:19     ` [dpdk-dev] [v7] " Xiao Zhang
2019-07-22 12:34       ` Ye Xiaolong
2019-07-22 15:11       ` [dpdk-dev] [v8] " Xiao Zhang
2019-07-24  0:33         ` Zhang, Qi Z
2019-09-04 12:22         ` Kevin Traynor
2019-09-05  1:33           ` Zhang, Xiao
2019-09-05  6:06         ` Gavin Hu (Arm Technology China)
2019-09-06  4:30           ` Zhang, Xiao

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).