DPDK patches and discussions
 help / color / mirror / Atom feed
* [PATCH v1 0/1] fix NBL issues in DPDK 25.11-rc4
@ 2025-11-26  2:54 Dimon Zhao
  2025-11-26  2:54 ` [PATCH v1 1/1] net/nbl: fix Rx/Tx stats concurrency Dimon Zhao
  0 siblings, 1 reply; 3+ messages in thread
From: Dimon Zhao @ 2025-11-26  2:54 UTC (permalink / raw)
  To: dimon.zhao, dev

fix Rx/Tx stats concurrency

Dimon Zhao (1):
  net/nbl: fix Rx/Tx stats concurrency

 drivers/net/nbl/nbl_hw/nbl_resource.h |  2 ++
 drivers/net/nbl/nbl_hw/nbl_txrx.c     | 38 +++++++++++++--------------
 2 files changed, 21 insertions(+), 19 deletions(-)

-- 
2.34.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH v1 1/1] net/nbl: fix Rx/Tx stats concurrency
  2025-11-26  2:54 [PATCH v1 0/1] fix NBL issues in DPDK 25.11-rc4 Dimon Zhao
@ 2025-11-26  2:54 ` Dimon Zhao
  2025-11-27  0:07   ` Stephen Hemminger
  0 siblings, 1 reply; 3+ messages in thread
From: Dimon Zhao @ 2025-11-26  2:54 UTC (permalink / raw)
  To: dimon.zhao, dev; +Cc: Kyo Liu, Leon Yu, Sam Chen

Queue statistics are being continuously updated in Rx/Tx burst
routines while handling traffic. In addition to that, statistics
can be reset (written with zeroes) on statistics reset in other
threads, causing a race condition, which in turn could result in
wrong stats.

The patch provides an approach with reference values, allowing
the actual counters to be writable within Rx/Tx burst threads
only, and updating reference values on stats reset.

Fixes: 661c0ccf2512 ("net/nbl: support statistics")

Signed-off-by: Dimon Zhao <dimon.zhao@nebula-matrix.com>
---
 drivers/net/nbl/nbl_hw/nbl_resource.h |  2 ++
 drivers/net/nbl/nbl_hw/nbl_txrx.c     | 38 +++++++++++++--------------
 2 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/drivers/net/nbl/nbl_hw/nbl_resource.h b/drivers/net/nbl/nbl_hw/nbl_resource.h
index aba0f8e0c1..1f6515f64d 100644
--- a/drivers/net/nbl/nbl_hw/nbl_resource.h
+++ b/drivers/net/nbl/nbl_hw/nbl_resource.h
@@ -141,6 +141,7 @@ struct nbl_res_tx_ring {
 	const struct rte_eth_dev *eth_dev;
 	struct nbl_common_info *common;
 	struct nbl_txq_stats txq_stats;
+	struct nbl_txq_stats txq_stats_reset;
 	u64 default_hdr[2];
 
 	enum nbl_product_type product;
@@ -182,6 +183,7 @@ struct nbl_res_rx_ring {
 	const struct rte_eth_dev *eth_dev;
 	struct nbl_common_info *common;
 	struct nbl_rxq_stats rxq_stats;
+	struct nbl_rxq_stats rxq_stats_reset;
 	uint64_t mbuf_initializer; /**< value to init mbufs */
 	struct rte_mbuf fake_mbuf;
 
diff --git a/drivers/net/nbl/nbl_hw/nbl_txrx.c b/drivers/net/nbl/nbl_hw/nbl_txrx.c
index d7ce725872..650790b4fc 100644
--- a/drivers/net/nbl/nbl_hw/nbl_txrx.c
+++ b/drivers/net/nbl/nbl_hw/nbl_txrx.c
@@ -727,9 +727,9 @@ static int nbl_res_txrx_get_stats(void *priv, struct rte_eth_stats *rte_stats,
 	struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
 	struct rte_eth_dev *eth_dev = res_mgt->eth_dev;
 	struct nbl_res_rx_ring *rxq;
-	struct nbl_rxq_stats *rxq_stats;
+	struct nbl_rxq_stats *rxq_stats, *rxq_stats_reset;
 	struct nbl_res_tx_ring  *txq;
-	struct nbl_txq_stats *txq_stats;
+	struct nbl_txq_stats *txq_stats, *txq_stats_reset;
 	uint32_t i;
 	uint16_t idx;
 
@@ -739,16 +739,18 @@ static int nbl_res_txrx_get_stats(void *priv, struct rte_eth_stats *rte_stats,
 		if (unlikely(rxq  == NULL))
 			return -EINVAL;
 		rxq_stats = &rxq->rxq_stats;
+		rxq_stats_reset = &rxq->rxq_stats_reset;
 		idx = rxq->queue_id;
 
 		if (qstats && idx < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
-			qstats->q_ipackets[idx] += rxq_stats->rx_packets;
-			qstats->q_ibytes[idx] += rxq_stats->rx_bytes;
+			qstats->q_ipackets[idx] += rxq_stats->rx_packets -
+							rxq_stats_reset->rx_packets;
+			qstats->q_ibytes[idx] += rxq_stats->rx_bytes - rxq_stats_reset->rx_bytes;
 		}
-		rte_stats->ipackets += rxq_stats->rx_packets;
-		rte_stats->ibytes += rxq_stats->rx_bytes;
-		rte_stats->rx_nombuf += rxq_stats->rx_nombuf;
-		rte_stats->ierrors += rxq_stats->rx_ierror;
+		rte_stats->ipackets += rxq_stats->rx_packets - rxq_stats_reset->rx_packets;
+		rte_stats->ibytes += rxq_stats->rx_bytes - rxq_stats_reset->rx_bytes;
+		rte_stats->rx_nombuf += rxq_stats->rx_nombuf - rxq_stats_reset->rx_nombuf;
+		rte_stats->ierrors += rxq_stats->rx_ierror - rxq_stats_reset->rx_ierror;
 	}
 
 	for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
@@ -756,15 +758,17 @@ static int nbl_res_txrx_get_stats(void *priv, struct rte_eth_stats *rte_stats,
 		if (unlikely(txq  == NULL))
 			return -EINVAL;
 		txq_stats = &txq->txq_stats;
+		txq_stats_reset = &txq->txq_stats_reset;
 		idx = txq->queue_id;
 
 		if (qstats && idx < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
-			qstats->q_opackets[idx] += txq_stats->tx_packets;
-			qstats->q_obytes[idx] += txq_stats->tx_bytes;
+			qstats->q_opackets[idx] += txq_stats->tx_packets -
+							txq_stats_reset->tx_packets;
+			qstats->q_obytes[idx] += txq_stats->tx_bytes - txq_stats_reset->tx_bytes;
 		}
-		rte_stats->opackets += txq_stats->tx_packets;
-		rte_stats->obytes += txq_stats->tx_bytes;
-		rte_stats->oerrors += txq_stats->tx_errors;
+		rte_stats->opackets += txq_stats->tx_packets - txq_stats_reset->tx_packets;
+		rte_stats->obytes += txq_stats->tx_bytes - txq_stats_reset->tx_bytes;
+		rte_stats->oerrors += txq_stats->tx_errors - txq_stats_reset->tx_errors;
 	}
 
 	return 0;
@@ -776,9 +780,7 @@ nbl_res_txrx_reset_stats(void *priv)
 	struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
 	struct rte_eth_dev *eth_dev = res_mgt->eth_dev;
 	struct nbl_res_rx_ring *rxq;
-	struct nbl_rxq_stats *rxq_stats;
 	struct nbl_res_tx_ring  *txq;
-	struct nbl_txq_stats *txq_stats;
 	uint32_t i;
 
 	/* Add software counters. */
@@ -787,8 +789,7 @@ nbl_res_txrx_reset_stats(void *priv)
 		if (unlikely(rxq == NULL))
 			continue;
 
-		rxq_stats = &rxq->rxq_stats;
-		memset(rxq_stats, 0, sizeof(struct nbl_rxq_stats));
+		rxq->rxq_stats_reset = rxq->rxq_stats;
 	}
 
 	for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
@@ -796,8 +797,7 @@ nbl_res_txrx_reset_stats(void *priv)
 		if (unlikely(txq == NULL))
 			continue;
 
-		txq_stats = &txq->txq_stats;
-		memset(txq_stats, 0, sizeof(struct nbl_txq_stats));
+		txq->txq_stats_reset = txq->txq_stats;
 	}
 
 	return 0;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v1 1/1] net/nbl: fix Rx/Tx stats concurrency
  2025-11-26  2:54 ` [PATCH v1 1/1] net/nbl: fix Rx/Tx stats concurrency Dimon Zhao
@ 2025-11-27  0:07   ` Stephen Hemminger
  0 siblings, 0 replies; 3+ messages in thread
From: Stephen Hemminger @ 2025-11-27  0:07 UTC (permalink / raw)
  To: Dimon Zhao; +Cc: dev, Kyo Liu, Leon Yu, Sam Chen

On Tue, 25 Nov 2025 18:54:36 -0800
Dimon Zhao <dimon.zhao@nebula-matrix.com> wrote:

> Queue statistics are being continuously updated in Rx/Tx burst
> routines while handling traffic. In addition to that, statistics
> can be reset (written with zeroes) on statistics reset in other
> threads, causing a race condition, which in turn could result in
> wrong stats.
> 
> The patch provides an approach with reference values, allowing
> the actual counters to be writable within Rx/Tx burst threads
> only, and updating reference values on stats reset.
> 
> Fixes: 661c0ccf2512 ("net/nbl: support statistics")
> 
> Signed-off-by: Dimon Zhao <dimon.zhao@nebula-matrix.com>

First off, many drivers do the same thing as the current code.
I think virtio is the most commonly used driver with same memset.

They just zero an accumulated buffer.
The SW counters are not meant to super exact (not a good idea to bill
customers based on packet counts).

Your new method using a stashed old copy, still has races.
The old code would race the zero with read/modify/write of the increment.
If the race happened the zero might happen in the middle of the modify
causing the value not to be zeroed.

The new code is less of a problem but assignment is not atomic on
many platforms, especially for structure size objects. Therefore it
could happen to get read of stale data.

If you really want to have reliable statistics in SW, you
would have to use atomic operations, and pay the penalty of the
additional locked operations in the fast path.

PS: FreeBSD has the same problem in many drivers.


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2025-11-27  0:07 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-11-26  2:54 [PATCH v1 0/1] fix NBL issues in DPDK 25.11-rc4 Dimon Zhao
2025-11-26  2:54 ` [PATCH v1 1/1] net/nbl: fix Rx/Tx stats concurrency Dimon Zhao
2025-11-27  0:07   ` Stephen Hemminger

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).