[RFC] net/af_packet: make stats reset reliable

DPDK patches and discussions
 help / color / mirror / Atom feed

* [RFC] net/af_packet: make stats reset reliable
@ 2024-04-25 17:46 Ferruh Yigit
  2024-04-26 11:33 ` Morten Brørup
                   ` (4 more replies)
  0 siblings, 5 replies; 179+ messages in thread
From: Ferruh Yigit @ 2024-04-25 17:46 UTC (permalink / raw)
  To: John W. Linville
  Cc: Thomas Monjalon, dev, Mattias Rönnblom, Stephen Hemminger,
	Morten Brørup

For stats reset, use an offset instead of zeroing out actual stats values,
get_stats() displays diff between stats and offset.
This way stats only updated in datapath and offset only updated in stats
reset function. This makes stats reset function more reliable.

As stats only written by single thread, we can remove 'volatile' qualifier
which should improve the performance in datapath.

Signed-off-by: Ferruh Yigit <ferruh.yigit@amd.com>
---
Cc: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
Cc: Stephen Hemminger <stephen@networkplumber.org>
Cc: Morten Brørup <mb@smartsharesystems.com>

This update triggered by mail list discussion [1].

[1]
https://inbox.dpdk.org/dev/3b2cf48e-2293-4226-b6cd-5f4dd3969f99@lysator.liu.se/
---
 drivers/net/af_packet/rte_eth_af_packet.c | 69 +++++++++++++++--------
 1 file changed, 44 insertions(+), 25 deletions(-)

diff --git a/drivers/net/af_packet/rte_eth_af_packet.c b/drivers/net/af_packet/rte_eth_af_packet.c
index 397a32db5886..2061cdab4997 100644
--- a/drivers/net/af_packet/rte_eth_af_packet.c
+++ b/drivers/net/af_packet/rte_eth_af_packet.c
@@ -51,8 +51,10 @@ struct pkt_rx_queue {
 	uint16_t in_port;
 	uint8_t vlan_strip;
 
-	volatile unsigned long rx_pkts;
-	volatile unsigned long rx_bytes;
+	uint64_t rx_pkts;
+	uint64_t rx_bytes;
+	uint64_t rx_pkts_offset;
+	uint64_t rx_bytes_offset;
 };
 
 struct pkt_tx_queue {
@@ -64,9 +66,12 @@ struct pkt_tx_queue {
 	unsigned int framecount;
 	unsigned int framenum;
 
-	volatile unsigned long tx_pkts;
-	volatile unsigned long err_pkts;
-	volatile unsigned long tx_bytes;
+	uint64_t tx_pkts;
+	uint64_t err_pkts;
+	uint64_t tx_bytes;
+	uint64_t tx_pkts_offset;
+	uint64_t err_pkts_offset;
+	uint64_t tx_bytes_offset;
 };
 
 struct pmd_internals {
@@ -385,8 +390,18 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 	return 0;
 }
 
+
+static uint64_t
+stats_get_diff(uint64_t stats, uint64_t offset)
+{
+	if (stats >= offset)
+		return stats - offset;
+	/* unlikely wraparound case */
+	return UINT64_MAX + stats - offset;
+}
+
 static int
-eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
+eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
 	unsigned i, imax;
 	unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
@@ -396,27 +411,29 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
 	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
 	        internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
 	for (i = 0; i < imax; i++) {
-		igb_stats->q_ipackets[i] = internal->rx_queue[i].rx_pkts;
-		igb_stats->q_ibytes[i] = internal->rx_queue[i].rx_bytes;
-		rx_total += igb_stats->q_ipackets[i];
-		rx_bytes_total += igb_stats->q_ibytes[i];
+		struct pkt_rx_queue *rxq = &internal->rx_queue[i];
+		stats->q_ipackets[i] = stats_get_diff(rxq->rx_pkts, rxq->rx_pkts_offset);
+		stats->q_ibytes[i] = stats_get_diff(rxq->rx_bytes, rxq->rx_bytes_offset);
+		rx_total += stats->q_ipackets[i];
+		rx_bytes_total += stats->q_ibytes[i];
 	}
 
 	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
 	        internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
 	for (i = 0; i < imax; i++) {
-		igb_stats->q_opackets[i] = internal->tx_queue[i].tx_pkts;
-		igb_stats->q_obytes[i] = internal->tx_queue[i].tx_bytes;
-		tx_total += igb_stats->q_opackets[i];
-		tx_err_total += internal->tx_queue[i].err_pkts;
-		tx_bytes_total += igb_stats->q_obytes[i];
+		struct pkt_tx_queue *txq = &internal->tx_queue[i];
+		stats->q_opackets[i] = stats_get_diff(txq->tx_pkts, txq->tx_pkts_offset);
+		stats->q_obytes[i] = stats_get_diff(txq->tx_bytes, txq->tx_bytes_offset);
+		tx_total += stats->q_opackets[i];
+		tx_err_total += stats_get_diff(txq->err_pkts, txq->err_pkts_offset);
+		tx_bytes_total += stats->q_obytes[i];
 	}
 
-	igb_stats->ipackets = rx_total;
-	igb_stats->ibytes = rx_bytes_total;
-	igb_stats->opackets = tx_total;
-	igb_stats->oerrors = tx_err_total;
-	igb_stats->obytes = tx_bytes_total;
+	stats->ipackets = rx_total;
+	stats->ibytes = rx_bytes_total;
+	stats->opackets = tx_total;
+	stats->oerrors = tx_err_total;
+	stats->obytes = tx_bytes_total;
 	return 0;
 }
 
@@ -427,14 +444,16 @@ eth_stats_reset(struct rte_eth_dev *dev)
 	struct pmd_internals *internal = dev->data->dev_private;
 
 	for (i = 0; i < internal->nb_queues; i++) {
-		internal->rx_queue[i].rx_pkts = 0;
-		internal->rx_queue[i].rx_bytes = 0;
+		struct pkt_rx_queue *rxq = &internal->rx_queue[i];
+		rxq->rx_pkts_offset = rxq->rx_pkts;
+		rxq->rx_bytes_offset = rxq->rx_bytes;
 	}
 
 	for (i = 0; i < internal->nb_queues; i++) {
-		internal->tx_queue[i].tx_pkts = 0;
-		internal->tx_queue[i].err_pkts = 0;
-		internal->tx_queue[i].tx_bytes = 0;
+		struct pkt_tx_queue *txq = &internal->tx_queue[i];
+		txq->tx_pkts_offset = txq->tx_pkts;
+		txq->err_pkts_offset = txq->err_pkts;
+		txq->tx_bytes_offset = txq->tx_bytes;
 	}
 
 	return 0;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [RFC] net/af_packet: make stats reset reliable
  2024-04-25 17:46 [RFC] net/af_packet: make stats reset reliable Ferruh Yigit
@ 2024-04-26 11:33 ` Morten Brørup
  2024-04-26 13:37   ` Ferruh Yigit
  2024-04-28 15:42   ` Mattias Rönnblom
  2024-04-26 14:38 ` [RFC v2] " Ferruh Yigit
                   ` (3 subsequent siblings)
  4 siblings, 2 replies; 179+ messages in thread
From: Morten Brørup @ 2024-04-26 11:33 UTC (permalink / raw)
  To: Ferruh Yigit, John W. Linville
  Cc: Thomas Monjalon, dev, Mattias Rönnblom, Stephen Hemminger

> +static uint64_t
> +stats_get_diff(uint64_t stats, uint64_t offset)
> +{
> +	if (stats >= offset)
> +		return stats - offset;
> +	/* unlikely wraparound case */
> +	return UINT64_MAX + stats - offset;

The numbers are unsigned, so wrapping comes for free.

Remove the comparison and always return stats - offset.

Using uint8_t for easier explanation, if offset is 255 and stats is 0, then the diff should be 1.
Returning stats - offset:
stats - offset = 0 - 255 = 0 - 0xFF = 1.

Returning UINT8_MAX + stats - offset is wrong:
UINT8_MAX + stats - offset = 255 - 0 - 255 = 0.

Besides that, it looks good to me.


While reviewing, I came across the rx_mbuf_alloc_failed counter in the rte_eth_dev_data structure:
https://elixir.bootlin.com/dpdk/v24.03/source/lib/ethdev/rte_ethdev.c#L3145
https://elixir.bootlin.com/dpdk/v24.03/source/lib/ethdev/ethdev_driver.h#L127

Doesn't it have the same problem?


BTW, the af_packet PMD doesn't increase the rx_mbuf_alloc_failed counter on mbuf allocation failures. But that's a separate bug.


^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC] net/af_packet: make stats reset reliable
  2024-04-26 11:33 ` Morten Brørup
@ 2024-04-26 13:37   ` Ferruh Yigit
  2024-04-26 14:56     ` Morten Brørup
  2024-04-28 15:42   ` Mattias Rönnblom
  1 sibling, 1 reply; 179+ messages in thread
From: Ferruh Yigit @ 2024-04-26 13:37 UTC (permalink / raw)
  To: Morten Brørup, John W. Linville
  Cc: Thomas Monjalon, dev, Mattias Rönnblom, Stephen Hemminger

On 4/26/2024 12:33 PM, Morten Brørup wrote:
>> +static uint64_t
>> +stats_get_diff(uint64_t stats, uint64_t offset)
>> +{
>> +	if (stats >= offset)
>> +		return stats - offset;
>> +	/* unlikely wraparound case */
>> +	return UINT64_MAX + stats - offset;
> 
> The numbers are unsigned, so wrapping comes for free.
> 
> Remove the comparison and always return stats - offset.
> 
> Using uint8_t for easier explanation, if offset is 255 and stats is 0, then the diff should be 1.
> Returning stats - offset:
> stats - offset = 0 - 255 = 0 - 0xFF = 1.
> 
> Returning UINT8_MAX + stats - offset is wrong:
> UINT8_MAX + stats - offset = 255 - 0 - 255 = 0.
> 
> Besides that, it looks good to me.
> 

Yes, it is wrong, and thanks for removing comparison tip.

But thinking twice, taking wrapping into account for a uint64_t variable
can be being too cautious anyway. I will remove it completely.

> 
> While reviewing, I came across the rx_mbuf_alloc_failed counter in the rte_eth_dev_data structure:
> https://elixir.bootlin.com/dpdk/v24.03/source/lib/ethdev/rte_ethdev.c#L3145
> https://elixir.bootlin.com/dpdk/v24.03/source/lib/ethdev/ethdev_driver.h#L127
> 
> Doesn't it have the same problem?
> 

stats reset problem? af_packet is not collecting 'rx_mbuf_alloc_failed',
so nothing to do there for af_packet.

> 
> BTW, the af_packet PMD doesn't increase the rx_mbuf_alloc_failed counter on mbuf allocation failures. But that's a separate bug.
> 

Yes it is missing, but I wouldn't call it a bug, just one of the stats
is missing. And yes this can be handled separately if required.


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [RFC v2] net/af_packet: make stats reset reliable
  2024-04-25 17:46 [RFC] net/af_packet: make stats reset reliable Ferruh Yigit
  2024-04-26 11:33 ` Morten Brørup
@ 2024-04-26 14:38 ` Ferruh Yigit
  2024-04-26 14:47   ` Morten Brørup
  2024-04-28 15:11   ` Mattias Rönnblom
  2024-04-26 21:28 ` [RFC] " Patrick Robb
                   ` (2 subsequent siblings)
  4 siblings, 2 replies; 179+ messages in thread
From: Ferruh Yigit @ 2024-04-26 14:38 UTC (permalink / raw)
  To: John W. Linville
  Cc: Thomas Monjalon, dev, Mattias Rönnblom, Stephen Hemminger,
	Morten Brørup

For stats reset, use an offset instead of zeroing out actual stats values,
get_stats() displays diff between stats and offset.
This way stats only updated in datapath and offset only updated in stats
reset function. This makes stats reset function more reliable.

As stats only written by single thread, we can remove 'volatile' qualifier
which should improve the performance in datapath.

Signed-off-by: Ferruh Yigit <ferruh.yigit@amd.com>
---
Cc: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
Cc: Stephen Hemminger <stephen@networkplumber.org>
Cc: Morten Brørup <mb@smartsharesystems.com>

This update triggered by mail list discussion [1].

[1]
https://inbox.dpdk.org/dev/3b2cf48e-2293-4226-b6cd-5f4dd3969f99@lysator.liu.se/

v2:
* Remove wrapping check for stats
---
 drivers/net/af_packet/rte_eth_af_packet.c | 66 ++++++++++++++---------
 1 file changed, 41 insertions(+), 25 deletions(-)

diff --git a/drivers/net/af_packet/rte_eth_af_packet.c b/drivers/net/af_packet/rte_eth_af_packet.c
index 397a32db5886..10c8e1e50139 100644
--- a/drivers/net/af_packet/rte_eth_af_packet.c
+++ b/drivers/net/af_packet/rte_eth_af_packet.c
@@ -51,8 +51,10 @@ struct pkt_rx_queue {
 	uint16_t in_port;
 	uint8_t vlan_strip;
 
-	volatile unsigned long rx_pkts;
-	volatile unsigned long rx_bytes;
+	uint64_t rx_pkts;
+	uint64_t rx_bytes;
+	uint64_t rx_pkts_offset;
+	uint64_t rx_bytes_offset;
 };
 
 struct pkt_tx_queue {
@@ -64,9 +66,12 @@ struct pkt_tx_queue {
 	unsigned int framecount;
 	unsigned int framenum;
 
-	volatile unsigned long tx_pkts;
-	volatile unsigned long err_pkts;
-	volatile unsigned long tx_bytes;
+	uint64_t tx_pkts;
+	uint64_t err_pkts;
+	uint64_t tx_bytes;
+	uint64_t tx_pkts_offset;
+	uint64_t err_pkts_offset;
+	uint64_t tx_bytes_offset;
 };
 
 struct pmd_internals {
@@ -385,8 +390,15 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 	return 0;
 }
 
+
+static uint64_t
+stats_get_diff(uint64_t stats, uint64_t offset)
+{
+	return stats - offset;
+}
+
 static int
-eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
+eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
 	unsigned i, imax;
 	unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
@@ -396,27 +408,29 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
 	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
 	        internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
 	for (i = 0; i < imax; i++) {
-		igb_stats->q_ipackets[i] = internal->rx_queue[i].rx_pkts;
-		igb_stats->q_ibytes[i] = internal->rx_queue[i].rx_bytes;
-		rx_total += igb_stats->q_ipackets[i];
-		rx_bytes_total += igb_stats->q_ibytes[i];
+		struct pkt_rx_queue *rxq = &internal->rx_queue[i];
+		stats->q_ipackets[i] = stats_get_diff(rxq->rx_pkts, rxq->rx_pkts_offset);
+		stats->q_ibytes[i] = stats_get_diff(rxq->rx_bytes, rxq->rx_bytes_offset);
+		rx_total += stats->q_ipackets[i];
+		rx_bytes_total += stats->q_ibytes[i];
 	}
 
 	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
 	        internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
 	for (i = 0; i < imax; i++) {
-		igb_stats->q_opackets[i] = internal->tx_queue[i].tx_pkts;
-		igb_stats->q_obytes[i] = internal->tx_queue[i].tx_bytes;
-		tx_total += igb_stats->q_opackets[i];
-		tx_err_total += internal->tx_queue[i].err_pkts;
-		tx_bytes_total += igb_stats->q_obytes[i];
+		struct pkt_tx_queue *txq = &internal->tx_queue[i];
+		stats->q_opackets[i] = stats_get_diff(txq->tx_pkts, txq->tx_pkts_offset);
+		stats->q_obytes[i] = stats_get_diff(txq->tx_bytes, txq->tx_bytes_offset);
+		tx_total += stats->q_opackets[i];
+		tx_err_total += stats_get_diff(txq->err_pkts, txq->err_pkts_offset);
+		tx_bytes_total += stats->q_obytes[i];
 	}
 
-	igb_stats->ipackets = rx_total;
-	igb_stats->ibytes = rx_bytes_total;
-	igb_stats->opackets = tx_total;
-	igb_stats->oerrors = tx_err_total;
-	igb_stats->obytes = tx_bytes_total;
+	stats->ipackets = rx_total;
+	stats->ibytes = rx_bytes_total;
+	stats->opackets = tx_total;
+	stats->oerrors = tx_err_total;
+	stats->obytes = tx_bytes_total;
 	return 0;
 }
 
@@ -427,14 +441,16 @@ eth_stats_reset(struct rte_eth_dev *dev)
 	struct pmd_internals *internal = dev->data->dev_private;
 
 	for (i = 0; i < internal->nb_queues; i++) {
-		internal->rx_queue[i].rx_pkts = 0;
-		internal->rx_queue[i].rx_bytes = 0;
+		struct pkt_rx_queue *rxq = &internal->rx_queue[i];
+		rxq->rx_pkts_offset = rxq->rx_pkts;
+		rxq->rx_bytes_offset = rxq->rx_bytes;
 	}
 
 	for (i = 0; i < internal->nb_queues; i++) {
-		internal->tx_queue[i].tx_pkts = 0;
-		internal->tx_queue[i].err_pkts = 0;
-		internal->tx_queue[i].tx_bytes = 0;
+		struct pkt_tx_queue *txq = &internal->tx_queue[i];
+		txq->tx_pkts_offset = txq->tx_pkts;
+		txq->err_pkts_offset = txq->err_pkts;
+		txq->tx_bytes_offset = txq->tx_bytes;
 	}
 
 	return 0;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [RFC v2] net/af_packet: make stats reset reliable
  2024-04-26 14:38 ` [RFC v2] " Ferruh Yigit
@ 2024-04-26 14:47   ` Morten Brørup
  2024-04-28 15:11   ` Mattias Rönnblom
  1 sibling, 0 replies; 179+ messages in thread
From: Morten Brørup @ 2024-04-26 14:47 UTC (permalink / raw)
  To: Ferruh Yigit, John W. Linville
  Cc: Thomas Monjalon, dev, Mattias Rönnblom, Stephen Hemminger

> +static uint64_t
> +stats_get_diff(uint64_t stats, uint64_t offset)
> +{
> +	return stats - offset;
> +}

No need for this function; just subtract inline instead.

With the suggested change,
Reviewed-by: Morten Brørup <mb@smartsharesystems.com>


^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [RFC] net/af_packet: make stats reset reliable
  2024-04-26 13:37   ` Ferruh Yigit
@ 2024-04-26 14:56     ` Morten Brørup
  0 siblings, 0 replies; 179+ messages in thread
From: Morten Brørup @ 2024-04-26 14:56 UTC (permalink / raw)
  To: Ferruh Yigit, John W. Linville
  Cc: Thomas Monjalon, dev, Mattias Rönnblom, Stephen Hemminger

> From: Ferruh Yigit [mailto:ferruh.yigit@amd.com]
> Sent: Friday, 26 April 2024 15.38
> 
> On 4/26/2024 12:33 PM, Morten Brørup wrote:

[...]

> > While reviewing, I came across the rx_mbuf_alloc_failed counter in the
> rte_eth_dev_data structure:
> > https://elixir.bootlin.com/dpdk/v24.03/source/lib/ethdev/rte_ethdev.c#L3145
> >
> https://elixir.bootlin.com/dpdk/v24.03/source/lib/ethdev/ethdev_driver.h#L127
> >
> > Doesn't it have the same problem?
> >
> 
> stats reset problem? af_packet is not collecting 'rx_mbuf_alloc_failed',
> so nothing to do there for af_packet.

Agreed, not related to af_packet or this patch.

I'm just wondering if a similar or other patch should be applied to rx_mbuf_alloc_failed in the ethdev layer.
rx_mbuf_alloc_failed is shared by lcores, so perhaps it should be atomic, or atomically incremented by drivers using it.

> 
> >
> > BTW, the af_packet PMD doesn't increase the rx_mbuf_alloc_failed counter on
> mbuf allocation failures. But that's a separate bug.
> >
> 
> Yes it is missing, but I wouldn't call it a bug, just one of the stats
> is missing. And yes this can be handled separately if required.

OK, then just some stats missing, not a bug.
Quite useful stats for debugging a production system, if mbuf allocations fail.
But still, not related to this patch.


^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC] net/af_packet: make stats reset reliable
  2024-04-25 17:46 [RFC] net/af_packet: make stats reset reliable Ferruh Yigit
  2024-04-26 11:33 ` Morten Brørup
  2024-04-26 14:38 ` [RFC v2] " Ferruh Yigit
@ 2024-04-26 21:28 ` Patrick Robb
  2024-05-03 15:45 ` [RFC v3] " Ferruh Yigit
  2024-05-10  5:01 ` [RFC 0/3] generic sw counters Stephen Hemminger
  4 siblings, 0 replies; 179+ messages in thread
From: Patrick Robb @ 2024-04-26 21:28 UTC (permalink / raw)
  To: Ferruh Yigit
  Cc: John W. Linville, Thomas Monjalon, dev, Mattias Rönnblom,
	Stephen Hemminger, Morten Brørup

[-- Attachment #1: Type: text/plain, Size: 296 bytes --]

Recheck-request: iol-compile-amd64-testing

The DPDK Community Lab updated to the latest Alpine image yesterday, which
resulted in all Alpine builds failing. The failure is unrelated to your
patch, and this recheck should remove the fail on Patchwork, as we have
disabled Alpine testing for now.

[-- Attachment #2: Type: text/html, Size: 361 bytes --]

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC v2] net/af_packet: make stats reset reliable
  2024-04-26 14:38 ` [RFC v2] " Ferruh Yigit
  2024-04-26 14:47   ` Morten Brørup
@ 2024-04-28 15:11   ` Mattias Rönnblom
  2024-05-01 16:19     ` Ferruh Yigit
  2024-05-07  7:23     ` Mattias Rönnblom
  1 sibling, 2 replies; 179+ messages in thread
From: Mattias Rönnblom @ 2024-04-28 15:11 UTC (permalink / raw)
  To: Ferruh Yigit, John W. Linville
  Cc: Thomas Monjalon, dev, Mattias Rönnblom, Stephen Hemminger,
	Morten Brørup

On 2024-04-26 16:38, Ferruh Yigit wrote:
> For stats reset, use an offset instead of zeroing out actual stats values,
> get_stats() displays diff between stats and offset.
> This way stats only updated in datapath and offset only updated in stats
> reset function. This makes stats reset function more reliable.
> 
> As stats only written by single thread, we can remove 'volatile' qualifier
> which should improve the performance in datapath.
> 

volatile wouldn't help you if you had multiple writers, so that can't be 
the reason for its removal. It would be more accurate to say it should 
be replaced with atomic updates. If you don't use volatile and don't use 
atomics, you have to consider if the compiler can reach the conclusion 
that it does not need to store the counter value for future use *for 
that thread*. Since otherwise, I don't think the store actually needs to 
occur. Since DPDK statistics tend to work, it's pretty obvious that 
current compilers tend not to reach this conclusion.

If this should be done 100% properly, the update operation should be a 
non-atomic load, non-atomic add, and an atomic store. Similarly, for the 
reset, the offset store should be atomic.

Considered the state of the rest of the DPDK code base, I think a 
non-atomic, non-volatile solution is also fine.

(That said, I think we're better off just deprecating stats reset 
altogether, and returning -ENOTSUP here meanwhile.)

> Signed-off-by: Ferruh Yigit <ferruh.yigit@amd.com>
> ---
> Cc: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
> Cc: Stephen Hemminger <stephen@networkplumber.org>
> Cc: Morten Brørup <mb@smartsharesystems.com>
> 
> This update triggered by mail list discussion [1].
> 
> [1]
> https://inbox.dpdk.org/dev/3b2cf48e-2293-4226-b6cd-5f4dd3969f99@lysator.liu.se/
> 
> v2:
> * Remove wrapping check for stats
> ---
>   drivers/net/af_packet/rte_eth_af_packet.c | 66 ++++++++++++++---------
>   1 file changed, 41 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/net/af_packet/rte_eth_af_packet.c b/drivers/net/af_packet/rte_eth_af_packet.c
> index 397a32db5886..10c8e1e50139 100644
> --- a/drivers/net/af_packet/rte_eth_af_packet.c
> +++ b/drivers/net/af_packet/rte_eth_af_packet.c
> @@ -51,8 +51,10 @@ struct pkt_rx_queue {
>   	uint16_t in_port;
>   	uint8_t vlan_strip;
>   
> -	volatile unsigned long rx_pkts;
> -	volatile unsigned long rx_bytes;
> +	uint64_t rx_pkts;
> +	uint64_t rx_bytes;
> +	uint64_t rx_pkts_offset;
> +	uint64_t rx_bytes_offset;

I suggest you introduce a separate struct for reset-able counters. It'll 
make things cleaner, and you can sneak in atomics without too much 
atomics-related bloat.

struct counter
{
	uint64_t count;
	uint64_t offset;
};

/../
	struct counter rx_pkts;
	struct counter rx_bytes;
/../

static uint64_t
counter_value(struct counter *counter)
{
	uint64_t count = __atomic_load_n(&counter->count, __ATOMIC_RELAXED);
	uint64_t offset = __atomic_load_n(&counter->offset, __ATOMIC_RELAXED);

	return count + offset;
}

static void
counter_reset(struct counter *counter)
{
	uint64_t count = __atomic_load_n(&counter->count, __ATOMIC_RELAXED);

	__atomic_store_n(&counter->offset, count, __ATOMIC_RELAXED);
}

static void
counter_add(struct counter *counter, uint64_t operand)
{
	__atomic_store_n(&counter->count, counter->count + operand, 
__ATOMIC_RELAXED);
}

You'd have to port this to <rte_stdatomic.h> calls, which prevents 
non-atomic loads from RTE_ATOMIC()s. The non-atomic reads above must be 
replaced with explicit relaxed non-atomic load. Otherwise, if you just 
use "counter->count", that would be an atomic load with sequential 
consistency memory order on C11 atomics-based builds, which would result 
in a barrier, at least on weakly ordered machines (e.g., ARM).

I would still use a struct and some helper-functions even for the less 
ambitious, non-atomic variant.

The only drawback of using GCC built-ins type atomics here, versus an 
atomic- and volatile-free approach, is that current compilers seems to 
refuse merging atomic stores. It's beyond me why this is the case. If 
you store to a variable twice in quick succession, it'll be two store 
machine instructions, even in cases where the compiler *knows* the value 
is identical. So volatile, even though you didn't ask for it. Weird.

So if you have a loop, you may want to make an "counter_add()" in the 
end from a temporary, to get the final 0.001% of performance.

If the tech board thinks MT-safe reset-able software-manage statistics 
is the future (as opposed to dropping reset support, for example), I 
think this stuff should go into a separate header file, so other PMDs 
can reuse it. Maybe out of scope for this patch.

>   };
>   
>   struct pkt_tx_queue {
> @@ -64,9 +66,12 @@ struct pkt_tx_queue {
>   	unsigned int framecount;
>   	unsigned int framenum;
>   
> -	volatile unsigned long tx_pkts;
> -	volatile unsigned long err_pkts;
> -	volatile unsigned long tx_bytes;
> +	uint64_t tx_pkts;
> +	uint64_t err_pkts;
> +	uint64_t tx_bytes;
> +	uint64_t tx_pkts_offset;
> +	uint64_t err_pkts_offset;
> +	uint64_t tx_bytes_offset;
>   };
>   
>   struct pmd_internals {
> @@ -385,8 +390,15 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
>   	return 0;
>   }
>   
> +
> +static uint64_t
> +stats_get_diff(uint64_t stats, uint64_t offset)
> +{
> +	return stats - offset;
> +}
> +
>   static int
> -eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
> +eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
>   {
>   	unsigned i, imax;
>   	unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
> @@ -396,27 +408,29 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
>   	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
>   	        internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
>   	for (i = 0; i < imax; i++) {
> -		igb_stats->q_ipackets[i] = internal->rx_queue[i].rx_pkts;
> -		igb_stats->q_ibytes[i] = internal->rx_queue[i].rx_bytes;
> -		rx_total += igb_stats->q_ipackets[i];
> -		rx_bytes_total += igb_stats->q_ibytes[i];
> +		struct pkt_rx_queue *rxq = &internal->rx_queue[i];
> +		stats->q_ipackets[i] = stats_get_diff(rxq->rx_pkts, rxq->rx_pkts_offset);
> +		stats->q_ibytes[i] = stats_get_diff(rxq->rx_bytes, rxq->rx_bytes_offset);
> +		rx_total += stats->q_ipackets[i];
> +		rx_bytes_total += stats->q_ibytes[i];
>   	}
>   
>   	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
>   	        internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
>   	for (i = 0; i < imax; i++) {
> -		igb_stats->q_opackets[i] = internal->tx_queue[i].tx_pkts;
> -		igb_stats->q_obytes[i] = internal->tx_queue[i].tx_bytes;
> -		tx_total += igb_stats->q_opackets[i];
> -		tx_err_total += internal->tx_queue[i].err_pkts;
> -		tx_bytes_total += igb_stats->q_obytes[i];
> +		struct pkt_tx_queue *txq = &internal->tx_queue[i];
> +		stats->q_opackets[i] = stats_get_diff(txq->tx_pkts, txq->tx_pkts_offset);
> +		stats->q_obytes[i] = stats_get_diff(txq->tx_bytes, txq->tx_bytes_offset);
> +		tx_total += stats->q_opackets[i];
> +		tx_err_total += stats_get_diff(txq->err_pkts, txq->err_pkts_offset);
> +		tx_bytes_total += stats->q_obytes[i];
>   	}
>   
> -	igb_stats->ipackets = rx_total;
> -	igb_stats->ibytes = rx_bytes_total;
> -	igb_stats->opackets = tx_total;
> -	igb_stats->oerrors = tx_err_total;
> -	igb_stats->obytes = tx_bytes_total;
> +	stats->ipackets = rx_total;
> +	stats->ibytes = rx_bytes_total;
> +	stats->opackets = tx_total;
> +	stats->oerrors = tx_err_total;
> +	stats->obytes = tx_bytes_total;
>   	return 0;
>   }
>   
> @@ -427,14 +441,16 @@ eth_stats_reset(struct rte_eth_dev *dev)
>   	struct pmd_internals *internal = dev->data->dev_private;
>   
>   	for (i = 0; i < internal->nb_queues; i++) {
> -		internal->rx_queue[i].rx_pkts = 0;
> -		internal->rx_queue[i].rx_bytes = 0;
> +		struct pkt_rx_queue *rxq = &internal->rx_queue[i];
> +		rxq->rx_pkts_offset = rxq->rx_pkts;
> +		rxq->rx_bytes_offset = rxq->rx_bytes;
>   	}
>   
>   	for (i = 0; i < internal->nb_queues; i++) {
> -		internal->tx_queue[i].tx_pkts = 0;
> -		internal->tx_queue[i].err_pkts = 0;
> -		internal->tx_queue[i].tx_bytes = 0;
> +		struct pkt_tx_queue *txq = &internal->tx_queue[i];
> +		txq->tx_pkts_offset = txq->tx_pkts;
> +		txq->err_pkts_offset = txq->err_pkts;
> +		txq->tx_bytes_offset = txq->tx_bytes;
>   	}
>   
>   	return 0;

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC] net/af_packet: make stats reset reliable
  2024-04-26 11:33 ` Morten Brørup
  2024-04-26 13:37   ` Ferruh Yigit
@ 2024-04-28 15:42   ` Mattias Rönnblom
  1 sibling, 0 replies; 179+ messages in thread
From: Mattias Rönnblom @ 2024-04-28 15:42 UTC (permalink / raw)
  To: Morten Brørup, Ferruh Yigit, John W. Linville
  Cc: Thomas Monjalon, dev, Mattias Rönnblom, Stephen Hemminger

On 2024-04-26 13:33, Morten Brørup wrote:
>> +static uint64_t
>> +stats_get_diff(uint64_t stats, uint64_t offset)
>> +{
>> +	if (stats >= offset)
>> +		return stats - offset;
>> +	/* unlikely wraparound case */
>> +	return UINT64_MAX + stats - offset;
> 
> The numbers are unsigned, so wrapping comes for free.
> 

With 64-bit counters, will they ever wrap? If you constantly run 100 
Gbps it'll take > 1000 years before the byte counter wrap.

> Remove the comparison and always return stats - offset.
> 
> Using uint8_t for easier explanation, if offset is 255 and stats is 0, then the diff should be 1.
> Returning stats - offset:
> stats - offset = 0 - 255 = 0 - 0xFF = 1.
> 
> Returning UINT8_MAX + stats - offset is wrong:
> UINT8_MAX + stats - offset = 255 - 0 - 255 = 0.
> 
> Besides that, it looks good to me.
> 
> 
> While reviewing, I came across the rx_mbuf_alloc_failed counter in the rte_eth_dev_data structure:
> https://elixir.bootlin.com/dpdk/v24.03/source/lib/ethdev/rte_ethdev.c#L3145
> https://elixir.bootlin.com/dpdk/v24.03/source/lib/ethdev/ethdev_driver.h#L127
> 
> Doesn't it have the same problem?
> 
> 
> BTW, the af_packet PMD doesn't increase the rx_mbuf_alloc_failed counter on mbuf allocation failures. But that's a separate bug.
> 

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC v2] net/af_packet: make stats reset reliable
  2024-04-28 15:11   ` Mattias Rönnblom
@ 2024-05-01 16:19     ` Ferruh Yigit
  2024-05-02  5:51       ` Mattias Rönnblom
  2024-05-07  7:23     ` Mattias Rönnblom
  1 sibling, 1 reply; 179+ messages in thread
From: Ferruh Yigit @ 2024-05-01 16:19 UTC (permalink / raw)
  To: Mattias Rönnblom, John W. Linville
  Cc: Thomas Monjalon, dev, Mattias Rönnblom, Stephen Hemminger,
	Morten Brørup

On 4/28/2024 4:11 PM, Mattias Rönnblom wrote:
> On 2024-04-26 16:38, Ferruh Yigit wrote:
>> For stats reset, use an offset instead of zeroing out actual stats
>> values,
>> get_stats() displays diff between stats and offset.
>> This way stats only updated in datapath and offset only updated in stats
>> reset function. This makes stats reset function more reliable.
>>
>> As stats only written by single thread, we can remove 'volatile'
>> qualifier
>> which should improve the performance in datapath.
>>
> 
> volatile wouldn't help you if you had multiple writers, so that can't be
> the reason for its removal. It would be more accurate to say it should
> be replaced with atomic updates. If you don't use volatile and don't use
> atomics, you have to consider if the compiler can reach the conclusion
> that it does not need to store the counter value for future use *for
> that thread*. Since otherwise, I don't think the store actually needs to
> occur. Since DPDK statistics tend to work, it's pretty obvious that
> current compilers tend not to reach this conclusion.
> 

Thanks Mattias for clarifying why we need volatile or atomics even with
single writer.

> If this should be done 100% properly, the update operation should be a
> non-atomic load, non-atomic add, and an atomic store. Similarly, for the
> reset, the offset store should be atomic.
> 

ack

> Considered the state of the rest of the DPDK code base, I think a
> non-atomic, non-volatile solution is also fine.
> 

Yes, this seems working practically but I guess better to follow above
suggestion.

> (That said, I think we're better off just deprecating stats reset
> altogether, and returning -ENOTSUP here meanwhile.)
> 

As long as reset is reliable (here I mean it reset stats in every call)
and doesn't impact datapath performance, I am for to continue with it.
Returning non supported won't bring more benefit to users.

>> Signed-off-by: Ferruh Yigit <ferruh.yigit@amd.com>
>> ---
>> Cc: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
>> Cc: Stephen Hemminger <stephen@networkplumber.org>
>> Cc: Morten Brørup <mb@smartsharesystems.com>
>>
>> This update triggered by mail list discussion [1].
>>
>> [1]
>> https://inbox.dpdk.org/dev/3b2cf48e-2293-4226-b6cd-5f4dd3969f99@lysator.liu.se/
>>
>> v2:
>> * Remove wrapping check for stats
>> ---
>>   drivers/net/af_packet/rte_eth_af_packet.c | 66 ++++++++++++++---------
>>   1 file changed, 41 insertions(+), 25 deletions(-)
>>
>> diff --git a/drivers/net/af_packet/rte_eth_af_packet.c
>> b/drivers/net/af_packet/rte_eth_af_packet.c
>> index 397a32db5886..10c8e1e50139 100644
>> --- a/drivers/net/af_packet/rte_eth_af_packet.c
>> +++ b/drivers/net/af_packet/rte_eth_af_packet.c
>> @@ -51,8 +51,10 @@ struct pkt_rx_queue {
>>       uint16_t in_port;
>>       uint8_t vlan_strip;
>>   -    volatile unsigned long rx_pkts;
>> -    volatile unsigned long rx_bytes;
>> +    uint64_t rx_pkts;
>> +    uint64_t rx_bytes;
>> +    uint64_t rx_pkts_offset;
>> +    uint64_t rx_bytes_offset;
> 
> I suggest you introduce a separate struct for reset-able counters. It'll
> make things cleaner, and you can sneak in atomics without too much
> atomics-related bloat.
> 
> struct counter
> {
>     uint64_t count;
>     uint64_t offset;
> };
> 
> /../
>     struct counter rx_pkts;
>     struct counter rx_bytes;
> /../
> 
> static uint64_t
> counter_value(struct counter *counter)
> {
>     uint64_t count = __atomic_load_n(&counter->count, __ATOMIC_RELAXED);
>     uint64_t offset = __atomic_load_n(&counter->offset, __ATOMIC_RELAXED);
> 
>     return count + offset;
> }
> 
> static void
> counter_reset(struct counter *counter)
> {
>     uint64_t count = __atomic_load_n(&counter->count, __ATOMIC_RELAXED);
> 
>     __atomic_store_n(&counter->offset, count, __ATOMIC_RELAXED);
> }
> 
> static void
> counter_add(struct counter *counter, uint64_t operand)
> {
>     __atomic_store_n(&counter->count, counter->count + operand,
> __ATOMIC_RELAXED);
> }
> 

Ack for separate struct for reset-able counters.

> You'd have to port this to <rte_stdatomic.h> calls, which prevents
> non-atomic loads from RTE_ATOMIC()s. The non-atomic reads above must be
> replaced with explicit relaxed non-atomic load. Otherwise, if you just
> use "counter->count", that would be an atomic load with sequential
> consistency memory order on C11 atomics-based builds, which would result
> in a barrier, at least on weakly ordered machines (e.g., ARM).
> 

I am not sure I understand above.
As load and add will be non-atomic, why not access them directly, like:
`uint64_t count = counter->count;`

So my understanding is, remove `volatile`, load and add without atomics,
and only use relaxed ordered atomics for store (to ensure value in
register stored to memory).

I will send a new version of RFC with above understanding.

> I would still use a struct and some helper-functions even for the less
> ambitious, non-atomic variant.
> 
> The only drawback of using GCC built-ins type atomics here, versus an
> atomic- and volatile-free approach, is that current compilers seems to
> refuse merging atomic stores. It's beyond me why this is the case. If
> you store to a variable twice in quick succession, it'll be two store
> machine instructions, even in cases where the compiler *knows* the value
> is identical. So volatile, even though you didn't ask for it. Weird.
> 
> So if you have a loop, you may want to make an "counter_add()" in the
> end from a temporary, to get the final 0.001% of performance.
> 

ack

I can't really say which one of the following is better (because of
store in empty poll), but I will keep it as it is (b.):

a.
for (i < nb_pkt) {
	stats =+ 1;
}


b.
for (i < nb_pkt) {
	tmp =+ 1;
}
stats += tmp;


c.
for (i < nb_pkt) {
	tmp =+ 1;
}
if (tmp)
	stats += tmp;



> If the tech board thinks MT-safe reset-able software-manage statistics
> is the future (as opposed to dropping reset support, for example), I
> think this stuff should go into a separate header file, so other PMDs
> can reuse it. Maybe out of scope for this patch.
> 

I don't think we need MT-safe reset, the patch is already out to
document current status.
For HW stats reset is already reliable and for SW drives offset based
approach can make is reliable.

Unless you explicitly asked for it, I don't think this is in the agenda
of the techboard.


>>   };
>>     struct pkt_tx_queue {
>> @@ -64,9 +66,12 @@ struct pkt_tx_queue {
>>       unsigned int framecount;
>>       unsigned int framenum;
>>   -    volatile unsigned long tx_pkts;
>> -    volatile unsigned long err_pkts;
>> -    volatile unsigned long tx_bytes;
>> +    uint64_t tx_pkts;
>> +    uint64_t err_pkts;
>> +    uint64_t tx_bytes;
>> +    uint64_t tx_pkts_offset;
>> +    uint64_t err_pkts_offset;
>> +    uint64_t tx_bytes_offset;
>>   };
>>     struct pmd_internals {
>> @@ -385,8 +390,15 @@ eth_dev_info(struct rte_eth_dev *dev, struct
>> rte_eth_dev_info *dev_info)
>>       return 0;
>>   }
>>   +
>> +static uint64_t
>> +stats_get_diff(uint64_t stats, uint64_t offset)
>> +{
>> +    return stats - offset;
>> +}
>> +
>>   static int
>> -eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
>> +eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
>>   {
>>       unsigned i, imax;
>>       unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
>> @@ -396,27 +408,29 @@ eth_stats_get(struct rte_eth_dev *dev, struct
>> rte_eth_stats *igb_stats)
>>       imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
>>               internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
>>       for (i = 0; i < imax; i++) {
>> -        igb_stats->q_ipackets[i] = internal->rx_queue[i].rx_pkts;
>> -        igb_stats->q_ibytes[i] = internal->rx_queue[i].rx_bytes;
>> -        rx_total += igb_stats->q_ipackets[i];
>> -        rx_bytes_total += igb_stats->q_ibytes[i];
>> +        struct pkt_rx_queue *rxq = &internal->rx_queue[i];
>> +        stats->q_ipackets[i] = stats_get_diff(rxq->rx_pkts,
>> rxq->rx_pkts_offset);
>> +        stats->q_ibytes[i] = stats_get_diff(rxq->rx_bytes,
>> rxq->rx_bytes_offset);
>> +        rx_total += stats->q_ipackets[i];
>> +        rx_bytes_total += stats->q_ibytes[i];
>>       }
>>         imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
>>               internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
>>       for (i = 0; i < imax; i++) {
>> -        igb_stats->q_opackets[i] = internal->tx_queue[i].tx_pkts;
>> -        igb_stats->q_obytes[i] = internal->tx_queue[i].tx_bytes;
>> -        tx_total += igb_stats->q_opackets[i];
>> -        tx_err_total += internal->tx_queue[i].err_pkts;
>> -        tx_bytes_total += igb_stats->q_obytes[i];
>> +        struct pkt_tx_queue *txq = &internal->tx_queue[i];
>> +        stats->q_opackets[i] = stats_get_diff(txq->tx_pkts,
>> txq->tx_pkts_offset);
>> +        stats->q_obytes[i] = stats_get_diff(txq->tx_bytes,
>> txq->tx_bytes_offset);
>> +        tx_total += stats->q_opackets[i];
>> +        tx_err_total += stats_get_diff(txq->err_pkts,
>> txq->err_pkts_offset);
>> +        tx_bytes_total += stats->q_obytes[i];
>>       }
>>   -    igb_stats->ipackets = rx_total;
>> -    igb_stats->ibytes = rx_bytes_total;
>> -    igb_stats->opackets = tx_total;
>> -    igb_stats->oerrors = tx_err_total;
>> -    igb_stats->obytes = tx_bytes_total;
>> +    stats->ipackets = rx_total;
>> +    stats->ibytes = rx_bytes_total;
>> +    stats->opackets = tx_total;
>> +    stats->oerrors = tx_err_total;
>> +    stats->obytes = tx_bytes_total;
>>       return 0;
>>   }
>>   @@ -427,14 +441,16 @@ eth_stats_reset(struct rte_eth_dev *dev)
>>       struct pmd_internals *internal = dev->data->dev_private;
>>         for (i = 0; i < internal->nb_queues; i++) {
>> -        internal->rx_queue[i].rx_pkts = 0;
>> -        internal->rx_queue[i].rx_bytes = 0;
>> +        struct pkt_rx_queue *rxq = &internal->rx_queue[i];
>> +        rxq->rx_pkts_offset = rxq->rx_pkts;
>> +        rxq->rx_bytes_offset = rxq->rx_bytes;
>>       }
>>         for (i = 0; i < internal->nb_queues; i++) {
>> -        internal->tx_queue[i].tx_pkts = 0;
>> -        internal->tx_queue[i].err_pkts = 0;
>> -        internal->tx_queue[i].tx_bytes = 0;
>> +        struct pkt_tx_queue *txq = &internal->tx_queue[i];
>> +        txq->tx_pkts_offset = txq->tx_pkts;
>> +        txq->err_pkts_offset = txq->err_pkts;
>> +        txq->tx_bytes_offset = txq->tx_bytes;
>>       }
>>         return 0;


^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC v2] net/af_packet: make stats reset reliable
  2024-05-01 16:19     ` Ferruh Yigit
@ 2024-05-02  5:51       ` Mattias Rönnblom
  2024-05-02 14:22         ` Ferruh Yigit
  0 siblings, 1 reply; 179+ messages in thread
From: Mattias Rönnblom @ 2024-05-02  5:51 UTC (permalink / raw)
  To: Ferruh Yigit, John W. Linville
  Cc: Thomas Monjalon, dev, Mattias Rönnblom, Stephen Hemminger,
	Morten Brørup

On 2024-05-01 18:19, Ferruh Yigit wrote:
> On 4/28/2024 4:11 PM, Mattias Rönnblom wrote:
>> On 2024-04-26 16:38, Ferruh Yigit wrote:
>>> For stats reset, use an offset instead of zeroing out actual stats
>>> values,
>>> get_stats() displays diff between stats and offset.
>>> This way stats only updated in datapath and offset only updated in stats
>>> reset function. This makes stats reset function more reliable.
>>>
>>> As stats only written by single thread, we can remove 'volatile'
>>> qualifier
>>> which should improve the performance in datapath.
>>>
>>
>> volatile wouldn't help you if you had multiple writers, so that can't be
>> the reason for its removal. It would be more accurate to say it should
>> be replaced with atomic updates. If you don't use volatile and don't use
>> atomics, you have to consider if the compiler can reach the conclusion
>> that it does not need to store the counter value for future use *for
>> that thread*. Since otherwise, I don't think the store actually needs to
>> occur. Since DPDK statistics tend to work, it's pretty obvious that
>> current compilers tend not to reach this conclusion.
>>
> 
> Thanks Mattias for clarifying why we need volatile or atomics even with
> single writer.
> 
>> If this should be done 100% properly, the update operation should be a
>> non-atomic load, non-atomic add, and an atomic store. Similarly, for the
>> reset, the offset store should be atomic.
>>
> 
> ack
> 
>> Considered the state of the rest of the DPDK code base, I think a
>> non-atomic, non-volatile solution is also fine.
>>
> 
> Yes, this seems working practically but I guess better to follow above
> suggestion.
> 
>> (That said, I think we're better off just deprecating stats reset
>> altogether, and returning -ENOTSUP here meanwhile.)
>>
> 
> As long as reset is reliable (here I mean it reset stats in every call)
> and doesn't impact datapath performance, I am for to continue with it.
> Returning non supported won't bring more benefit to users.
> 
>>> Signed-off-by: Ferruh Yigit <ferruh.yigit@amd.com>
>>> ---
>>> Cc: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
>>> Cc: Stephen Hemminger <stephen@networkplumber.org>
>>> Cc: Morten Brørup <mb@smartsharesystems.com>
>>>
>>> This update triggered by mail list discussion [1].
>>>
>>> [1]
>>> https://inbox.dpdk.org/dev/3b2cf48e-2293-4226-b6cd-5f4dd3969f99@lysator.liu.se/
>>>
>>> v2:
>>> * Remove wrapping check for stats
>>> ---
>>>    drivers/net/af_packet/rte_eth_af_packet.c | 66 ++++++++++++++---------
>>>    1 file changed, 41 insertions(+), 25 deletions(-)
>>>
>>> diff --git a/drivers/net/af_packet/rte_eth_af_packet.c
>>> b/drivers/net/af_packet/rte_eth_af_packet.c
>>> index 397a32db5886..10c8e1e50139 100644
>>> --- a/drivers/net/af_packet/rte_eth_af_packet.c
>>> +++ b/drivers/net/af_packet/rte_eth_af_packet.c
>>> @@ -51,8 +51,10 @@ struct pkt_rx_queue {
>>>        uint16_t in_port;
>>>        uint8_t vlan_strip;
>>>    -    volatile unsigned long rx_pkts;
>>> -    volatile unsigned long rx_bytes;
>>> +    uint64_t rx_pkts;
>>> +    uint64_t rx_bytes;
>>> +    uint64_t rx_pkts_offset;
>>> +    uint64_t rx_bytes_offset;
>>
>> I suggest you introduce a separate struct for reset-able counters. It'll
>> make things cleaner, and you can sneak in atomics without too much
>> atomics-related bloat.
>>
>> struct counter
>> {
>>      uint64_t count;
>>      uint64_t offset;
>> };
>>
>> /../
>>      struct counter rx_pkts;
>>      struct counter rx_bytes;
>> /../
>>
>> static uint64_t
>> counter_value(struct counter *counter)
>> {
>>      uint64_t count = __atomic_load_n(&counter->count, __ATOMIC_RELAXED);
>>      uint64_t offset = __atomic_load_n(&counter->offset, __ATOMIC_RELAXED);
>>
>>      return count + offset;
>> }
>>
>> static void
>> counter_reset(struct counter *counter)
>> {
>>      uint64_t count = __atomic_load_n(&counter->count, __ATOMIC_RELAXED);
>>
>>      __atomic_store_n(&counter->offset, count, __ATOMIC_RELAXED);
>> }
>>
>> static void
>> counter_add(struct counter *counter, uint64_t operand)
>> {
>>      __atomic_store_n(&counter->count, counter->count + operand,
>> __ATOMIC_RELAXED);
>> }
>>
> 
> Ack for separate struct for reset-able counters.
> 
>> You'd have to port this to <rte_stdatomic.h> calls, which prevents
>> non-atomic loads from RTE_ATOMIC()s. The non-atomic reads above must be
>> replaced with explicit relaxed non-atomic load. Otherwise, if you just
>> use "counter->count", that would be an atomic load with sequential
>> consistency memory order on C11 atomics-based builds, which would result
>> in a barrier, at least on weakly ordered machines (e.g., ARM).
>>
> 
> I am not sure I understand above.
> As load and add will be non-atomic, why not access them directly, like:
> `uint64_t count = counter->count;`
> 

In case count is _Atomic (i.e., on enable_stdatomic=true builds), "count 
= counter->count" will imply a memory barrier. On x86_64, I think it 
will "only" be a compiler barrier (i.e., preventing optimization). On 
weakly ordered machines, it will result in a barrier-instruction (or an 
instruction-which-is-also-a-barrier, like in the example below).

include <stdatomic.h>

int relaxed_load(_Atomic int *p)
{
     atomic_load_explicit(p, memory_order_relaxed);
}

int direct_load(_Atomic int *p)
{
     return *p;
}

GCC 13.2 ARM64 ->

relaxed_load:
         ldr     w0, [x0]
         ret
direct_load:
         ldar    w0, [x0]
         ret

> So my understanding is, remove `volatile`, load and add without atomics,
> and only use relaxed ordered atomics for store (to ensure value in
> register stored to memory).
> 

Yes, that would be the best option, would the DPDK atomics API allow its 
implementation - but it doesn't. At least not if you care about what 
happens in enable_stdatomic=true builds.

The second-best option is to use a rte_memory_order_relaxed atomic load, 
a regular non-atomic add, and a relaxed atomic store.

> I will send a new version of RFC with above understanding.
> 
>> I would still use a struct and some helper-functions even for the less
>> ambitious, non-atomic variant.
>>
>> The only drawback of using GCC built-ins type atomics here, versus an
>> atomic- and volatile-free approach, is that current compilers seems to
>> refuse merging atomic stores. It's beyond me why this is the case. If
>> you store to a variable twice in quick succession, it'll be two store
>> machine instructions, even in cases where the compiler *knows* the value
>> is identical. So volatile, even though you didn't ask for it. Weird.
>>
>> So if you have a loop, you may want to make an "counter_add()" in the
>> end from a temporary, to get the final 0.001% of performance.
>>
> 
> ack
> 
> I can't really say which one of the following is better (because of
> store in empty poll), but I will keep it as it is (b.):
> 
> a.
> for (i < nb_pkt) {
> 	stats =+ 1;
> }
> 
> 
> b.
> for (i < nb_pkt) {
> 	tmp =+ 1;
> }
> stats += tmp;
> 
> 
> c.
> for (i < nb_pkt) {
> 	tmp =+ 1;
> }
> if (tmp)
> 	stats += tmp;
> 
> 
> 
>> If the tech board thinks MT-safe reset-able software-manage statistics
>> is the future (as opposed to dropping reset support, for example), I
>> think this stuff should go into a separate header file, so other PMDs
>> can reuse it. Maybe out of scope for this patch.
>>
> 
> I don't think we need MT-safe reset, the patch is already out to
> document current status.

Well, what you are working on is a MT-safe reset, in the sense it allows 
for one (1) resetting thread properly synchronize with multiple 
concurrent counter-updating threads.

It's not going to be completely MT safe, since you can't have two 
threads calling the reset function in parallel.

Any change to the API should make this clear.

> For HW stats reset is already reliable and for SW drives offset based
> approach can make is reliable.
> 
> Unless you explicitly asked for it, I don't think this is in the agenda
> of the techboard.
> 
> 
>>>    };
>>>      struct pkt_tx_queue {
>>> @@ -64,9 +66,12 @@ struct pkt_tx_queue {
>>>        unsigned int framecount;
>>>        unsigned int framenum;
>>>    -    volatile unsigned long tx_pkts;
>>> -    volatile unsigned long err_pkts;
>>> -    volatile unsigned long tx_bytes;
>>> +    uint64_t tx_pkts;
>>> +    uint64_t err_pkts;
>>> +    uint64_t tx_bytes;
>>> +    uint64_t tx_pkts_offset;
>>> +    uint64_t err_pkts_offset;
>>> +    uint64_t tx_bytes_offset;
>>>    };
>>>      struct pmd_internals {
>>> @@ -385,8 +390,15 @@ eth_dev_info(struct rte_eth_dev *dev, struct
>>> rte_eth_dev_info *dev_info)
>>>        return 0;
>>>    }
>>>    +
>>> +static uint64_t
>>> +stats_get_diff(uint64_t stats, uint64_t offset)
>>> +{
>>> +    return stats - offset;
>>> +}
>>> +
>>>    static int
>>> -eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
>>> +eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
>>>    {
>>>        unsigned i, imax;
>>>        unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
>>> @@ -396,27 +408,29 @@ eth_stats_get(struct rte_eth_dev *dev, struct
>>> rte_eth_stats *igb_stats)
>>>        imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
>>>                internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
>>>        for (i = 0; i < imax; i++) {
>>> -        igb_stats->q_ipackets[i] = internal->rx_queue[i].rx_pkts;
>>> -        igb_stats->q_ibytes[i] = internal->rx_queue[i].rx_bytes;
>>> -        rx_total += igb_stats->q_ipackets[i];
>>> -        rx_bytes_total += igb_stats->q_ibytes[i];
>>> +        struct pkt_rx_queue *rxq = &internal->rx_queue[i];
>>> +        stats->q_ipackets[i] = stats_get_diff(rxq->rx_pkts,
>>> rxq->rx_pkts_offset);
>>> +        stats->q_ibytes[i] = stats_get_diff(rxq->rx_bytes,
>>> rxq->rx_bytes_offset);
>>> +        rx_total += stats->q_ipackets[i];
>>> +        rx_bytes_total += stats->q_ibytes[i];
>>>        }
>>>          imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
>>>                internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
>>>        for (i = 0; i < imax; i++) {
>>> -        igb_stats->q_opackets[i] = internal->tx_queue[i].tx_pkts;
>>> -        igb_stats->q_obytes[i] = internal->tx_queue[i].tx_bytes;
>>> -        tx_total += igb_stats->q_opackets[i];
>>> -        tx_err_total += internal->tx_queue[i].err_pkts;
>>> -        tx_bytes_total += igb_stats->q_obytes[i];
>>> +        struct pkt_tx_queue *txq = &internal->tx_queue[i];
>>> +        stats->q_opackets[i] = stats_get_diff(txq->tx_pkts,
>>> txq->tx_pkts_offset);
>>> +        stats->q_obytes[i] = stats_get_diff(txq->tx_bytes,
>>> txq->tx_bytes_offset);
>>> +        tx_total += stats->q_opackets[i];
>>> +        tx_err_total += stats_get_diff(txq->err_pkts,
>>> txq->err_pkts_offset);
>>> +        tx_bytes_total += stats->q_obytes[i];
>>>        }
>>>    -    igb_stats->ipackets = rx_total;
>>> -    igb_stats->ibytes = rx_bytes_total;
>>> -    igb_stats->opackets = tx_total;
>>> -    igb_stats->oerrors = tx_err_total;
>>> -    igb_stats->obytes = tx_bytes_total;
>>> +    stats->ipackets = rx_total;
>>> +    stats->ibytes = rx_bytes_total;
>>> +    stats->opackets = tx_total;
>>> +    stats->oerrors = tx_err_total;
>>> +    stats->obytes = tx_bytes_total;
>>>        return 0;
>>>    }
>>>    @@ -427,14 +441,16 @@ eth_stats_reset(struct rte_eth_dev *dev)
>>>        struct pmd_internals *internal = dev->data->dev_private;
>>>          for (i = 0; i < internal->nb_queues; i++) {
>>> -        internal->rx_queue[i].rx_pkts = 0;
>>> -        internal->rx_queue[i].rx_bytes = 0;
>>> +        struct pkt_rx_queue *rxq = &internal->rx_queue[i];
>>> +        rxq->rx_pkts_offset = rxq->rx_pkts;
>>> +        rxq->rx_bytes_offset = rxq->rx_bytes;
>>>        }
>>>          for (i = 0; i < internal->nb_queues; i++) {
>>> -        internal->tx_queue[i].tx_pkts = 0;
>>> -        internal->tx_queue[i].err_pkts = 0;
>>> -        internal->tx_queue[i].tx_bytes = 0;
>>> +        struct pkt_tx_queue *txq = &internal->tx_queue[i];
>>> +        txq->tx_pkts_offset = txq->tx_pkts;
>>> +        txq->err_pkts_offset = txq->err_pkts;
>>> +        txq->tx_bytes_offset = txq->tx_bytes;
>>>        }
>>>          return 0;
> 

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC v2] net/af_packet: make stats reset reliable
  2024-05-02  5:51       ` Mattias Rönnblom
@ 2024-05-02 14:22         ` Ferruh Yigit
  2024-05-02 15:59           ` Stephen Hemminger
  2024-05-02 17:37           ` Mattias Rönnblom
  0 siblings, 2 replies; 179+ messages in thread
From: Ferruh Yigit @ 2024-05-02 14:22 UTC (permalink / raw)
  To: Mattias Rönnblom, John W. Linville
  Cc: Thomas Monjalon, dev, Mattias Rönnblom, Stephen Hemminger,
	Morten Brørup

On 5/2/2024 6:51 AM, Mattias Rönnblom wrote:
> On 2024-05-01 18:19, Ferruh Yigit wrote:
>> On 4/28/2024 4:11 PM, Mattias Rönnblom wrote:
>>> On 2024-04-26 16:38, Ferruh Yigit wrote:
>>>> For stats reset, use an offset instead of zeroing out actual stats
>>>> values,
>>>> get_stats() displays diff between stats and offset.
>>>> This way stats only updated in datapath and offset only updated in
>>>> stats
>>>> reset function. This makes stats reset function more reliable.
>>>>
>>>> As stats only written by single thread, we can remove 'volatile'
>>>> qualifier
>>>> which should improve the performance in datapath.
>>>>
>>>
>>> volatile wouldn't help you if you had multiple writers, so that can't be
>>> the reason for its removal. It would be more accurate to say it should
>>> be replaced with atomic updates. If you don't use volatile and don't use
>>> atomics, you have to consider if the compiler can reach the conclusion
>>> that it does not need to store the counter value for future use *for
>>> that thread*. Since otherwise, I don't think the store actually needs to
>>> occur. Since DPDK statistics tend to work, it's pretty obvious that
>>> current compilers tend not to reach this conclusion.
>>>
>>
>> Thanks Mattias for clarifying why we need volatile or atomics even with
>> single writer.
>>
>>> If this should be done 100% properly, the update operation should be a
>>> non-atomic load, non-atomic add, and an atomic store. Similarly, for the
>>> reset, the offset store should be atomic.
>>>
>>
>> ack
>>
>>> Considered the state of the rest of the DPDK code base, I think a
>>> non-atomic, non-volatile solution is also fine.
>>>
>>
>> Yes, this seems working practically but I guess better to follow above
>> suggestion.
>>
>>> (That said, I think we're better off just deprecating stats reset
>>> altogether, and returning -ENOTSUP here meanwhile.)
>>>
>>
>> As long as reset is reliable (here I mean it reset stats in every call)
>> and doesn't impact datapath performance, I am for to continue with it.
>> Returning non supported won't bring more benefit to users.
>>
>>>> Signed-off-by: Ferruh Yigit <ferruh.yigit@amd.com>
>>>> ---
>>>> Cc: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
>>>> Cc: Stephen Hemminger <stephen@networkplumber.org>
>>>> Cc: Morten Brørup <mb@smartsharesystems.com>
>>>>
>>>> This update triggered by mail list discussion [1].
>>>>
>>>> [1]
>>>> https://inbox.dpdk.org/dev/3b2cf48e-2293-4226-b6cd-5f4dd3969f99@lysator.liu.se/
>>>>
>>>> v2:
>>>> * Remove wrapping check for stats
>>>> ---
>>>>    drivers/net/af_packet/rte_eth_af_packet.c | 66
>>>> ++++++++++++++---------
>>>>    1 file changed, 41 insertions(+), 25 deletions(-)
>>>>
>>>> diff --git a/drivers/net/af_packet/rte_eth_af_packet.c
>>>> b/drivers/net/af_packet/rte_eth_af_packet.c
>>>> index 397a32db5886..10c8e1e50139 100644
>>>> --- a/drivers/net/af_packet/rte_eth_af_packet.c
>>>> +++ b/drivers/net/af_packet/rte_eth_af_packet.c
>>>> @@ -51,8 +51,10 @@ struct pkt_rx_queue {
>>>>        uint16_t in_port;
>>>>        uint8_t vlan_strip;
>>>>    -    volatile unsigned long rx_pkts;
>>>> -    volatile unsigned long rx_bytes;
>>>> +    uint64_t rx_pkts;
>>>> +    uint64_t rx_bytes;
>>>> +    uint64_t rx_pkts_offset;
>>>> +    uint64_t rx_bytes_offset;
>>>
>>> I suggest you introduce a separate struct for reset-able counters. It'll
>>> make things cleaner, and you can sneak in atomics without too much
>>> atomics-related bloat.
>>>
>>> struct counter
>>> {
>>>      uint64_t count;
>>>      uint64_t offset;
>>> };
>>>
>>> /../
>>>      struct counter rx_pkts;
>>>      struct counter rx_bytes;
>>> /../
>>>
>>> static uint64_t
>>> counter_value(struct counter *counter)
>>> {
>>>      uint64_t count = __atomic_load_n(&counter->count,
>>> __ATOMIC_RELAXED);
>>>      uint64_t offset = __atomic_load_n(&counter->offset,
>>> __ATOMIC_RELAXED);
>>>
>>>      return count + offset;
>>> }
>>>
>>> static void
>>> counter_reset(struct counter *counter)
>>> {
>>>      uint64_t count = __atomic_load_n(&counter->count,
>>> __ATOMIC_RELAXED);
>>>
>>>      __atomic_store_n(&counter->offset, count, __ATOMIC_RELAXED);
>>> }
>>>
>>> static void
>>> counter_add(struct counter *counter, uint64_t operand)
>>> {
>>>      __atomic_store_n(&counter->count, counter->count + operand,
>>> __ATOMIC_RELAXED);
>>> }
>>>
>>
>> Ack for separate struct for reset-able counters.
>>
>>> You'd have to port this to <rte_stdatomic.h> calls, which prevents
>>> non-atomic loads from RTE_ATOMIC()s. The non-atomic reads above must be
>>> replaced with explicit relaxed non-atomic load. Otherwise, if you just
>>> use "counter->count", that would be an atomic load with sequential
>>> consistency memory order on C11 atomics-based builds, which would result
>>> in a barrier, at least on weakly ordered machines (e.g., ARM).
>>>
>>
>> I am not sure I understand above.
>> As load and add will be non-atomic, why not access them directly, like:
>> `uint64_t count = counter->count;`
>>
> 
> In case count is _Atomic (i.e., on enable_stdatomic=true builds), "count
> = counter->count" will imply a memory barrier. On x86_64, I think it
> will "only" be a compiler barrier (i.e., preventing optimization). On
> weakly ordered machines, it will result in a barrier-instruction (or an
> instruction-which-is-also-a-barrier, like in the example below).
> 
> include <stdatomic.h>
> 
> int relaxed_load(_Atomic int *p)
> {
>     atomic_load_explicit(p, memory_order_relaxed);
> }
> 
> int direct_load(_Atomic int *p)
> {
>     return *p;
> }
> 
> GCC 13.2 ARM64 ->
> 
> relaxed_load:
>         ldr     w0, [x0]
>         ret
> direct_load:
>         ldar    w0, [x0]
>         ret
> 
>


Do we need to declare count as '_Atomic', I wasn't planning to make
variable _Atomic. This way assignment won't introduce any memory barrier.


>> So my understanding is, remove `volatile`, load and add without atomics,
>> and only use relaxed ordered atomics for store (to ensure value in
>> register stored to memory).
>>
> 
> Yes, that would be the best option, would the DPDK atomics API allow its
> implementation - but it doesn't. At least not if you care about what
> happens in enable_stdatomic=true builds.
> 
> The second-best option is to use a rte_memory_order_relaxed atomic load,
> a regular non-atomic add, and a relaxed atomic store.
> 
>> I will send a new version of RFC with above understanding.
>>
>>> I would still use a struct and some helper-functions even for the less
>>> ambitious, non-atomic variant.
>>>
>>> The only drawback of using GCC built-ins type atomics here, versus an
>>> atomic- and volatile-free approach, is that current compilers seems to
>>> refuse merging atomic stores. It's beyond me why this is the case. If
>>> you store to a variable twice in quick succession, it'll be two store
>>> machine instructions, even in cases where the compiler *knows* the value
>>> is identical. So volatile, even though you didn't ask for it. Weird.
>>>
>>> So if you have a loop, you may want to make an "counter_add()" in the
>>> end from a temporary, to get the final 0.001% of performance.
>>>
>>
>> ack
>>
>> I can't really say which one of the following is better (because of
>> store in empty poll), but I will keep it as it is (b.):
>>
>> a.
>> for (i < nb_pkt) {
>>     stats =+ 1;
>> }
>>
>>
>> b.
>> for (i < nb_pkt) {
>>     tmp =+ 1;
>> }
>> stats += tmp;
>>
>>
>> c.
>> for (i < nb_pkt) {
>>     tmp =+ 1;
>> }
>> if (tmp)
>>     stats += tmp;
>>
>>
>>
>>> If the tech board thinks MT-safe reset-able software-manage statistics
>>> is the future (as opposed to dropping reset support, for example), I
>>> think this stuff should go into a separate header file, so other PMDs
>>> can reuse it. Maybe out of scope for this patch.
>>>
>>
>> I don't think we need MT-safe reset, the patch is already out to
>> document current status.
> 
> Well, what you are working on is a MT-safe reset, in the sense it allows
> for one (1) resetting thread properly synchronize with multiple
> concurrent counter-updating threads.
> 
> It's not going to be completely MT safe, since you can't have two
> threads calling the reset function in parallel.
> 

This is what I meant with "MT-safe reset", so multiple threads not
allowed to call stats reset in parallel.

And for multiple concurrent counter-updating threads case, suggestion is
to stop forwarding.

Above two are the update I added to 'rte_eth_stats_reset()' API, and I
believe we can continue with this restriction for the API.

> Any change to the API should make this clear.
> 
>> For HW stats reset is already reliable and for SW drives offset based
>> approach can make is reliable.
>>
>> Unless you explicitly asked for it, I don't think this is in the agenda
>> of the techboard.
>>
>>


^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC v2] net/af_packet: make stats reset reliable
  2024-05-02 14:22         ` Ferruh Yigit
@ 2024-05-02 15:59           ` Stephen Hemminger
  2024-05-02 18:20             ` Ferruh Yigit
  2024-05-02 17:37           ` Mattias Rönnblom
  1 sibling, 1 reply; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-02 15:59 UTC (permalink / raw)
  To: Ferruh Yigit
  Cc: Mattias Rönnblom, John W. Linville, Thomas Monjalon, dev,
	Mattias Rönnblom, Morten Brørup

On Thu, 2 May 2024 15:22:35 +0100
Ferruh Yigit <ferruh.yigit@amd.com> wrote:

> > 
> > It's not going to be completely MT safe, since you can't have two
> > threads calling the reset function in parallel.
> >   
> 
> This is what I meant with "MT-safe reset", so multiple threads not
> allowed to call stats reset in parallel.
> 
> And for multiple concurrent counter-updating threads case, suggestion is
> to stop forwarding.
> 
> Above two are the update I added to 'rte_eth_stats_reset()' API, and

I can't see the point of full MT-safe reset. No other driver does it.
The ethdev control side api's are not MT-safe and making them truly
MT-safe would add additional ref count and locking.

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC v2] net/af_packet: make stats reset reliable
  2024-05-02 14:22         ` Ferruh Yigit
  2024-05-02 15:59           ` Stephen Hemminger
@ 2024-05-02 17:37           ` Mattias Rönnblom
  2024-05-02 18:26             ` Stephen Hemminger
  1 sibling, 1 reply; 179+ messages in thread
From: Mattias Rönnblom @ 2024-05-02 17:37 UTC (permalink / raw)
  To: Ferruh Yigit, John W. Linville
  Cc: Thomas Monjalon, dev, Mattias Rönnblom, Stephen Hemminger,
	Morten Brørup

On 2024-05-02 16:22, Ferruh Yigit wrote:
> On 5/2/2024 6:51 AM, Mattias Rönnblom wrote:
>> On 2024-05-01 18:19, Ferruh Yigit wrote:
>>> On 4/28/2024 4:11 PM, Mattias Rönnblom wrote:
>>>> On 2024-04-26 16:38, Ferruh Yigit wrote:
>>>>> For stats reset, use an offset instead of zeroing out actual stats
>>>>> values,
>>>>> get_stats() displays diff between stats and offset.
>>>>> This way stats only updated in datapath and offset only updated in
>>>>> stats
>>>>> reset function. This makes stats reset function more reliable.
>>>>>
>>>>> As stats only written by single thread, we can remove 'volatile'
>>>>> qualifier
>>>>> which should improve the performance in datapath.
>>>>>
>>>>
>>>> volatile wouldn't help you if you had multiple writers, so that can't be
>>>> the reason for its removal. It would be more accurate to say it should
>>>> be replaced with atomic updates. If you don't use volatile and don't use
>>>> atomics, you have to consider if the compiler can reach the conclusion
>>>> that it does not need to store the counter value for future use *for
>>>> that thread*. Since otherwise, I don't think the store actually needs to
>>>> occur. Since DPDK statistics tend to work, it's pretty obvious that
>>>> current compilers tend not to reach this conclusion.
>>>>
>>>
>>> Thanks Mattias for clarifying why we need volatile or atomics even with
>>> single writer.
>>>
>>>> If this should be done 100% properly, the update operation should be a
>>>> non-atomic load, non-atomic add, and an atomic store. Similarly, for the
>>>> reset, the offset store should be atomic.
>>>>
>>>
>>> ack
>>>
>>>> Considered the state of the rest of the DPDK code base, I think a
>>>> non-atomic, non-volatile solution is also fine.
>>>>
>>>
>>> Yes, this seems working practically but I guess better to follow above
>>> suggestion.
>>>
>>>> (That said, I think we're better off just deprecating stats reset
>>>> altogether, and returning -ENOTSUP here meanwhile.)
>>>>
>>>
>>> As long as reset is reliable (here I mean it reset stats in every call)
>>> and doesn't impact datapath performance, I am for to continue with it.
>>> Returning non supported won't bring more benefit to users.
>>>
>>>>> Signed-off-by: Ferruh Yigit <ferruh.yigit@amd.com>
>>>>> ---
>>>>> Cc: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
>>>>> Cc: Stephen Hemminger <stephen@networkplumber.org>
>>>>> Cc: Morten Brørup <mb@smartsharesystems.com>
>>>>>
>>>>> This update triggered by mail list discussion [1].
>>>>>
>>>>> [1]
>>>>> https://inbox.dpdk.org/dev/3b2cf48e-2293-4226-b6cd-5f4dd3969f99@lysator.liu.se/
>>>>>
>>>>> v2:
>>>>> * Remove wrapping check for stats
>>>>> ---
>>>>>     drivers/net/af_packet/rte_eth_af_packet.c | 66
>>>>> ++++++++++++++---------
>>>>>     1 file changed, 41 insertions(+), 25 deletions(-)
>>>>>
>>>>> diff --git a/drivers/net/af_packet/rte_eth_af_packet.c
>>>>> b/drivers/net/af_packet/rte_eth_af_packet.c
>>>>> index 397a32db5886..10c8e1e50139 100644
>>>>> --- a/drivers/net/af_packet/rte_eth_af_packet.c
>>>>> +++ b/drivers/net/af_packet/rte_eth_af_packet.c
>>>>> @@ -51,8 +51,10 @@ struct pkt_rx_queue {
>>>>>         uint16_t in_port;
>>>>>         uint8_t vlan_strip;
>>>>>     -    volatile unsigned long rx_pkts;
>>>>> -    volatile unsigned long rx_bytes;
>>>>> +    uint64_t rx_pkts;
>>>>> +    uint64_t rx_bytes;
>>>>> +    uint64_t rx_pkts_offset;
>>>>> +    uint64_t rx_bytes_offset;
>>>>
>>>> I suggest you introduce a separate struct for reset-able counters. It'll
>>>> make things cleaner, and you can sneak in atomics without too much
>>>> atomics-related bloat.
>>>>
>>>> struct counter
>>>> {
>>>>       uint64_t count;
>>>>       uint64_t offset;
>>>> };
>>>>
>>>> /../
>>>>       struct counter rx_pkts;
>>>>       struct counter rx_bytes;
>>>> /../
>>>>
>>>> static uint64_t
>>>> counter_value(struct counter *counter)
>>>> {
>>>>       uint64_t count = __atomic_load_n(&counter->count,
>>>> __ATOMIC_RELAXED);
>>>>       uint64_t offset = __atomic_load_n(&counter->offset,
>>>> __ATOMIC_RELAXED);
>>>>
>>>>       return count + offset;
>>>> }
>>>>
>>>> static void
>>>> counter_reset(struct counter *counter)
>>>> {
>>>>       uint64_t count = __atomic_load_n(&counter->count,
>>>> __ATOMIC_RELAXED);
>>>>
>>>>       __atomic_store_n(&counter->offset, count, __ATOMIC_RELAXED);
>>>> }
>>>>
>>>> static void
>>>> counter_add(struct counter *counter, uint64_t operand)
>>>> {
>>>>       __atomic_store_n(&counter->count, counter->count + operand,
>>>> __ATOMIC_RELAXED);
>>>> }
>>>>
>>>
>>> Ack for separate struct for reset-able counters.
>>>
>>>> You'd have to port this to <rte_stdatomic.h> calls, which prevents
>>>> non-atomic loads from RTE_ATOMIC()s. The non-atomic reads above must be
>>>> replaced with explicit relaxed non-atomic load. Otherwise, if you just
>>>> use "counter->count", that would be an atomic load with sequential
>>>> consistency memory order on C11 atomics-based builds, which would result
>>>> in a barrier, at least on weakly ordered machines (e.g., ARM).
>>>>
>>>
>>> I am not sure I understand above.
>>> As load and add will be non-atomic, why not access them directly, like:
>>> `uint64_t count = counter->count;`
>>>
>>
>> In case count is _Atomic (i.e., on enable_stdatomic=true builds), "count
>> = counter->count" will imply a memory barrier. On x86_64, I think it
>> will "only" be a compiler barrier (i.e., preventing optimization). On
>> weakly ordered machines, it will result in a barrier-instruction (or an
>> instruction-which-is-also-a-barrier, like in the example below).
>>
>> include <stdatomic.h>
>>
>> int relaxed_load(_Atomic int *p)
>> {
>>      atomic_load_explicit(p, memory_order_relaxed);
>> }
>>
>> int direct_load(_Atomic int *p)
>> {
>>      return *p;
>> }
>>
>> GCC 13.2 ARM64 ->
>>
>> relaxed_load:
>>          ldr     w0, [x0]
>>          ret
>> direct_load:
>>          ldar    w0, [x0]
>>          ret
>>
>>
> 
> 
> Do we need to declare count as '_Atomic', I wasn't planning to make
> variable _Atomic. This way assignment won't introduce any memory barrier.
> 

To use atomics in DPDK, the current requirements seems to be to use 
RTE_ATOMIC(). That macro expands to _Atomic in enable_stdatomic=true 
builds, and nothing otherwise.

Carefully crafted code using atomics will achieved the same performance 
and be more correct than the non-atomic variant. However, in practice, I 
think the non-atomic variant is very likely to produce the desired results.

> 
>>> So my understanding is, remove `volatile`, load and add without atomics,
>>> and only use relaxed ordered atomics for store (to ensure value in
>>> register stored to memory).
>>>
>>
>> Yes, that would be the best option, would the DPDK atomics API allow its
>> implementation - but it doesn't. At least not if you care about what
>> happens in enable_stdatomic=true builds.
>>
>> The second-best option is to use a rte_memory_order_relaxed atomic load,
>> a regular non-atomic add, and a relaxed atomic store.
>>
>>> I will send a new version of RFC with above understanding.
>>>
>>>> I would still use a struct and some helper-functions even for the less
>>>> ambitious, non-atomic variant.
>>>>
>>>> The only drawback of using GCC built-ins type atomics here, versus an
>>>> atomic- and volatile-free approach, is that current compilers seems to
>>>> refuse merging atomic stores. It's beyond me why this is the case. If
>>>> you store to a variable twice in quick succession, it'll be two store
>>>> machine instructions, even in cases where the compiler *knows* the value
>>>> is identical. So volatile, even though you didn't ask for it. Weird.
>>>>
>>>> So if you have a loop, you may want to make an "counter_add()" in the
>>>> end from a temporary, to get the final 0.001% of performance.
>>>>
>>>
>>> ack
>>>
>>> I can't really say which one of the following is better (because of
>>> store in empty poll), but I will keep it as it is (b.):
>>>
>>> a.
>>> for (i < nb_pkt) {
>>>      stats =+ 1;
>>> }
>>>
>>>
>>> b.
>>> for (i < nb_pkt) {
>>>      tmp =+ 1;
>>> }
>>> stats += tmp;
>>>
>>>
>>> c.
>>> for (i < nb_pkt) {
>>>      tmp =+ 1;
>>> }
>>> if (tmp)
>>>      stats += tmp;
>>>
>>>
>>>
>>>> If the tech board thinks MT-safe reset-able software-manage statistics
>>>> is the future (as opposed to dropping reset support, for example), I
>>>> think this stuff should go into a separate header file, so other PMDs
>>>> can reuse it. Maybe out of scope for this patch.
>>>>
>>>
>>> I don't think we need MT-safe reset, the patch is already out to
>>> document current status.
>>
>> Well, what you are working on is a MT-safe reset, in the sense it allows
>> for one (1) resetting thread properly synchronize with multiple
>> concurrent counter-updating threads.
>>
>> It's not going to be completely MT safe, since you can't have two
>> threads calling the reset function in parallel.
>>
> 
> This is what I meant with "MT-safe reset", so multiple threads not
> allowed to call stats reset in parallel.
> 
> And for multiple concurrent counter-updating threads case, suggestion is
> to stop forwarding.
> 
> Above two are the update I added to 'rte_eth_stats_reset()' API, and I
> believe we can continue with this restriction for the API.
> 
>> Any change to the API should make this clear.
>>
>>> For HW stats reset is already reliable and for SW drives offset based
>>> approach can make is reliable.
>>>
>>> Unless you explicitly asked for it, I don't think this is in the agenda
>>> of the techboard.
>>>
>>>
> 

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC v2] net/af_packet: make stats reset reliable
  2024-05-02 15:59           ` Stephen Hemminger
@ 2024-05-02 18:20             ` Ferruh Yigit
  0 siblings, 0 replies; 179+ messages in thread
From: Ferruh Yigit @ 2024-05-02 18:20 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Mattias Rönnblom, John W. Linville, Thomas Monjalon, dev,
	Mattias Rönnblom, Morten Brørup

On 5/2/2024 4:59 PM, Stephen Hemminger wrote:
> On Thu, 2 May 2024 15:22:35 +0100
> Ferruh Yigit <ferruh.yigit@amd.com> wrote:
> 
>>>
>>> It's not going to be completely MT safe, since you can't have two
>>> threads calling the reset function in parallel.
>>>   
>>
>> This is what I meant with "MT-safe reset", so multiple threads not
>> allowed to call stats reset in parallel.
>>
>> And for multiple concurrent counter-updating threads case, suggestion is
>> to stop forwarding.
>>
>> Above two are the update I added to 'rte_eth_stats_reset()' API, and
> 
> I can't see the point of full MT-safe reset. No other driver does it.
> The ethdev control side api's are not MT-safe and making them truly
> MT-safe would add additional ref count and locking.
>

Agree, that is why clarified this in ethdev API:
https:/dpdk.org/patch/139681


^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC v2] net/af_packet: make stats reset reliable
  2024-05-02 17:37           ` Mattias Rönnblom
@ 2024-05-02 18:26             ` Stephen Hemminger
  2024-05-02 21:26               ` Mattias Rönnblom
  0 siblings, 1 reply; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-02 18:26 UTC (permalink / raw)
  To: Mattias Rönnblom
  Cc: Ferruh Yigit, John W. Linville, Thomas Monjalon, dev,
	Mattias Rönnblom, Morten Brørup

On Thu, 2 May 2024 19:37:28 +0200
Mattias Rönnblom <hofors@lysator.liu.se> wrote:

> > 
> > Do we need to declare count as '_Atomic', I wasn't planning to make
> > variable _Atomic. This way assignment won't introduce any memory barrier.
> >   
> 
> To use atomics in DPDK, the current requirements seems to be to use 
> RTE_ATOMIC(). That macro expands to _Atomic in enable_stdatomic=true 
> builds, and nothing otherwise.
> 
> Carefully crafted code using atomics will achieved the same performance 
> and be more correct than the non-atomic variant. However, in practice, I 
> think the non-atomic variant is very likely to produce the desired results.

You are confusing atomic usage for thread safety, with the necessity
of compiler barriers. 

Stats should not be volatile.

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC v2] net/af_packet: make stats reset reliable
  2024-05-02 18:26             ` Stephen Hemminger
@ 2024-05-02 21:26               ` Mattias Rönnblom
  2024-05-02 21:46                 ` Stephen Hemminger
  0 siblings, 1 reply; 179+ messages in thread
From: Mattias Rönnblom @ 2024-05-02 21:26 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Ferruh Yigit, John W. Linville, Thomas Monjalon, dev,
	Mattias Rönnblom, Morten Brørup

On 2024-05-02 20:26, Stephen Hemminger wrote:
> On Thu, 2 May 2024 19:37:28 +0200
> Mattias Rönnblom <hofors@lysator.liu.se> wrote:
> 
>>>
>>> Do we need to declare count as '_Atomic', I wasn't planning to make
>>> variable _Atomic. This way assignment won't introduce any memory barrier.
>>>    
>>
>> To use atomics in DPDK, the current requirements seems to be to use
>> RTE_ATOMIC(). That macro expands to _Atomic in enable_stdatomic=true
>> builds, and nothing otherwise.
>>
>> Carefully crafted code using atomics will achieved the same performance
>> and be more correct than the non-atomic variant. However, in practice, I
>> think the non-atomic variant is very likely to produce the desired results.
> 
> You are confusing atomic usage for thread safety, with the necessity
> of compiler barriers.
> 

Are you suggesting that program-level C11 atomic stores risk being 
delayed, indefinitely? I could only find a draft version of the 
standard, but there 7.17.3 says "Implementations should make atomic 
stores visible to atomic loads within a reasonable amount of time."

An atomic relaxed store will be much cheaper than a compiler barrier.

> Stats should not be volatile.

Sure, and I also don't think compiler barriers should be needed.

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC v2] net/af_packet: make stats reset reliable
  2024-05-02 21:26               ` Mattias Rönnblom
@ 2024-05-02 21:46                 ` Stephen Hemminger
  0 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-02 21:46 UTC (permalink / raw)
  To: Mattias Rönnblom
  Cc: Ferruh Yigit, John W. Linville, Thomas Monjalon, dev,
	Mattias Rönnblom, Morten Brørup

On Thu, 2 May 2024 23:26:31 +0200
Mattias Rönnblom <hofors@lysator.liu.se> wrote:

> > 
> > You are confusing atomic usage for thread safety, with the necessity
> > of compiler barriers.
> >   
> 
> Are you suggesting that program-level C11 atomic stores risk being 
> delayed, indefinitely? I could only find a draft version of the 
> standard, but there 7.17.3 says "Implementations should make atomic 
> stores visible to atomic loads within a reasonable amount of time."
> 
> An atomic relaxed store will be much cheaper than a compiler barrier.

There is a confusion between language designers C11 and system implementer and
CPU design. The language people confuse compiler with hardware in standards.
Because of course the compiler knows all (not).

Read the extended discussion on memory models in Linux kernel documentation.


https://www.kernel.org/doc/html/latest/core-api/wrappers/memory-barriers.html

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [RFC v3] net/af_packet: make stats reset reliable
  2024-04-25 17:46 [RFC] net/af_packet: make stats reset reliable Ferruh Yigit
                   ` (2 preceding siblings ...)
  2024-04-26 21:28 ` [RFC] " Patrick Robb
@ 2024-05-03 15:45 ` Ferruh Yigit
  2024-05-03 22:00   ` Stephen Hemminger
  2024-05-07 15:27   ` Morten Brørup
  2024-05-10  5:01 ` [RFC 0/3] generic sw counters Stephen Hemminger
  4 siblings, 2 replies; 179+ messages in thread
From: Ferruh Yigit @ 2024-05-03 15:45 UTC (permalink / raw)
  To: John W. Linville
  Cc: Thomas Monjalon, dev, Mattias Rönnblom, Stephen Hemminger,
	Morten Brørup

For stats reset, use an offset instead of zeroing out actual stats values,
get_stats() displays diff between stats and offset.
This way stats only updated in datapath and offset only updated in stats
reset function. This makes stats reset function more reliable.

As stats only written by single thread, we can remove 'volatile' qualifier
which should improve the performance in datapath.

While updating around, 'igb_stats' parameter renamed as 'stats'.

Signed-off-by: Ferruh Yigit <ferruh.yigit@amd.com>
---
Cc: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
Cc: Stephen Hemminger <stephen@networkplumber.org>
Cc: Morten Brørup <mb@smartsharesystems.com>

This update triggered by mail list discussion [1].

[1]
https://inbox.dpdk.org/dev/3b2cf48e-2293-4226-b6cd-5f4dd3969f99@lysator.liu.se/

v2:
* Remove wrapping check for stats

v3:
* counter and offset put into same struct per stats
* Use atomic load / store for stats values
---
 drivers/net/af_packet/rte_eth_af_packet.c | 98 ++++++++++++++++-------
 1 file changed, 68 insertions(+), 30 deletions(-)

diff --git a/drivers/net/af_packet/rte_eth_af_packet.c b/drivers/net/af_packet/rte_eth_af_packet.c
index 6b7b16f3486d..ebef1cb06450 100644
--- a/drivers/net/af_packet/rte_eth_af_packet.c
+++ b/drivers/net/af_packet/rte_eth_af_packet.c
@@ -6,6 +6,7 @@
  * All rights reserved.
  */
 
+#include <rte_atomic.h>
 #include <rte_common.h>
 #include <rte_string_fns.h>
 #include <rte_mbuf.h>
@@ -40,6 +41,11 @@
 #define DFLT_FRAME_SIZE		(1 << 11)
 #define DFLT_FRAME_COUNT	(1 << 9)
 
+struct stats {
+	uint64_t counter;
+	uint64_t offset;
+};
+
 struct __rte_cache_aligned pkt_rx_queue {
 	int sockfd;
 
@@ -52,8 +58,8 @@ struct __rte_cache_aligned pkt_rx_queue {
 	uint16_t in_port;
 	uint8_t vlan_strip;
 
-	volatile unsigned long rx_pkts;
-	volatile unsigned long rx_bytes;
+	struct stats rx_pkts;
+	struct stats rx_bytes;
 };
 
 struct __rte_cache_aligned pkt_tx_queue {
@@ -65,9 +71,9 @@ struct __rte_cache_aligned pkt_tx_queue {
 	unsigned int framecount;
 	unsigned int framenum;
 
-	volatile unsigned long tx_pkts;
-	volatile unsigned long err_pkts;
-	volatile unsigned long tx_bytes;
+	struct stats tx_pkts;
+	struct stats err_pkts;
+	struct stats tx_bytes;
 };
 
 struct pmd_internals {
@@ -111,6 +117,34 @@ RTE_LOG_REGISTER_DEFAULT(af_packet_logtype, NOTICE);
 	rte_log(RTE_LOG_ ## level, af_packet_logtype, \
 		"%s(): " fmt ":%s\n", __func__, ##args, strerror(errno))
 
+static inline uint64_t
+stats_get(struct stats *s)
+{
+	uint64_t counter = rte_atomic_load_explicit(&s->counter,
+			rte_memory_order_relaxed);
+	uint64_t offset = rte_atomic_load_explicit(&s->offset,
+			rte_memory_order_relaxed);
+	return counter - offset;
+}
+
+static inline void
+stats_add(struct stats *s, uint16_t n)
+{
+	uint64_t counter = s->counter;
+	counter += n;
+	rte_atomic_store_explicit(&s->counter, counter,
+			rte_memory_order_relaxed);
+}
+
+static inline void
+stats_reset(struct stats *s)
+{
+	uint64_t counter = rte_atomic_load_explicit(&s->counter,
+			rte_memory_order_relaxed);
+	rte_atomic_store_explicit(&s->offset, counter,
+			rte_memory_order_relaxed);
+}
+
 static uint16_t
 eth_af_packet_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
@@ -169,8 +203,8 @@ eth_af_packet_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		num_rx_bytes += mbuf->pkt_len;
 	}
 	pkt_q->framenum = framenum;
-	pkt_q->rx_pkts += num_rx;
-	pkt_q->rx_bytes += num_rx_bytes;
+	stats_add(&pkt_q->rx_pkts, num_rx);
+	stats_add(&pkt_q->rx_bytes, num_rx_bytes);
 	return num_rx;
 }
 
@@ -305,9 +339,9 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	}
 
 	pkt_q->framenum = framenum;
-	pkt_q->tx_pkts += num_tx;
-	pkt_q->err_pkts += i - num_tx;
-	pkt_q->tx_bytes += num_tx_bytes;
+	stats_add(&pkt_q->tx_pkts, num_tx);
+	stats_add(&pkt_q->err_pkts, i - num_tx);
+	stats_add(&pkt_q->tx_bytes, num_tx_bytes);
 	return i;
 }
 
@@ -387,7 +421,7 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 }
 
 static int
-eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
+eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
 	unsigned i, imax;
 	unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
@@ -397,27 +431,29 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
 	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
 	        internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
 	for (i = 0; i < imax; i++) {
-		igb_stats->q_ipackets[i] = internal->rx_queue[i].rx_pkts;
-		igb_stats->q_ibytes[i] = internal->rx_queue[i].rx_bytes;
-		rx_total += igb_stats->q_ipackets[i];
-		rx_bytes_total += igb_stats->q_ibytes[i];
+		struct pkt_rx_queue *rxq = &internal->rx_queue[i];
+		stats->q_ipackets[i] = stats_get(&rxq->rx_pkts);
+		stats->q_ibytes[i] = stats_get(&rxq->rx_bytes);
+		rx_total += stats->q_ipackets[i];
+		rx_bytes_total += stats->q_ibytes[i];
 	}
 
 	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
 	        internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
 	for (i = 0; i < imax; i++) {
-		igb_stats->q_opackets[i] = internal->tx_queue[i].tx_pkts;
-		igb_stats->q_obytes[i] = internal->tx_queue[i].tx_bytes;
-		tx_total += igb_stats->q_opackets[i];
-		tx_err_total += internal->tx_queue[i].err_pkts;
-		tx_bytes_total += igb_stats->q_obytes[i];
+		struct pkt_tx_queue *txq = &internal->tx_queue[i];
+		stats->q_opackets[i] = stats_get(&txq->tx_pkts);
+		stats->q_obytes[i] = stats_get(&txq->tx_bytes);
+		tx_total += stats->q_opackets[i];
+		tx_err_total += stats_get(&txq->err_pkts);
+		tx_bytes_total += stats->q_obytes[i];
 	}
 
-	igb_stats->ipackets = rx_total;
-	igb_stats->ibytes = rx_bytes_total;
-	igb_stats->opackets = tx_total;
-	igb_stats->oerrors = tx_err_total;
-	igb_stats->obytes = tx_bytes_total;
+	stats->ipackets = rx_total;
+	stats->ibytes = rx_bytes_total;
+	stats->opackets = tx_total;
+	stats->oerrors = tx_err_total;
+	stats->obytes = tx_bytes_total;
 	return 0;
 }
 
@@ -428,14 +464,16 @@ eth_stats_reset(struct rte_eth_dev *dev)
 	struct pmd_internals *internal = dev->data->dev_private;
 
 	for (i = 0; i < internal->nb_queues; i++) {
-		internal->rx_queue[i].rx_pkts = 0;
-		internal->rx_queue[i].rx_bytes = 0;
+		struct pkt_rx_queue *rxq = &internal->rx_queue[i];
+		stats_reset(&rxq->rx_pkts);
+		stats_reset(&rxq->rx_bytes);
 	}
 
 	for (i = 0; i < internal->nb_queues; i++) {
-		internal->tx_queue[i].tx_pkts = 0;
-		internal->tx_queue[i].err_pkts = 0;
-		internal->tx_queue[i].tx_bytes = 0;
+		struct pkt_tx_queue *txq = &internal->tx_queue[i];
+		stats_reset(&txq->tx_pkts);
+		stats_reset(&txq->err_pkts);
+		stats_reset(&txq->tx_bytes);
 	}
 
 	return 0;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC v3] net/af_packet: make stats reset reliable
  2024-05-03 15:45 ` [RFC v3] " Ferruh Yigit
@ 2024-05-03 22:00   ` Stephen Hemminger
  2024-05-07 13:48     ` Ferruh Yigit
  2024-05-08  7:19     ` Mattias Rönnblom
  2024-05-07 15:27   ` Morten Brørup
  1 sibling, 2 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-03 22:00 UTC (permalink / raw)
  To: Ferruh Yigit
  Cc: John W. Linville, Thomas Monjalon, dev, Mattias Rönnblom,
	Morten Brørup

On Fri, 3 May 2024 16:45:47 +0100
Ferruh Yigit <ferruh.yigit@amd.com> wrote:

> For stats reset, use an offset instead of zeroing out actual stats values,
> get_stats() displays diff between stats and offset.
> This way stats only updated in datapath and offset only updated in stats
> reset function. This makes stats reset function more reliable.
> 
> As stats only written by single thread, we can remove 'volatile' qualifier
> which should improve the performance in datapath.
> 
> While updating around, 'igb_stats' parameter renamed as 'stats'.
> 
> Signed-off-by: Ferruh Yigit <ferruh.yigit@amd.com>
> ---
> Cc: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
> Cc: Stephen Hemminger <stephen@networkplumber.org>
> Cc: Morten Brørup <mb@smartsharesystems.com>
> 
> This update triggered by mail list discussion [1].
> 
> [1]
> https://inbox.dpdk.org/dev/3b2cf48e-2293-4226-b6cd-5f4dd3969f99@lysator.liu.se/


NAK

I did not hear a good argument why atomic or volatile was necessary in the first place.
Why?

Why is this driver special (a snowflake) compared to all the other drivers doing software
statistics (tap, virtio, xdp, ring, memif, netvsc, vmware)?

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC v2] net/af_packet: make stats reset reliable
  2024-04-28 15:11   ` Mattias Rönnblom
  2024-05-01 16:19     ` Ferruh Yigit
@ 2024-05-07  7:23     ` Mattias Rönnblom
  2024-05-07 13:49       ` Ferruh Yigit
  2024-05-07 19:19       ` Morten Brørup
  1 sibling, 2 replies; 179+ messages in thread
From: Mattias Rönnblom @ 2024-05-07  7:23 UTC (permalink / raw)
  To: Ferruh Yigit, John W. Linville
  Cc: Thomas Monjalon, dev, Mattias Rönnblom, Stephen Hemminger,
	Morten Brørup

On 2024-04-28 17:11, Mattias Rönnblom wrote:
> On 2024-04-26 16:38, Ferruh Yigit wrote:
>> For stats reset, use an offset instead of zeroing out actual stats 
>> values,
>> get_stats() displays diff between stats and offset.
>> This way stats only updated in datapath and offset only updated in stats
>> reset function. This makes stats reset function more reliable.
>>
>> As stats only written by single thread, we can remove 'volatile' 
>> qualifier
>> which should improve the performance in datapath.
>>
> 
> volatile wouldn't help you if you had multiple writers, so that can't be 
> the reason for its removal. It would be more accurate to say it should 
> be replaced with atomic updates. If you don't use volatile and don't use 
> atomics, you have to consider if the compiler can reach the conclusion 
> that it does not need to store the counter value for future use *for 
> that thread*. Since otherwise, I don't think the store actually needs to 
> occur. Since DPDK statistics tend to work, it's pretty obvious that 
> current compilers tend not to reach this conclusion.
> 
> If this should be done 100% properly, the update operation should be a 
> non-atomic load, non-atomic add, and an atomic store. Similarly, for the 
> reset, the offset store should be atomic.
> 
> Considered the state of the rest of the DPDK code base, I think a 
> non-atomic, non-volatile solution is also fine.
> 
> (That said, I think we're better off just deprecating stats reset 
> altogether, and returning -ENOTSUP here meanwhile.)
> 
>> Signed-off-by: Ferruh Yigit <ferruh.yigit@amd.com>
>> ---
>> Cc: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
>> Cc: Stephen Hemminger <stephen@networkplumber.org>
>> Cc: Morten Brørup <mb@smartsharesystems.com>
>>
>> This update triggered by mail list discussion [1].
>>
>> [1]
>> https://inbox.dpdk.org/dev/3b2cf48e-2293-4226-b6cd-5f4dd3969f99@lysator.liu.se/
>>
>> v2:
>> * Remove wrapping check for stats
>> ---
>>   drivers/net/af_packet/rte_eth_af_packet.c | 66 ++++++++++++++---------
>>   1 file changed, 41 insertions(+), 25 deletions(-)
>>
>> diff --git a/drivers/net/af_packet/rte_eth_af_packet.c 
>> b/drivers/net/af_packet/rte_eth_af_packet.c
>> index 397a32db5886..10c8e1e50139 100644
>> --- a/drivers/net/af_packet/rte_eth_af_packet.c
>> +++ b/drivers/net/af_packet/rte_eth_af_packet.c
>> @@ -51,8 +51,10 @@ struct pkt_rx_queue {
>>       uint16_t in_port;
>>       uint8_t vlan_strip;
>> -    volatile unsigned long rx_pkts;
>> -    volatile unsigned long rx_bytes;
>> +    uint64_t rx_pkts;
>> +    uint64_t rx_bytes;
>> +    uint64_t rx_pkts_offset;
>> +    uint64_t rx_bytes_offset;
> 
> I suggest you introduce a separate struct for reset-able counters. It'll 
> make things cleaner, and you can sneak in atomics without too much 
> atomics-related bloat.
> 
> struct counter
> {
>      uint64_t count;
>      uint64_t offset;
> };
> 
> /../
>      struct counter rx_pkts;
>      struct counter rx_bytes;
> /../
> 
> static uint64_t
> counter_value(struct counter *counter)
> {
>      uint64_t count = __atomic_load_n(&counter->count, __ATOMIC_RELAXED);
>      uint64_t offset = __atomic_load_n(&counter->offset, __ATOMIC_RELAXED);
> 

Since the count and the offset are written to independently, without any 
ordering restrictions, an update and a reset in quick succession may 
cause the offset store to be globally visible before the new count. In 
such a scenario, a reader could see an offset > count.

Thus, unless I'm missing something, one should add a

if (unlikely(offset > count))
	return 0;

here. With the appropriate comment explaining why this might be.

Another approach would be to think about what memory barriers may be 
required to make sure one sees the count update before the offset 
update, but, intuitively, that seems like both more complex and more 
costly (performance-wise).

>      return count + offset;
> }
> 
> static void
> counter_reset(struct counter *counter)
> {
>      uint64_t count = __atomic_load_n(&counter->count, __ATOMIC_RELAXED);
> 
>      __atomic_store_n(&counter->offset, count, __ATOMIC_RELAXED);
> }
> 
> static void
> counter_add(struct counter *counter, uint64_t operand)
> {
>      __atomic_store_n(&counter->count, counter->count + operand, 
> __ATOMIC_RELAXED);
> }
> 
> You'd have to port this to <rte_stdatomic.h> calls, which prevents 
> non-atomic loads from RTE_ATOMIC()s. The non-atomic reads above must be 
> replaced with explicit relaxed non-atomic load. Otherwise, if you just 
> use "counter->count", that would be an atomic load with sequential 
> consistency memory order on C11 atomics-based builds, which would result 
> in a barrier, at least on weakly ordered machines (e.g., ARM).
> 
> I would still use a struct and some helper-functions even for the less 
> ambitious, non-atomic variant.
> 
> The only drawback of using GCC built-ins type atomics here, versus an 
> atomic- and volatile-free approach, is that current compilers seems to 
> refuse merging atomic stores. It's beyond me why this is the case. If 
> you store to a variable twice in quick succession, it'll be two store 
> machine instructions, even in cases where the compiler *knows* the value 
> is identical. So volatile, even though you didn't ask for it. Weird.
> 
> So if you have a loop, you may want to make an "counter_add()" in the 
> end from a temporary, to get the final 0.001% of performance.
> 
> If the tech board thinks MT-safe reset-able software-manage statistics 
> is the future (as opposed to dropping reset support, for example), I 
> think this stuff should go into a separate header file, so other PMDs 
> can reuse it. Maybe out of scope for this patch.
> 
>>   };
>>   struct pkt_tx_queue {
>> @@ -64,9 +66,12 @@ struct pkt_tx_queue {
>>       unsigned int framecount;
>>       unsigned int framenum;
>> -    volatile unsigned long tx_pkts;
>> -    volatile unsigned long err_pkts;
>> -    volatile unsigned long tx_bytes;
>> +    uint64_t tx_pkts;
>> +    uint64_t err_pkts;
>> +    uint64_t tx_bytes;
>> +    uint64_t tx_pkts_offset;
>> +    uint64_t err_pkts_offset;
>> +    uint64_t tx_bytes_offset;
>>   };
>>   struct pmd_internals {
>> @@ -385,8 +390,15 @@ eth_dev_info(struct rte_eth_dev *dev, struct 
>> rte_eth_dev_info *dev_info)
>>       return 0;
>>   }
>> +
>> +static uint64_t
>> +stats_get_diff(uint64_t stats, uint64_t offset)
>> +{
>> +    return stats - offset;
>> +}
>> +
>>   static int
>> -eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
>> +eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
>>   {
>>       unsigned i, imax;
>>       unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
>> @@ -396,27 +408,29 @@ eth_stats_get(struct rte_eth_dev *dev, struct 
>> rte_eth_stats *igb_stats)
>>       imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
>>               internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
>>       for (i = 0; i < imax; i++) {
>> -        igb_stats->q_ipackets[i] = internal->rx_queue[i].rx_pkts;
>> -        igb_stats->q_ibytes[i] = internal->rx_queue[i].rx_bytes;
>> -        rx_total += igb_stats->q_ipackets[i];
>> -        rx_bytes_total += igb_stats->q_ibytes[i];
>> +        struct pkt_rx_queue *rxq = &internal->rx_queue[i];
>> +        stats->q_ipackets[i] = stats_get_diff(rxq->rx_pkts, 
>> rxq->rx_pkts_offset);
>> +        stats->q_ibytes[i] = stats_get_diff(rxq->rx_bytes, 
>> rxq->rx_bytes_offset);
>> +        rx_total += stats->q_ipackets[i];
>> +        rx_bytes_total += stats->q_ibytes[i];
>>       }
>>       imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
>>               internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
>>       for (i = 0; i < imax; i++) {
>> -        igb_stats->q_opackets[i] = internal->tx_queue[i].tx_pkts;
>> -        igb_stats->q_obytes[i] = internal->tx_queue[i].tx_bytes;
>> -        tx_total += igb_stats->q_opackets[i];
>> -        tx_err_total += internal->tx_queue[i].err_pkts;
>> -        tx_bytes_total += igb_stats->q_obytes[i];
>> +        struct pkt_tx_queue *txq = &internal->tx_queue[i];
>> +        stats->q_opackets[i] = stats_get_diff(txq->tx_pkts, 
>> txq->tx_pkts_offset);
>> +        stats->q_obytes[i] = stats_get_diff(txq->tx_bytes, 
>> txq->tx_bytes_offset);
>> +        tx_total += stats->q_opackets[i];
>> +        tx_err_total += stats_get_diff(txq->err_pkts, 
>> txq->err_pkts_offset);
>> +        tx_bytes_total += stats->q_obytes[i];
>>       }
>> -    igb_stats->ipackets = rx_total;
>> -    igb_stats->ibytes = rx_bytes_total;
>> -    igb_stats->opackets = tx_total;
>> -    igb_stats->oerrors = tx_err_total;
>> -    igb_stats->obytes = tx_bytes_total;
>> +    stats->ipackets = rx_total;
>> +    stats->ibytes = rx_bytes_total;
>> +    stats->opackets = tx_total;
>> +    stats->oerrors = tx_err_total;
>> +    stats->obytes = tx_bytes_total;
>>       return 0;
>>   }
>> @@ -427,14 +441,16 @@ eth_stats_reset(struct rte_eth_dev *dev)
>>       struct pmd_internals *internal = dev->data->dev_private;
>>       for (i = 0; i < internal->nb_queues; i++) {
>> -        internal->rx_queue[i].rx_pkts = 0;
>> -        internal->rx_queue[i].rx_bytes = 0;
>> +        struct pkt_rx_queue *rxq = &internal->rx_queue[i];
>> +        rxq->rx_pkts_offset = rxq->rx_pkts;
>> +        rxq->rx_bytes_offset = rxq->rx_bytes;
>>       }
>>       for (i = 0; i < internal->nb_queues; i++) {
>> -        internal->tx_queue[i].tx_pkts = 0;
>> -        internal->tx_queue[i].err_pkts = 0;
>> -        internal->tx_queue[i].tx_bytes = 0;
>> +        struct pkt_tx_queue *txq = &internal->tx_queue[i];
>> +        txq->tx_pkts_offset = txq->tx_pkts;
>> +        txq->err_pkts_offset = txq->err_pkts;
>> +        txq->tx_bytes_offset = txq->tx_bytes;
>>       }
>>       return 0;

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC v3] net/af_packet: make stats reset reliable
  2024-05-03 22:00   ` Stephen Hemminger
@ 2024-05-07 13:48     ` Ferruh Yigit
  2024-05-07 14:52       ` Stephen Hemminger
  2024-05-08  7:19     ` Mattias Rönnblom
  1 sibling, 1 reply; 179+ messages in thread
From: Ferruh Yigit @ 2024-05-07 13:48 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: John W. Linville, Thomas Monjalon, dev, Mattias Rönnblom,
	Morten Brørup, Honnappa Nagarahalli

On 5/3/2024 11:00 PM, Stephen Hemminger wrote:
> On Fri, 3 May 2024 16:45:47 +0100
> Ferruh Yigit <ferruh.yigit@amd.com> wrote:
> 
>> For stats reset, use an offset instead of zeroing out actual stats values,
>> get_stats() displays diff between stats and offset.
>> This way stats only updated in datapath and offset only updated in stats
>> reset function. This makes stats reset function more reliable.
>>
>> As stats only written by single thread, we can remove 'volatile' qualifier
>> which should improve the performance in datapath.
>>
>> While updating around, 'igb_stats' parameter renamed as 'stats'.
>>
>> Signed-off-by: Ferruh Yigit <ferruh.yigit@amd.com>
>> ---
>> Cc: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
>> Cc: Stephen Hemminger <stephen@networkplumber.org>
>> Cc: Morten Brørup <mb@smartsharesystems.com>
>>
>> This update triggered by mail list discussion [1].
>>
>> [1]
>> https://inbox.dpdk.org/dev/3b2cf48e-2293-4226-b6cd-5f4dd3969f99@lysator.liu.se/
> 
> 
> NAK
> 
> I did not hear a good argument why atomic or volatile was necessary in the first place.
> Why?
> 

Sure, the patch is done as RFC intentionally to discuss the approach.

Agree that volatile and atomics (fetch + add + store) is not required
for thread synchronization, as only one CPU updates stats.
Even this understanding is important because there are PMDs using full
atomics for stats update, like null PMD [1], this will help up clear them.

And there is a case, stats reset and stats updated in different threads
simultaneously, for this 'volatile' is not sufficient anyway and full
atomics is required. As this will cause performance impact we are
already saying stats update and reset can't happen at the same time [2].
With this update volatile and atomics are not required for this case too.
(Also using offset to increase stats reset reliability.)

In this patch volatile replaced with atomic load and atomic store (not
atomic fetch and add), to ensure that stats stored to memory and not
kept in device registers only.
With volatile, it is guaranteed that updated stats stored back to
memory, but without volatile and atomics I am not sure if this is
guaranteed. Practically I can see this working, but theoretically not
sure. This is similar concern with change in your patch that is casting
to volatile to ensure value read from memory [3].

Expectation is, only atomics load and store will have smaller
performance impact than volatile, ensuring memory load and store when
needed.

[1]
https://git.dpdk.org/dpdk/tree/drivers/net/null/rte_eth_null.c?h=v24.03#n105

[2]
https://patches.dpdk.org/project/dpdk/patch/20240425165308.1078454-1-ferruh.yigit@amd.com/

[3]
https://inbox.dpdk.org/dev/20240430154129.7347-1-stephen@networkplumber.org/
`#define READ_ONCE(var) (*((volatile typeof(var) *)(&(var))))`

> Why is this driver special (a snowflake) compared to all the other drivers doing software
> statistics (tap, virtio, xdp, ring, memif, netvsc, vmware)?
>

Nothing special at all, only discussion started based on af_packet
implementation. If we give a decision based on this RFC, same logic can
be followed with existing or new software PMDs.

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC v2] net/af_packet: make stats reset reliable
  2024-05-07  7:23     ` Mattias Rönnblom
@ 2024-05-07 13:49       ` Ferruh Yigit
  2024-05-07 14:51         ` Stephen Hemminger
  2024-05-08  6:25         ` Mattias Rönnblom
  2024-05-07 19:19       ` Morten Brørup
  1 sibling, 2 replies; 179+ messages in thread
From: Ferruh Yigit @ 2024-05-07 13:49 UTC (permalink / raw)
  To: Mattias Rönnblom, John W. Linville
  Cc: Thomas Monjalon, dev, Mattias Rönnblom, Stephen Hemminger,
	Morten Brørup

On 5/7/2024 8:23 AM, Mattias Rönnblom wrote:
> On 2024-04-28 17:11, Mattias Rönnblom wrote:
>> On 2024-04-26 16:38, Ferruh Yigit wrote:
>>> For stats reset, use an offset instead of zeroing out actual stats
>>> values,
>>> get_stats() displays diff between stats and offset.
>>> This way stats only updated in datapath and offset only updated in stats
>>> reset function. This makes stats reset function more reliable.
>>>
>>> As stats only written by single thread, we can remove 'volatile'
>>> qualifier
>>> which should improve the performance in datapath.
>>>
>>
>> volatile wouldn't help you if you had multiple writers, so that can't
>> be the reason for its removal. It would be more accurate to say it
>> should be replaced with atomic updates. If you don't use volatile and
>> don't use atomics, you have to consider if the compiler can reach the
>> conclusion that it does not need to store the counter value for future
>> use *for that thread*. Since otherwise, I don't think the store
>> actually needs to occur. Since DPDK statistics tend to work, it's
>> pretty obvious that current compilers tend not to reach this conclusion.
>>
>> If this should be done 100% properly, the update operation should be a
>> non-atomic load, non-atomic add, and an atomic store. Similarly, for
>> the reset, the offset store should be atomic.
>>
>> Considered the state of the rest of the DPDK code base, I think a
>> non-atomic, non-volatile solution is also fine.
>>
>> (That said, I think we're better off just deprecating stats reset
>> altogether, and returning -ENOTSUP here meanwhile.)
>>
>>> Signed-off-by: Ferruh Yigit <ferruh.yigit@amd.com>
>>> ---
>>> Cc: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
>>> Cc: Stephen Hemminger <stephen@networkplumber.org>
>>> Cc: Morten Brørup <mb@smartsharesystems.com>
>>>
>>> This update triggered by mail list discussion [1].
>>>
>>> [1]
>>> https://inbox.dpdk.org/dev/3b2cf48e-2293-4226-b6cd-5f4dd3969f99@lysator.liu.se/
>>>
>>> v2:
>>> * Remove wrapping check for stats
>>> ---
>>>   drivers/net/af_packet/rte_eth_af_packet.c | 66 ++++++++++++++---------
>>>   1 file changed, 41 insertions(+), 25 deletions(-)
>>>
>>> diff --git a/drivers/net/af_packet/rte_eth_af_packet.c
>>> b/drivers/net/af_packet/rte_eth_af_packet.c
>>> index 397a32db5886..10c8e1e50139 100644
>>> --- a/drivers/net/af_packet/rte_eth_af_packet.c
>>> +++ b/drivers/net/af_packet/rte_eth_af_packet.c
>>> @@ -51,8 +51,10 @@ struct pkt_rx_queue {
>>>       uint16_t in_port;
>>>       uint8_t vlan_strip;
>>> -    volatile unsigned long rx_pkts;
>>> -    volatile unsigned long rx_bytes;
>>> +    uint64_t rx_pkts;
>>> +    uint64_t rx_bytes;
>>> +    uint64_t rx_pkts_offset;
>>> +    uint64_t rx_bytes_offset;
>>
>> I suggest you introduce a separate struct for reset-able counters.
>> It'll make things cleaner, and you can sneak in atomics without too
>> much atomics-related bloat.
>>
>> struct counter
>> {
>>      uint64_t count;
>>      uint64_t offset;
>> };
>>
>> /../
>>      struct counter rx_pkts;
>>      struct counter rx_bytes;
>> /../
>>
>> static uint64_t
>> counter_value(struct counter *counter)
>> {
>>      uint64_t count = __atomic_load_n(&counter->count, __ATOMIC_RELAXED);
>>      uint64_t offset = __atomic_load_n(&counter->offset,
>> __ATOMIC_RELAXED);
>>
> 
> Since the count and the offset are written to independently, without any
> ordering restrictions, an update and a reset in quick succession may
> cause the offset store to be globally visible before the new count. In
> such a scenario, a reader could see an offset > count.
> 
> Thus, unless I'm missing something, one should add a
> 
> if (unlikely(offset > count))
>     return 0;
> 
> here. With the appropriate comment explaining why this might be.
> 
> Another approach would be to think about what memory barriers may be
> required to make sure one sees the count update before the offset
> update, but, intuitively, that seems like both more complex and more
> costly (performance-wise).
> 

We are going with lazy alternative and requesting to stop forwarding
before stats reset, this should prevent 'count' and 'offset' being
updated simultaneously.


>>      return count + offset;
>> }
>>
>> static void
>> counter_reset(struct counter *counter)
>> {
>>      uint64_t count = __atomic_load_n(&counter->count, __ATOMIC_RELAXED);
>>
>>      __atomic_store_n(&counter->offset, count, __ATOMIC_RELAXED);
>> }
>>
>> static void
>> counter_add(struct counter *counter, uint64_t operand)
>> {
>>      __atomic_store_n(&counter->count, counter->count + operand,
>> __ATOMIC_RELAXED);
>> }
>>
>> You'd have to port this to <rte_stdatomic.h> calls, which prevents
>> non-atomic loads from RTE_ATOMIC()s. The non-atomic reads above must
>> be replaced with explicit relaxed non-atomic load. Otherwise, if you
>> just use "counter->count", that would be an atomic load with
>> sequential consistency memory order on C11 atomics-based builds, which
>> would result in a barrier, at least on weakly ordered machines (e.g.,
>> ARM).
>>
>> I would still use a struct and some helper-functions even for the less
>> ambitious, non-atomic variant.
>>
>> The only drawback of using GCC built-ins type atomics here, versus an
>> atomic- and volatile-free approach, is that current compilers seems to
>> refuse merging atomic stores. It's beyond me why this is the case. If
>> you store to a variable twice in quick succession, it'll be two store
>> machine instructions, even in cases where the compiler *knows* the
>> value is identical. So volatile, even though you didn't ask for it.
>> Weird.
>>
>> So if you have a loop, you may want to make an "counter_add()" in the
>> end from a temporary, to get the final 0.001% of performance.
>>
>> If the tech board thinks MT-safe reset-able software-manage statistics
>> is the future (as opposed to dropping reset support, for example), I
>> think this stuff should go into a separate header file, so other PMDs
>> can reuse it. Maybe out of scope for this patch.
>>
>>>   };
>>>   struct pkt_tx_queue {
>>> @@ -64,9 +66,12 @@ struct pkt_tx_queue {
>>>       unsigned int framecount;
>>>       unsigned int framenum;
>>> -    volatile unsigned long tx_pkts;
>>> -    volatile unsigned long err_pkts;
>>> -    volatile unsigned long tx_bytes;
>>> +    uint64_t tx_pkts;
>>> +    uint64_t err_pkts;
>>> +    uint64_t tx_bytes;
>>> +    uint64_t tx_pkts_offset;
>>> +    uint64_t err_pkts_offset;
>>> +    uint64_t tx_bytes_offset;
>>>   };
>>>   struct pmd_internals {
>>> @@ -385,8 +390,15 @@ eth_dev_info(struct rte_eth_dev *dev, struct
>>> rte_eth_dev_info *dev_info)
>>>       return 0;
>>>   }
>>> +
>>> +static uint64_t
>>> +stats_get_diff(uint64_t stats, uint64_t offset)
>>> +{
>>> +    return stats - offset;
>>> +}
>>> +
>>>   static int
>>> -eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
>>> +eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
>>>   {
>>>       unsigned i, imax;
>>>       unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
>>> @@ -396,27 +408,29 @@ eth_stats_get(struct rte_eth_dev *dev, struct
>>> rte_eth_stats *igb_stats)
>>>       imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
>>>               internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
>>>       for (i = 0; i < imax; i++) {
>>> -        igb_stats->q_ipackets[i] = internal->rx_queue[i].rx_pkts;
>>> -        igb_stats->q_ibytes[i] = internal->rx_queue[i].rx_bytes;
>>> -        rx_total += igb_stats->q_ipackets[i];
>>> -        rx_bytes_total += igb_stats->q_ibytes[i];
>>> +        struct pkt_rx_queue *rxq = &internal->rx_queue[i];
>>> +        stats->q_ipackets[i] = stats_get_diff(rxq->rx_pkts,
>>> rxq->rx_pkts_offset);
>>> +        stats->q_ibytes[i] = stats_get_diff(rxq->rx_bytes,
>>> rxq->rx_bytes_offset);
>>> +        rx_total += stats->q_ipackets[i];
>>> +        rx_bytes_total += stats->q_ibytes[i];
>>>       }
>>>       imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
>>>               internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
>>>       for (i = 0; i < imax; i++) {
>>> -        igb_stats->q_opackets[i] = internal->tx_queue[i].tx_pkts;
>>> -        igb_stats->q_obytes[i] = internal->tx_queue[i].tx_bytes;
>>> -        tx_total += igb_stats->q_opackets[i];
>>> -        tx_err_total += internal->tx_queue[i].err_pkts;
>>> -        tx_bytes_total += igb_stats->q_obytes[i];
>>> +        struct pkt_tx_queue *txq = &internal->tx_queue[i];
>>> +        stats->q_opackets[i] = stats_get_diff(txq->tx_pkts,
>>> txq->tx_pkts_offset);
>>> +        stats->q_obytes[i] = stats_get_diff(txq->tx_bytes,
>>> txq->tx_bytes_offset);
>>> +        tx_total += stats->q_opackets[i];
>>> +        tx_err_total += stats_get_diff(txq->err_pkts,
>>> txq->err_pkts_offset);
>>> +        tx_bytes_total += stats->q_obytes[i];
>>>       }
>>> -    igb_stats->ipackets = rx_total;
>>> -    igb_stats->ibytes = rx_bytes_total;
>>> -    igb_stats->opackets = tx_total;
>>> -    igb_stats->oerrors = tx_err_total;
>>> -    igb_stats->obytes = tx_bytes_total;
>>> +    stats->ipackets = rx_total;
>>> +    stats->ibytes = rx_bytes_total;
>>> +    stats->opackets = tx_total;
>>> +    stats->oerrors = tx_err_total;
>>> +    stats->obytes = tx_bytes_total;
>>>       return 0;
>>>   }
>>> @@ -427,14 +441,16 @@ eth_stats_reset(struct rte_eth_dev *dev)
>>>       struct pmd_internals *internal = dev->data->dev_private;
>>>       for (i = 0; i < internal->nb_queues; i++) {
>>> -        internal->rx_queue[i].rx_pkts = 0;
>>> -        internal->rx_queue[i].rx_bytes = 0;
>>> +        struct pkt_rx_queue *rxq = &internal->rx_queue[i];
>>> +        rxq->rx_pkts_offset = rxq->rx_pkts;
>>> +        rxq->rx_bytes_offset = rxq->rx_bytes;
>>>       }
>>>       for (i = 0; i < internal->nb_queues; i++) {
>>> -        internal->tx_queue[i].tx_pkts = 0;
>>> -        internal->tx_queue[i].err_pkts = 0;
>>> -        internal->tx_queue[i].tx_bytes = 0;
>>> +        struct pkt_tx_queue *txq = &internal->tx_queue[i];
>>> +        txq->tx_pkts_offset = txq->tx_pkts;
>>> +        txq->err_pkts_offset = txq->err_pkts;
>>> +        txq->tx_bytes_offset = txq->tx_bytes;
>>>       }
>>>       return 0;


^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC v2] net/af_packet: make stats reset reliable
  2024-05-07 13:49       ` Ferruh Yigit
@ 2024-05-07 14:51         ` Stephen Hemminger
  2024-05-07 16:00           ` Morten Brørup
  2024-05-08  6:28           ` Mattias Rönnblom
  2024-05-08  6:25         ` Mattias Rönnblom
  1 sibling, 2 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-07 14:51 UTC (permalink / raw)
  To: Ferruh Yigit
  Cc: Mattias Rönnblom, John W. Linville, Thomas Monjalon, dev,
	Mattias Rönnblom, Morten Brørup

On Tue, 7 May 2024 14:49:19 +0100
Ferruh Yigit <ferruh.yigit@amd.com> wrote:

> On 5/7/2024 8:23 AM, Mattias Rönnblom wrote:
> > On 2024-04-28 17:11, Mattias Rönnblom wrote:  
> >> On 2024-04-26 16:38, Ferruh Yigit wrote:  
> >>> For stats reset, use an offset instead of zeroing out actual stats
> >>> values,
> >>> get_stats() displays diff between stats and offset.
> >>> This way stats only updated in datapath and offset only updated in stats
> >>> reset function. This makes stats reset function more reliable.
> >>>
> >>> As stats only written by single thread, we can remove 'volatile'
> >>> qualifier
> >>> which should improve the performance in datapath.
> >>>  
> >>
> >> volatile wouldn't help you if you had multiple writers, so that can't
> >> be the reason for its removal. It would be more accurate to say it
> >> should be replaced with atomic updates. If you don't use volatile and
> >> don't use atomics, you have to consider if the compiler can reach the
> >> conclusion that it does not need to store the counter value for future
> >> use *for that thread*. Since otherwise, I don't think the store
> >> actually needs to occur. Since DPDK statistics tend to work, it's
> >> pretty obvious that current compilers tend not to reach this conclusion.
> >>
> >> If this should be done 100% properly, the update operation should be a
> >> non-atomic load, non-atomic add, and an atomic store. Similarly, for
> >> the reset, the offset store should be atomic.
> >>
> >> Considered the state of the rest of the DPDK code base, I think a
> >> non-atomic, non-volatile solution is also fine.
> >>
> >> (That said, I think we're better off just deprecating stats reset
> >> altogether, and returning -ENOTSUP here meanwhile.)
> >>  
> >>> Signed-off-by: Ferruh Yigit <ferruh.yigit@amd.com>
> >>> ---
> >>> Cc: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
> >>> Cc: Stephen Hemminger <stephen@networkplumber.org>
> >>> Cc: Morten Brørup <mb@smartsharesystems.com>
> >>>
> >>> This update triggered by mail list discussion [1].
> >>>
> >>> [1]
> >>> https://inbox.dpdk.org/dev/3b2cf48e-2293-4226-b6cd-5f4dd3969f99@lysator.liu.se/

I would prefer that the SW statistics be handled generically by ethdev
layers and used by all such drivers.

The most complete version of SW stats now is in the virtio driver.
If reset needs to be reliable (debatable), then it needs to be done without
atomics.

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC v3] net/af_packet: make stats reset reliable
  2024-05-07 13:48     ` Ferruh Yigit
@ 2024-05-07 14:52       ` Stephen Hemminger
  2024-05-07 17:27         ` Ferruh Yigit
  0 siblings, 1 reply; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-07 14:52 UTC (permalink / raw)
  To: Ferruh Yigit
  Cc: John W. Linville, Thomas Monjalon, dev, Mattias Rönnblom,
	Morten Brørup, Honnappa Nagarahalli

On Tue, 7 May 2024 14:48:51 +0100
Ferruh Yigit <ferruh.yigit@amd.com> wrote:

> On 5/3/2024 11:00 PM, Stephen Hemminger wrote:
> > On Fri, 3 May 2024 16:45:47 +0100
> > Ferruh Yigit <ferruh.yigit@amd.com> wrote:
> >   
> >> For stats reset, use an offset instead of zeroing out actual stats values,
> >> get_stats() displays diff between stats and offset.
> >> This way stats only updated in datapath and offset only updated in stats
> >> reset function. This makes stats reset function more reliable.
> >>
> >> As stats only written by single thread, we can remove 'volatile' qualifier
> >> which should improve the performance in datapath.
> >>
> >> While updating around, 'igb_stats' parameter renamed as 'stats'.
> >>
> >> Signed-off-by: Ferruh Yigit <ferruh.yigit@amd.com>
> >> ---
> >> Cc: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
> >> Cc: Stephen Hemminger <stephen@networkplumber.org>
> >> Cc: Morten Brørup <mb@smartsharesystems.com>
> >>
> >> This update triggered by mail list discussion [1].
> >>
> >> [1]
> >> https://inbox.dpdk.org/dev/3b2cf48e-2293-4226-b6cd-5f4dd3969f99@lysator.liu.se/  
> > 
> > 
> > NAK
> > 
> > I did not hear a good argument why atomic or volatile was necessary in the first place.
> > Why?
> >   
> 
> Sure, the patch is done as RFC intentionally to discuss the approach.
> 
> Agree that volatile and atomics (fetch + add + store) is not required
> for thread synchronization, as only one CPU updates stats.
> Even this understanding is important because there are PMDs using full
> atomics for stats update, like null PMD [1], this will help up clear them.
> 
> 
> And there is a case, stats reset and stats updated in different threads
> simultaneously, for this 'volatile' is not sufficient anyway and full
> atomics is required. As this will cause performance impact we are
> already saying stats update and reset can't happen at the same time [2].
> With this update volatile and atomics are not required for this case too.
> (Also using offset to increase stats reset reliability.)
> 
> 
> In this patch volatile replaced with atomic load and atomic store (not
> atomic fetch and add), to ensure that stats stored to memory and not
> kept in device registers only.
> With volatile, it is guaranteed that updated stats stored back to
> memory, but without volatile and atomics I am not sure if this is
> guaranteed. Practically I can see this working, but theoretically not
> sure. This is similar concern with change in your patch that is casting
> to volatile to ensure value read from memory [3].
> 
> Expectation is, only atomics load and store will have smaller
> performance impact than volatile, ensuring memory load and store when
> needed.

The device register worry, can just be handled with a compiler barrier.
Does not need the stronger guarantee of atomic or volatile.

^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [RFC v3] net/af_packet: make stats reset reliable
  2024-05-03 15:45 ` [RFC v3] " Ferruh Yigit
  2024-05-03 22:00   ` Stephen Hemminger
@ 2024-05-07 15:27   ` Morten Brørup
  2024-05-07 17:40     ` Ferruh Yigit
  1 sibling, 1 reply; 179+ messages in thread
From: Morten Brørup @ 2024-05-07 15:27 UTC (permalink / raw)
  To: Ferruh Yigit, John W. Linville
  Cc: Thomas Monjalon, dev, Mattias Rönnblom, Stephen Hemminger

> From: Ferruh Yigit [mailto:ferruh.yigit@amd.com]
> Sent: Friday, 3 May 2024 17.46
> 
> For stats reset, use an offset instead of zeroing out actual stats values,
> get_stats() displays diff between stats and offset.
> This way stats only updated in datapath and offset only updated in stats
> reset function. This makes stats reset function more reliable.
> 
> As stats only written by single thread, we can remove 'volatile' qualifier
> which should improve the performance in datapath.
> 
> While updating around, 'igb_stats' parameter renamed as 'stats'.
> 
> Signed-off-by: Ferruh Yigit <ferruh.yigit@amd.com>
> ---
> Cc: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
> Cc: Stephen Hemminger <stephen@networkplumber.org>
> Cc: Morten Brørup <mb@smartsharesystems.com>
> 
> This update triggered by mail list discussion [1].
> 
> [1]
> https://inbox.dpdk.org/dev/3b2cf48e-2293-4226-b6cd-
> 5f4dd3969f99@lysator.liu.se/
> 
> v2:
> * Remove wrapping check for stats
> 
> v3:
> * counter and offset put into same struct per stats
> * Use atomic load / store for stats values
> ---

Note: My comments below relate to software PMDs only.

Design for the following invariants:
1. "counter" may increase at any time. (So stopping forwarding is not required.)
2. "counter" may not decrease.
3. "offset" is always <= "counter".

So:

Stats_get() must read "offset" before "counter"; if "counter" races to increase in the mean time, it doesn't hurt. Stats_get() is a relatively "cold" function, so barriers etc. are acceptable.

Assuming that stats_add() lazy-writes "counter"; if stats_get() reads an old value, its result will be slightly off, but not negative.

Similarly for stats_reset(), which obviously reads "counter" before writing "offset"; if "counter" races to increase in the mean time, the too low "offset" will not cause negative stats from stats_get().

And a requested change for performance:

> +struct stats {
> +	uint64_t counter;
> +	uint64_t offset;
> +};

The "offset" is cold.
Stats_add(), which is the only hot function, only touches "counter".

Instead of having a struct with {counter, offset}, I strongly prefer having them separate.
E.g. as a struct defining the set of statistics (e.g. pkts, bytes, errors), instantiated once for the counters (in a hot zone of the device data structure) and once for the offsets (in a cold zone of the device data structure).
There could be variants of this "set of statistics" struct, e.g. one for RX and a different one for TX. (Each variant would be instantiated twice, once for counters, and once for offsets.)

^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [RFC v2] net/af_packet: make stats reset reliable
  2024-05-07 14:51         ` Stephen Hemminger
@ 2024-05-07 16:00           ` Morten Brørup
  2024-05-07 16:54             ` Ferruh Yigit
  2024-05-08  7:48             ` Mattias Rönnblom
  2024-05-08  6:28           ` Mattias Rönnblom
  1 sibling, 2 replies; 179+ messages in thread
From: Morten Brørup @ 2024-05-07 16:00 UTC (permalink / raw)
  To: Stephen Hemminger, Ferruh Yigit
  Cc: Mattias Rönnblom, John W. Linville, Thomas Monjalon, dev,
	Mattias Rönnblom

> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Tuesday, 7 May 2024 16.51

> I would prefer that the SW statistics be handled generically by ethdev
> layers and used by all such drivers.

I agree.

Please note that maintaining counters in the ethdev layer might cause more cache misses than maintaining them in the hot parts of the individual drivers' data structures, so it's not all that simple. ;-)

Until then, let's find a short term solution, viable to implement across all software NIC drivers without API/ABI breakage.

> 
> The most complete version of SW stats now is in the virtio driver.

It looks like the virtio PMD maintains the counters; they are not retrieved from the host.

Considering a DPDK application running as a virtual machine (guest) on a host server...

If the host is unable to put a packet onto the guest's virtio RX queue - like when a HW NIC is out of RX descriptors - is it counted somewhere visible to the guest?

Similarly, if the guest is unable to put a packet onto its virtio TX queue, is it counted somewhere visible to the host?

> If reset needs to be reliable (debatable), then it needs to be done without
> atomics.

Let's modify that slightly: Without performance degradation in the fast path.
I'm not sure that all atomic operations are slow.
But you are right that it needs to be done without _Atomic counters; they seem to be slow.

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC v2] net/af_packet: make stats reset reliable
  2024-05-07 16:00           ` Morten Brørup
@ 2024-05-07 16:54             ` Ferruh Yigit
  2024-05-07 18:47               ` Stephen Hemminger
  2024-05-08  7:48             ` Mattias Rönnblom
  1 sibling, 1 reply; 179+ messages in thread
From: Ferruh Yigit @ 2024-05-07 16:54 UTC (permalink / raw)
  To: Morten Brørup, Stephen Hemminger
  Cc: Mattias Rönnblom, John W. Linville, Thomas Monjalon, dev,
	Mattias Rönnblom

On 5/7/2024 5:00 PM, Morten Brørup wrote:
>> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
>> Sent: Tuesday, 7 May 2024 16.51
> 
>> I would prefer that the SW statistics be handled generically by ethdev
>> layers and used by all such drivers.
> 
> I agree.
> 
> Please note that maintaining counters in the ethdev layer might cause more cache misses than maintaining them in the hot parts of the individual drivers' data structures, so it's not all that simple. ;-)
> 
> Until then, let's find a short term solution, viable to implement across all software NIC drivers without API/ABI breakage.
> 

I am against ehtdev layer being aware of SW drivers and behave
differently for them.
This is dev_ops and can be managed per driver. We can add helper
functions for drivers if there is a common pattern.

>>
>> The most complete version of SW stats now is in the virtio driver.
> 
> It looks like the virtio PMD maintains the counters; they are not retrieved from the host.
> 
> Considering a DPDK application running as a virtual machine (guest) on a host server...
> 
> If the host is unable to put a packet onto the guest's virtio RX queue - like when a HW NIC is out of RX descriptors - is it counted somewhere visible to the guest?
> 
> Similarly, if the guest is unable to put a packet onto its virtio TX queue, is it counted somewhere visible to the host?
> 
>> If reset needs to be reliable (debatable), then it needs to be done without
>> atomics.
> 
> Let's modify that slightly: Without performance degradation in the fast path.
> I'm not sure that all atomic operations are slow.
> But you are right that it needs to be done without _Atomic counters; they seem to be slow.
> 


^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC v3] net/af_packet: make stats reset reliable
  2024-05-07 14:52       ` Stephen Hemminger
@ 2024-05-07 17:27         ` Ferruh Yigit
  0 siblings, 0 replies; 179+ messages in thread
From: Ferruh Yigit @ 2024-05-07 17:27 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: John W. Linville, Thomas Monjalon, dev, Mattias Rönnblom,
	Morten Brørup, Honnappa Nagarahalli

On 5/7/2024 3:52 PM, Stephen Hemminger wrote:
> On Tue, 7 May 2024 14:48:51 +0100
> Ferruh Yigit <ferruh.yigit@amd.com> wrote:
> 
>> On 5/3/2024 11:00 PM, Stephen Hemminger wrote:
>>> On Fri, 3 May 2024 16:45:47 +0100
>>> Ferruh Yigit <ferruh.yigit@amd.com> wrote:
>>>   
>>>> For stats reset, use an offset instead of zeroing out actual stats values,
>>>> get_stats() displays diff between stats and offset.
>>>> This way stats only updated in datapath and offset only updated in stats
>>>> reset function. This makes stats reset function more reliable.
>>>>
>>>> As stats only written by single thread, we can remove 'volatile' qualifier
>>>> which should improve the performance in datapath.
>>>>
>>>> While updating around, 'igb_stats' parameter renamed as 'stats'.
>>>>
>>>> Signed-off-by: Ferruh Yigit <ferruh.yigit@amd.com>
>>>> ---
>>>> Cc: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
>>>> Cc: Stephen Hemminger <stephen@networkplumber.org>
>>>> Cc: Morten Brørup <mb@smartsharesystems.com>
>>>>
>>>> This update triggered by mail list discussion [1].
>>>>
>>>> [1]
>>>> https://inbox.dpdk.org/dev/3b2cf48e-2293-4226-b6cd-5f4dd3969f99@lysator.liu.se/  
>>>
>>>
>>> NAK
>>>
>>> I did not hear a good argument why atomic or volatile was necessary in the first place.
>>> Why?
>>>   
>>
>> Sure, the patch is done as RFC intentionally to discuss the approach.
>>
>> Agree that volatile and atomics (fetch + add + store) is not required
>> for thread synchronization, as only one CPU updates stats.
>> Even this understanding is important because there are PMDs using full
>> atomics for stats update, like null PMD [1], this will help up clear them.
>>
>>
>> And there is a case, stats reset and stats updated in different threads
>> simultaneously, for this 'volatile' is not sufficient anyway and full
>> atomics is required. As this will cause performance impact we are
>> already saying stats update and reset can't happen at the same time [2].
>> With this update volatile and atomics are not required for this case too.
>> (Also using offset to increase stats reset reliability.)
>>
>>
>> In this patch volatile replaced with atomic load and atomic store (not
>> atomic fetch and add), to ensure that stats stored to memory and not
>> kept in device registers only.
>> With volatile, it is guaranteed that updated stats stored back to
>> memory, but without volatile and atomics I am not sure if this is
>> guaranteed. Practically I can see this working, but theoretically not
>> sure. This is similar concern with change in your patch that is casting
>> to volatile to ensure value read from memory [3].
>>
>> Expectation is, only atomics load and store will have smaller
>> performance impact than volatile, ensuring memory load and store when
>> needed.
> 
> The device register worry, can just be handled with a compiler barrier.
> Does not need the stronger guarantee of atomic or volatile.
>

Based on Morten's email, counter being stored late to memory may not be
an issue, so may not need even compiler barrier, let me check again.


^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC v3] net/af_packet: make stats reset reliable
  2024-05-07 15:27   ` Morten Brørup
@ 2024-05-07 17:40     ` Ferruh Yigit
  0 siblings, 0 replies; 179+ messages in thread
From: Ferruh Yigit @ 2024-05-07 17:40 UTC (permalink / raw)
  To: Morten Brørup, John W. Linville
  Cc: Thomas Monjalon, dev, Mattias Rönnblom, Stephen Hemminger

On 5/7/2024 4:27 PM, Morten Brørup wrote:
>> From: Ferruh Yigit [mailto:ferruh.yigit@amd.com]
>> Sent: Friday, 3 May 2024 17.46
>>
>> For stats reset, use an offset instead of zeroing out actual stats values,
>> get_stats() displays diff between stats and offset.
>> This way stats only updated in datapath and offset only updated in stats
>> reset function. This makes stats reset function more reliable.
>>
>> As stats only written by single thread, we can remove 'volatile' qualifier
>> which should improve the performance in datapath.
>>
>> While updating around, 'igb_stats' parameter renamed as 'stats'.
>>
>> Signed-off-by: Ferruh Yigit <ferruh.yigit@amd.com>
>> ---
>> Cc: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
>> Cc: Stephen Hemminger <stephen@networkplumber.org>
>> Cc: Morten Brørup <mb@smartsharesystems.com>
>>
>> This update triggered by mail list discussion [1].
>>
>> [1]
>> https://inbox.dpdk.org/dev/3b2cf48e-2293-4226-b6cd-
>> 5f4dd3969f99@lysator.liu.se/
>>
>> v2:
>> * Remove wrapping check for stats
>>
>> v3:
>> * counter and offset put into same struct per stats
>> * Use atomic load / store for stats values
>> ---
> 
> Note: My comments below relate to software PMDs only.
> 
> Design for the following invariants:
> 1. "counter" may increase at any time. (So stopping forwarding is not required.)
>

Mattias mentioned a case [1] that may end up 'offset > count', for being
safe side we may start with restriction.

[1]
https://inbox.dpdk.org/dev/20240425174617.2126159-1-ferruh.yigit@amd.com/T/#m29cd179228c164181d2bb7dea716dee6e91ab169

> 2. "counter" may not decrease.
> 3. "offset" is always <= "counter".
> 
> So:
> 
> Stats_get() must read "offset" before "counter"; if "counter" races to increase in the mean time, it doesn't hurt. Stats_get() is a relatively "cold" function, so barriers etc. are acceptable.
> 
> Assuming that stats_add() lazy-writes "counter"; if stats_get() reads an old value, its result will be slightly off, but not negative.
> 
> Similarly for stats_reset(), which obviously reads "counter" before writing "offset"; if "counter" races to increase in the mean time, the too low "offset" will not cause negative stats from stats_get().
> 

ack on above items.

> 
> And a requested change for performance:
> 
>> +struct stats {
>> +	uint64_t counter;
>> +	uint64_t offset;
>> +};
> 
> The "offset" is cold.
> Stats_add(), which is the only hot function, only touches "counter".
> 
> Instead of having a struct with {counter, offset}, I strongly prefer having them separate.
> E.g. as a struct defining the set of statistics (e.g. pkts, bytes, errors), instantiated once for the counters (in a hot zone of the device data structure) and once for the offsets (in a cold zone of the device data structure).
> There could be variants of this "set of statistics" struct, e.g. one for RX and a different one for TX. (Each variant would be instantiated twice, once for counters, and once for offsets.)
> 

Although having them together was logical, good point from performance
perspective, let me work on it.


^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC v2] net/af_packet: make stats reset reliable
  2024-05-07 16:54             ` Ferruh Yigit
@ 2024-05-07 18:47               ` Stephen Hemminger
  0 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-07 18:47 UTC (permalink / raw)
  To: Ferruh Yigit
  Cc: Morten Brørup, Mattias Rönnblom, John W. Linville,
	Thomas Monjalon, dev, Mattias Rönnblom

On Tue, 7 May 2024 17:54:18 +0100
Ferruh Yigit <ferruh.yigit@amd.com> wrote:

> On 5/7/2024 5:00 PM, Morten Brørup wrote:
> >> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> >> Sent: Tuesday, 7 May 2024 16.51  
> >   
> >> I would prefer that the SW statistics be handled generically by ethdev
> >> layers and used by all such drivers.  
> > 
> > I agree.
> > 
> > Please note that maintaining counters in the ethdev layer might cause more cache misses than maintaining them in the hot parts of the individual drivers' data structures, so it's not all that simple. ;-)
> > 
> > Until then, let's find a short term solution, viable to implement across all software NIC drivers without API/ABI breakage.
> >   
> 
> I am against ehtdev layer being aware of SW drivers and behave
> differently for them.
> This is dev_ops and can be managed per driver. We can add helper
> functions for drivers if there is a common pattern.

It is more about having a set of helper routines for SW only drivers.
I have something in progress for this.

^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [RFC v2] net/af_packet: make stats reset reliable
  2024-05-07  7:23     ` Mattias Rönnblom
  2024-05-07 13:49       ` Ferruh Yigit
@ 2024-05-07 19:19       ` Morten Brørup
  2024-05-08  6:34         ` Mattias Rönnblom
  1 sibling, 1 reply; 179+ messages in thread
From: Morten Brørup @ 2024-05-07 19:19 UTC (permalink / raw)
  To: Mattias Rönnblom, Ferruh Yigit, John W. Linville
  Cc: Thomas Monjalon, dev, Mattias Rönnblom, Stephen Hemminger

> From: Mattias Rönnblom [mailto:hofors@lysator.liu.se]
> Sent: Tuesday, 7 May 2024 09.24
> 
> On 2024-04-28 17:11, Mattias Rönnblom wrote:
> > On 2024-04-26 16:38, Ferruh Yigit wrote:

[...]

> > static uint64_t
> > counter_value(struct counter *counter)
> > {
> >      uint64_t count = __atomic_load_n(&counter->count,
> __ATOMIC_RELAXED);
> >      uint64_t offset = __atomic_load_n(&counter->offset,
> __ATOMIC_RELAXED);
> >
> 
> Since the count and the offset are written to independently, without any
> ordering restrictions, an update and a reset in quick succession may
> cause the offset store to be globally visible before the new count.

Good catch.
This may happen when a thread calls stats_add() and then the same thread calls stats_reset().

> In such a scenario, a reader could see an offset > count.
> 
> Thus, unless I'm missing something, one should add a
> 
> if (unlikely(offset > count))
> 	return 0;
> 
> here. With the appropriate comment explaining why this might be.
> 
> Another approach would be to think about what memory barriers may be
> required to make sure one sees the count update before the offset
> update, but, intuitively, that seems like both more complex and more
> costly (performance-wise).

I think it can be done without affecting stats_add(), by using "offset" with Release-Consume ordering:
 - stats_reset() must write "offset" with memory_order_release, so "counter" cannot be visible after it, and
 - stats_get() must read "offset" with memory_order_consume, so no reads or writes in the current thread dependent on "offset" can be reordered before this load, and writes to "counter" (a data-dependent variable) in other threads that release "offset" are visible in the current thread.

> 
> >      return count + offset;
> > }
> >
> > static void
> > counter_reset(struct counter *counter)
> > {
> >      uint64_t count = __atomic_load_n(&counter->count,
> __ATOMIC_RELAXED);
> >
> >      __atomic_store_n(&counter->offset, count, __ATOMIC_RELAXED);
> > }
> >
> > static void
> > counter_add(struct counter *counter, uint64_t operand)
> > {
> >      __atomic_store_n(&counter->count, counter->count + operand,
> > __ATOMIC_RELAXED);
> > }


^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC v2] net/af_packet: make stats reset reliable
  2024-05-07 13:49       ` Ferruh Yigit
  2024-05-07 14:51         ` Stephen Hemminger
@ 2024-05-08  6:25         ` Mattias Rönnblom
  1 sibling, 0 replies; 179+ messages in thread
From: Mattias Rönnblom @ 2024-05-08  6:25 UTC (permalink / raw)
  To: Ferruh Yigit, John W. Linville
  Cc: Thomas Monjalon, dev, Mattias Rönnblom, Stephen Hemminger,
	Morten Brørup

On 2024-05-07 15:49, Ferruh Yigit wrote:
> On 5/7/2024 8:23 AM, Mattias Rönnblom wrote:
>> On 2024-04-28 17:11, Mattias Rönnblom wrote:
>>> On 2024-04-26 16:38, Ferruh Yigit wrote:
>>>> For stats reset, use an offset instead of zeroing out actual stats
>>>> values,
>>>> get_stats() displays diff between stats and offset.
>>>> This way stats only updated in datapath and offset only updated in stats
>>>> reset function. This makes stats reset function more reliable.
>>>>
>>>> As stats only written by single thread, we can remove 'volatile'
>>>> qualifier
>>>> which should improve the performance in datapath.
>>>>
>>>
>>> volatile wouldn't help you if you had multiple writers, so that can't
>>> be the reason for its removal. It would be more accurate to say it
>>> should be replaced with atomic updates. If you don't use volatile and
>>> don't use atomics, you have to consider if the compiler can reach the
>>> conclusion that it does not need to store the counter value for future
>>> use *for that thread*. Since otherwise, I don't think the store
>>> actually needs to occur. Since DPDK statistics tend to work, it's
>>> pretty obvious that current compilers tend not to reach this conclusion.
>>>
>>> If this should be done 100% properly, the update operation should be a
>>> non-atomic load, non-atomic add, and an atomic store. Similarly, for
>>> the reset, the offset store should be atomic.
>>>
>>> Considered the state of the rest of the DPDK code base, I think a
>>> non-atomic, non-volatile solution is also fine.
>>>
>>> (That said, I think we're better off just deprecating stats reset
>>> altogether, and returning -ENOTSUP here meanwhile.)
>>>
>>>> Signed-off-by: Ferruh Yigit <ferruh.yigit@amd.com>
>>>> ---
>>>> Cc: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
>>>> Cc: Stephen Hemminger <stephen@networkplumber.org>
>>>> Cc: Morten Brørup <mb@smartsharesystems.com>
>>>>
>>>> This update triggered by mail list discussion [1].
>>>>
>>>> [1]
>>>> https://inbox.dpdk.org/dev/3b2cf48e-2293-4226-b6cd-5f4dd3969f99@lysator.liu.se/
>>>>
>>>> v2:
>>>> * Remove wrapping check for stats
>>>> ---
>>>>    drivers/net/af_packet/rte_eth_af_packet.c | 66 ++++++++++++++---------
>>>>    1 file changed, 41 insertions(+), 25 deletions(-)
>>>>
>>>> diff --git a/drivers/net/af_packet/rte_eth_af_packet.c
>>>> b/drivers/net/af_packet/rte_eth_af_packet.c
>>>> index 397a32db5886..10c8e1e50139 100644
>>>> --- a/drivers/net/af_packet/rte_eth_af_packet.c
>>>> +++ b/drivers/net/af_packet/rte_eth_af_packet.c
>>>> @@ -51,8 +51,10 @@ struct pkt_rx_queue {
>>>>        uint16_t in_port;
>>>>        uint8_t vlan_strip;
>>>> -    volatile unsigned long rx_pkts;
>>>> -    volatile unsigned long rx_bytes;
>>>> +    uint64_t rx_pkts;
>>>> +    uint64_t rx_bytes;
>>>> +    uint64_t rx_pkts_offset;
>>>> +    uint64_t rx_bytes_offset;
>>>
>>> I suggest you introduce a separate struct for reset-able counters.
>>> It'll make things cleaner, and you can sneak in atomics without too
>>> much atomics-related bloat.
>>>
>>> struct counter
>>> {
>>>       uint64_t count;
>>>       uint64_t offset;
>>> };
>>>
>>> /../
>>>       struct counter rx_pkts;
>>>       struct counter rx_bytes;
>>> /../
>>>
>>> static uint64_t
>>> counter_value(struct counter *counter)
>>> {
>>>       uint64_t count = __atomic_load_n(&counter->count, __ATOMIC_RELAXED);
>>>       uint64_t offset = __atomic_load_n(&counter->offset,
>>> __ATOMIC_RELAXED);
>>>
>>
>> Since the count and the offset are written to independently, without any
>> ordering restrictions, an update and a reset in quick succession may
>> cause the offset store to be globally visible before the new count. In
>> such a scenario, a reader could see an offset > count.
>>
>> Thus, unless I'm missing something, one should add a
>>
>> if (unlikely(offset > count))
>>      return 0;
>>
>> here. With the appropriate comment explaining why this might be.
>>
>> Another approach would be to think about what memory barriers may be
>> required to make sure one sees the count update before the offset
>> update, but, intuitively, that seems like both more complex and more
>> costly (performance-wise).
>>
> 
> We are going with lazy alternative and requesting to stop forwarding
> before stats reset, this should prevent 'count' and 'offset' being
> updated simultaneously.
> 
> 

In that case, 'offset' is not needed.

>>>       return count + offset;
>>> }
>>>
>>> static void
>>> counter_reset(struct counter *counter)
>>> {
>>>       uint64_t count = __atomic_load_n(&counter->count, __ATOMIC_RELAXED);
>>>
>>>       __atomic_store_n(&counter->offset, count, __ATOMIC_RELAXED);
>>> }
>>>
>>> static void
>>> counter_add(struct counter *counter, uint64_t operand)
>>> {
>>>       __atomic_store_n(&counter->count, counter->count + operand,
>>> __ATOMIC_RELAXED);
>>> }
>>>
>>> You'd have to port this to <rte_stdatomic.h> calls, which prevents
>>> non-atomic loads from RTE_ATOMIC()s. The non-atomic reads above must
>>> be replaced with explicit relaxed non-atomic load. Otherwise, if you
>>> just use "counter->count", that would be an atomic load with
>>> sequential consistency memory order on C11 atomics-based builds, which
>>> would result in a barrier, at least on weakly ordered machines (e.g.,
>>> ARM).
>>>
>>> I would still use a struct and some helper-functions even for the less
>>> ambitious, non-atomic variant.
>>>
>>> The only drawback of using GCC built-ins type atomics here, versus an
>>> atomic- and volatile-free approach, is that current compilers seems to
>>> refuse merging atomic stores. It's beyond me why this is the case. If
>>> you store to a variable twice in quick succession, it'll be two store
>>> machine instructions, even in cases where the compiler *knows* the
>>> value is identical. So volatile, even though you didn't ask for it.
>>> Weird.
>>>
>>> So if you have a loop, you may want to make an "counter_add()" in the
>>> end from a temporary, to get the final 0.001% of performance.
>>>
>>> If the tech board thinks MT-safe reset-able software-manage statistics
>>> is the future (as opposed to dropping reset support, for example), I
>>> think this stuff should go into a separate header file, so other PMDs
>>> can reuse it. Maybe out of scope for this patch.
>>>
>>>>    };
>>>>    struct pkt_tx_queue {
>>>> @@ -64,9 +66,12 @@ struct pkt_tx_queue {
>>>>        unsigned int framecount;
>>>>        unsigned int framenum;
>>>> -    volatile unsigned long tx_pkts;
>>>> -    volatile unsigned long err_pkts;
>>>> -    volatile unsigned long tx_bytes;
>>>> +    uint64_t tx_pkts;
>>>> +    uint64_t err_pkts;
>>>> +    uint64_t tx_bytes;
>>>> +    uint64_t tx_pkts_offset;
>>>> +    uint64_t err_pkts_offset;
>>>> +    uint64_t tx_bytes_offset;
>>>>    };
>>>>    struct pmd_internals {
>>>> @@ -385,8 +390,15 @@ eth_dev_info(struct rte_eth_dev *dev, struct
>>>> rte_eth_dev_info *dev_info)
>>>>        return 0;
>>>>    }
>>>> +
>>>> +static uint64_t
>>>> +stats_get_diff(uint64_t stats, uint64_t offset)
>>>> +{
>>>> +    return stats - offset;
>>>> +}
>>>> +
>>>>    static int
>>>> -eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
>>>> +eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
>>>>    {
>>>>        unsigned i, imax;
>>>>        unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
>>>> @@ -396,27 +408,29 @@ eth_stats_get(struct rte_eth_dev *dev, struct
>>>> rte_eth_stats *igb_stats)
>>>>        imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
>>>>                internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
>>>>        for (i = 0; i < imax; i++) {
>>>> -        igb_stats->q_ipackets[i] = internal->rx_queue[i].rx_pkts;
>>>> -        igb_stats->q_ibytes[i] = internal->rx_queue[i].rx_bytes;
>>>> -        rx_total += igb_stats->q_ipackets[i];
>>>> -        rx_bytes_total += igb_stats->q_ibytes[i];
>>>> +        struct pkt_rx_queue *rxq = &internal->rx_queue[i];
>>>> +        stats->q_ipackets[i] = stats_get_diff(rxq->rx_pkts,
>>>> rxq->rx_pkts_offset);
>>>> +        stats->q_ibytes[i] = stats_get_diff(rxq->rx_bytes,
>>>> rxq->rx_bytes_offset);
>>>> +        rx_total += stats->q_ipackets[i];
>>>> +        rx_bytes_total += stats->q_ibytes[i];
>>>>        }
>>>>        imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
>>>>                internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
>>>>        for (i = 0; i < imax; i++) {
>>>> -        igb_stats->q_opackets[i] = internal->tx_queue[i].tx_pkts;
>>>> -        igb_stats->q_obytes[i] = internal->tx_queue[i].tx_bytes;
>>>> -        tx_total += igb_stats->q_opackets[i];
>>>> -        tx_err_total += internal->tx_queue[i].err_pkts;
>>>> -        tx_bytes_total += igb_stats->q_obytes[i];
>>>> +        struct pkt_tx_queue *txq = &internal->tx_queue[i];
>>>> +        stats->q_opackets[i] = stats_get_diff(txq->tx_pkts,
>>>> txq->tx_pkts_offset);
>>>> +        stats->q_obytes[i] = stats_get_diff(txq->tx_bytes,
>>>> txq->tx_bytes_offset);
>>>> +        tx_total += stats->q_opackets[i];
>>>> +        tx_err_total += stats_get_diff(txq->err_pkts,
>>>> txq->err_pkts_offset);
>>>> +        tx_bytes_total += stats->q_obytes[i];
>>>>        }
>>>> -    igb_stats->ipackets = rx_total;
>>>> -    igb_stats->ibytes = rx_bytes_total;
>>>> -    igb_stats->opackets = tx_total;
>>>> -    igb_stats->oerrors = tx_err_total;
>>>> -    igb_stats->obytes = tx_bytes_total;
>>>> +    stats->ipackets = rx_total;
>>>> +    stats->ibytes = rx_bytes_total;
>>>> +    stats->opackets = tx_total;
>>>> +    stats->oerrors = tx_err_total;
>>>> +    stats->obytes = tx_bytes_total;
>>>>        return 0;
>>>>    }
>>>> @@ -427,14 +441,16 @@ eth_stats_reset(struct rte_eth_dev *dev)
>>>>        struct pmd_internals *internal = dev->data->dev_private;
>>>>        for (i = 0; i < internal->nb_queues; i++) {
>>>> -        internal->rx_queue[i].rx_pkts = 0;
>>>> -        internal->rx_queue[i].rx_bytes = 0;
>>>> +        struct pkt_rx_queue *rxq = &internal->rx_queue[i];
>>>> +        rxq->rx_pkts_offset = rxq->rx_pkts;
>>>> +        rxq->rx_bytes_offset = rxq->rx_bytes;
>>>>        }
>>>>        for (i = 0; i < internal->nb_queues; i++) {
>>>> -        internal->tx_queue[i].tx_pkts = 0;
>>>> -        internal->tx_queue[i].err_pkts = 0;
>>>> -        internal->tx_queue[i].tx_bytes = 0;
>>>> +        struct pkt_tx_queue *txq = &internal->tx_queue[i];
>>>> +        txq->tx_pkts_offset = txq->tx_pkts;
>>>> +        txq->err_pkts_offset = txq->err_pkts;
>>>> +        txq->tx_bytes_offset = txq->tx_bytes;
>>>>        }
>>>>        return 0;
> 

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC v2] net/af_packet: make stats reset reliable
  2024-05-07 14:51         ` Stephen Hemminger
  2024-05-07 16:00           ` Morten Brørup
@ 2024-05-08  6:28           ` Mattias Rönnblom
  1 sibling, 0 replies; 179+ messages in thread
From: Mattias Rönnblom @ 2024-05-08  6:28 UTC (permalink / raw)
  To: Stephen Hemminger, Ferruh Yigit
  Cc: John W. Linville, Thomas Monjalon, dev, Mattias Rönnblom,
	Morten Brørup

On 2024-05-07 16:51, Stephen Hemminger wrote:
> On Tue, 7 May 2024 14:49:19 +0100
> Ferruh Yigit <ferruh.yigit@amd.com> wrote:
> 
>> On 5/7/2024 8:23 AM, Mattias Rönnblom wrote:
>>> On 2024-04-28 17:11, Mattias Rönnblom wrote:
>>>> On 2024-04-26 16:38, Ferruh Yigit wrote:
>>>>> For stats reset, use an offset instead of zeroing out actual stats
>>>>> values,
>>>>> get_stats() displays diff between stats and offset.
>>>>> This way stats only updated in datapath and offset only updated in stats
>>>>> reset function. This makes stats reset function more reliable.
>>>>>
>>>>> As stats only written by single thread, we can remove 'volatile'
>>>>> qualifier
>>>>> which should improve the performance in datapath.
>>>>>   
>>>>
>>>> volatile wouldn't help you if you had multiple writers, so that can't
>>>> be the reason for its removal. It would be more accurate to say it
>>>> should be replaced with atomic updates. If you don't use volatile and
>>>> don't use atomics, you have to consider if the compiler can reach the
>>>> conclusion that it does not need to store the counter value for future
>>>> use *for that thread*. Since otherwise, I don't think the store
>>>> actually needs to occur. Since DPDK statistics tend to work, it's
>>>> pretty obvious that current compilers tend not to reach this conclusion.
>>>>
>>>> If this should be done 100% properly, the update operation should be a
>>>> non-atomic load, non-atomic add, and an atomic store. Similarly, for
>>>> the reset, the offset store should be atomic.
>>>>
>>>> Considered the state of the rest of the DPDK code base, I think a
>>>> non-atomic, non-volatile solution is also fine.
>>>>
>>>> (That said, I think we're better off just deprecating stats reset
>>>> altogether, and returning -ENOTSUP here meanwhile.)
>>>>   
>>>>> Signed-off-by: Ferruh Yigit <ferruh.yigit@amd.com>
>>>>> ---
>>>>> Cc: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
>>>>> Cc: Stephen Hemminger <stephen@networkplumber.org>
>>>>> Cc: Morten Brørup <mb@smartsharesystems.com>
>>>>>
>>>>> This update triggered by mail list discussion [1].
>>>>>
>>>>> [1]
>>>>> https://inbox.dpdk.org/dev/3b2cf48e-2293-4226-b6cd-5f4dd3969f99@lysator.liu.se/
> 
> I would prefer that the SW statistics be handled generically by ethdev
> layers and used by all such drivers.
> 
> The most complete version of SW stats now is in the virtio driver.
> If reset needs to be reliable (debatable), then it needs to be done without
> atomics.

Why it needs to be done without atomics? Whatever that means.

In what sense should they be unreliable, needs to be documented.

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC v2] net/af_packet: make stats reset reliable
  2024-05-07 19:19       ` Morten Brørup
@ 2024-05-08  6:34         ` Mattias Rönnblom
  2024-05-08  7:10           ` Morten Brørup
  0 siblings, 1 reply; 179+ messages in thread
From: Mattias Rönnblom @ 2024-05-08  6:34 UTC (permalink / raw)
  To: Morten Brørup, Ferruh Yigit, John W. Linville
  Cc: Thomas Monjalon, dev, Mattias Rönnblom, Stephen Hemminger

On 2024-05-07 21:19, Morten Brørup wrote:
>> From: Mattias Rönnblom [mailto:hofors@lysator.liu.se]
>> Sent: Tuesday, 7 May 2024 09.24
>>
>> On 2024-04-28 17:11, Mattias Rönnblom wrote:
>>> On 2024-04-26 16:38, Ferruh Yigit wrote:
> 
> [...]
> 
>>> static uint64_t
>>> counter_value(struct counter *counter)
>>> {
>>>       uint64_t count = __atomic_load_n(&counter->count,
>> __ATOMIC_RELAXED);
>>>       uint64_t offset = __atomic_load_n(&counter->offset,
>> __ATOMIC_RELAXED);
>>>
>>
>> Since the count and the offset are written to independently, without any
>> ordering restrictions, an update and a reset in quick succession may
>> cause the offset store to be globally visible before the new count.
> 
> Good catch.
> This may happen when a thread calls stats_add() and then the same thread calls stats_reset().
> 
>> In such a scenario, a reader could see an offset > count.
>>
>> Thus, unless I'm missing something, one should add a
>>
>> if (unlikely(offset > count))
>> 	return 0;
>>
>> here. With the appropriate comment explaining why this might be.
>>
>> Another approach would be to think about what memory barriers may be
>> required to make sure one sees the count update before the offset
>> update, but, intuitively, that seems like both more complex and more
>> costly (performance-wise).
> 
> I think it can be done without affecting stats_add(), by using "offset" with Release-Consume ordering:
>   - stats_reset() must write "offset" with memory_order_release, so "counter" cannot be visible after it, and
>   - stats_get() must read "offset" with memory_order_consume, so no reads or writes in the current thread dependent on "offset" can be reordered before this load, and writes to "counter" (a data-dependent variable) in other threads that release "offset" are visible in the current thread.
> 

That was the kind of complexity I was thinking about. Those barriers 
come with a non-zero cost, both with different instructions being used 
and compiler optimizations being prevented.

>>
>>>       return count + offset;
>>> }
>>>
>>> static void
>>> counter_reset(struct counter *counter)
>>> {
>>>       uint64_t count = __atomic_load_n(&counter->count,
>> __ATOMIC_RELAXED);
>>>
>>>       __atomic_store_n(&counter->offset, count, __ATOMIC_RELAXED);
>>> }
>>>
>>> static void
>>> counter_add(struct counter *counter, uint64_t operand)
>>> {
>>>       __atomic_store_n(&counter->count, counter->count + operand,
>>> __ATOMIC_RELAXED);
>>> }
> 

^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [RFC v2] net/af_packet: make stats reset reliable
  2024-05-08  6:34         ` Mattias Rönnblom
@ 2024-05-08  7:10           ` Morten Brørup
  2024-05-08  7:23             ` Mattias Rönnblom
  0 siblings, 1 reply; 179+ messages in thread
From: Morten Brørup @ 2024-05-08  7:10 UTC (permalink / raw)
  To: Mattias Rönnblom, Ferruh Yigit, John W. Linville
  Cc: Thomas Monjalon, dev, Mattias Rönnblom, Stephen Hemminger

> From: Mattias Rönnblom [mailto:hofors@lysator.liu.se]
> Sent: Wednesday, 8 May 2024 08.35
> 
> On 2024-05-07 21:19, Morten Brørup wrote:
> >> From: Mattias Rönnblom [mailto:hofors@lysator.liu.se]
> >> Sent: Tuesday, 7 May 2024 09.24
> >>
> >> On 2024-04-28 17:11, Mattias Rönnblom wrote:
> >>> On 2024-04-26 16:38, Ferruh Yigit wrote:
> >
> > [...]
> >
> >>> static uint64_t
> >>> counter_value(struct counter *counter)
> >>> {
> >>>       uint64_t count = __atomic_load_n(&counter->count,
> >> __ATOMIC_RELAXED);
> >>>       uint64_t offset = __atomic_load_n(&counter->offset,
> >> __ATOMIC_RELAXED);
> >>>
> >>
> >> Since the count and the offset are written to independently, without any
> >> ordering restrictions, an update and a reset in quick succession may
> >> cause the offset store to be globally visible before the new count.
> >
> > Good catch.
> > This may happen when a thread calls stats_add() and then the same thread
> calls stats_reset().
> >
> >> In such a scenario, a reader could see an offset > count.
> >>
> >> Thus, unless I'm missing something, one should add a
> >>
> >> if (unlikely(offset > count))
> >> 	return 0;
> >>
> >> here. With the appropriate comment explaining why this might be.
> >>
> >> Another approach would be to think about what memory barriers may be
> >> required to make sure one sees the count update before the offset
> >> update, but, intuitively, that seems like both more complex and more
> >> costly (performance-wise).
> >
> > I think it can be done without affecting stats_add(), by using "offset" with
> Release-Consume ordering:
> >   - stats_reset() must write "offset" with memory_order_release, so
> "counter" cannot be visible after it, and
> >   - stats_get() must read "offset" with memory_order_consume, so no reads or
> writes in the current thread dependent on "offset" can be reordered before
> this load, and writes to "counter" (a data-dependent variable) in other
> threads that release "offset" are visible in the current thread.
> >
> 
> That was the kind of complexity I was thinking about. Those barriers
> come with a non-zero cost, both with different instructions being used
> and compiler optimizations being prevented.

Yep, you mentioned that there might be a more complex alternative, so I decided to explore it. :-)

This approach doesn't impose any new requirements on stats_add(), so the data plane performance is not affected.

For per-thread counters, stats_add() can store "counter" using memory_order_relaxed. Or, if the architecture prevents tearing of 64-bit variables, using volatile.

Counters shared by multiple threads must be atomically incremented using rte_atomic_fetch_add_explicit() with memory_order_relaxed.

> 
> >>
> >>>       return count + offset;
> >>> }
> >>>
> >>> static void
> >>> counter_reset(struct counter *counter)
> >>> {
> >>>       uint64_t count = __atomic_load_n(&counter->count,
> >> __ATOMIC_RELAXED);
> >>>
> >>>       __atomic_store_n(&counter->offset, count, __ATOMIC_RELAXED);
> >>> }
> >>>
> >>> static void
> >>> counter_add(struct counter *counter, uint64_t operand)
> >>> {
> >>>       __atomic_store_n(&counter->count, counter->count + operand,
> >>> __ATOMIC_RELAXED);
> >>> }
> >

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC v3] net/af_packet: make stats reset reliable
  2024-05-03 22:00   ` Stephen Hemminger
  2024-05-07 13:48     ` Ferruh Yigit
@ 2024-05-08  7:19     ` Mattias Rönnblom
  2024-05-08 15:23       ` Stephen Hemminger
  1 sibling, 1 reply; 179+ messages in thread
From: Mattias Rönnblom @ 2024-05-08  7:19 UTC (permalink / raw)
  To: Stephen Hemminger, Ferruh Yigit
  Cc: John W. Linville, Thomas Monjalon, dev, Mattias Rönnblom,
	Morten Brørup

On 2024-05-04 00:00, Stephen Hemminger wrote:
> On Fri, 3 May 2024 16:45:47 +0100
> Ferruh Yigit <ferruh.yigit@amd.com> wrote:
> 
>> For stats reset, use an offset instead of zeroing out actual stats values,
>> get_stats() displays diff between stats and offset.
>> This way stats only updated in datapath and offset only updated in stats
>> reset function. This makes stats reset function more reliable.
>>
>> As stats only written by single thread, we can remove 'volatile' qualifier
>> which should improve the performance in datapath.
>>
>> While updating around, 'igb_stats' parameter renamed as 'stats'.
>>
>> Signed-off-by: Ferruh Yigit <ferruh.yigit@amd.com>
>> ---
>> Cc: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
>> Cc: Stephen Hemminger <stephen@networkplumber.org>
>> Cc: Morten Brørup <mb@smartsharesystems.com>
>>
>> This update triggered by mail list discussion [1].
>>
>> [1]
>> https://inbox.dpdk.org/dev/3b2cf48e-2293-4226-b6cd-5f4dd3969f99@lysator.liu.se/
> 
> 
> NAK
> 
> I did not hear a good argument why atomic or volatile was necessary in the first place.
> Why?
> 

On the reader side, loads should be atomic.
On the writer side, stores should be atomic.

Updates (stores) should actually occur in a timely manner. The complete 
read-modify-write cycle need not be atomic, since we only have a single 
writer. All this for the per-lcore counter case.

If load or store tearing occurs, the counter values may occasionally 
take totally bogus values. I think that should be avoided. Especially 
since it will likely come at a very reasonable cost.

 From what it seems to me, load or store tearing may well occur. GCC may 
generate two 32-bit stores for a program-level 64-bit store on 32-bit 
x86. If you have constant and immediate-data store instructions, 
constant writes may also be end up teared. The kernel documentation has 
some example of this. Add LTO, it's not necessarily going to be all that 
clear what is storing-a-constant and what is not.

Maybe you care a little less if statistics are occasionally broken, or 
some transient, inconsistent state, but generally they should work, and 
they should never have some totally bogus values. So, statistics aren't 
snow flakes, mostly just business as usual.

We can't both have a culture that promotes C11-style parallel 
programming, or, at the extreme, push the C11 APIs as-is, and the say 
"and btw you don't have to care about the standard when it comes to 
statistics".

We could adopt the Linux kernel's rules, programming model, and APIs 
(ignoring legal issues). That would be very old school, maybe somewhat 
over-engineered for our purpose, include a fair amount of inline 
assembler, and also and may well depend on GCC or GCC-like compilers, 
just like what I believe the kernel does.

We could use something in-between, heavily inspired by C11 but still 
with an opportunity to work around compiler issues, library issues, and
extend the API for our use case.

I agree we shouldn't have to mark statistics _Atomic, or RTE_ATOMIC(), 
rte_atomic64_t, or rte_sometimes_atomic_and_sometimes_not64_t. Just 
keeping the usual C integer types seems like a better option to me.

> Why is this driver special (a snowflake) compared to all the other drivers doing software
> statistics (tap, virtio, xdp, ring, memif, netvsc, vmware)?

If a broken piece of code has been copied around, one place is going to 
be the first to be fixed.

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC v2] net/af_packet: make stats reset reliable
  2024-05-08  7:10           ` Morten Brørup
@ 2024-05-08  7:23             ` Mattias Rönnblom
  0 siblings, 0 replies; 179+ messages in thread
From: Mattias Rönnblom @ 2024-05-08  7:23 UTC (permalink / raw)
  To: Morten Brørup, Ferruh Yigit, John W. Linville
  Cc: Thomas Monjalon, dev, Mattias Rönnblom, Stephen Hemminger

On 2024-05-08 09:10, Morten Brørup wrote:
>> From: Mattias Rönnblom [mailto:hofors@lysator.liu.se]
>> Sent: Wednesday, 8 May 2024 08.35
>>
>> On 2024-05-07 21:19, Morten Brørup wrote:
>>>> From: Mattias Rönnblom [mailto:hofors@lysator.liu.se]
>>>> Sent: Tuesday, 7 May 2024 09.24
>>>>
>>>> On 2024-04-28 17:11, Mattias Rönnblom wrote:
>>>>> On 2024-04-26 16:38, Ferruh Yigit wrote:
>>>
>>> [...]
>>>
>>>>> static uint64_t
>>>>> counter_value(struct counter *counter)
>>>>> {
>>>>>        uint64_t count = __atomic_load_n(&counter->count,
>>>> __ATOMIC_RELAXED);
>>>>>        uint64_t offset = __atomic_load_n(&counter->offset,
>>>> __ATOMIC_RELAXED);
>>>>>
>>>>
>>>> Since the count and the offset are written to independently, without any
>>>> ordering restrictions, an update and a reset in quick succession may
>>>> cause the offset store to be globally visible before the new count.
>>>
>>> Good catch.
>>> This may happen when a thread calls stats_add() and then the same thread
>> calls stats_reset().
>>>
>>>> In such a scenario, a reader could see an offset > count.
>>>>
>>>> Thus, unless I'm missing something, one should add a
>>>>
>>>> if (unlikely(offset > count))
>>>> 	return 0;
>>>>
>>>> here. With the appropriate comment explaining why this might be.
>>>>
>>>> Another approach would be to think about what memory barriers may be
>>>> required to make sure one sees the count update before the offset
>>>> update, but, intuitively, that seems like both more complex and more
>>>> costly (performance-wise).
>>>
>>> I think it can be done without affecting stats_add(), by using "offset" with
>> Release-Consume ordering:
>>>    - stats_reset() must write "offset" with memory_order_release, so
>> "counter" cannot be visible after it, and
>>>    - stats_get() must read "offset" with memory_order_consume, so no reads or
>> writes in the current thread dependent on "offset" can be reordered before
>> this load, and writes to "counter" (a data-dependent variable) in other
>> threads that release "offset" are visible in the current thread.
>>>
>>
>> That was the kind of complexity I was thinking about. Those barriers
>> come with a non-zero cost, both with different instructions being used
>> and compiler optimizations being prevented.
> 
> Yep, you mentioned that there might be a more complex alternative, so I decided to explore it. :-)
> 
> This approach doesn't impose any new requirements on stats_add(), so the data plane performance is not affected.
> 

OK, I get it now. That's a good point. In my thought experiment, I had a 
thread both updating and resetting the counter, which should be allowed, 
but you could have barriers sit only in the reset routine.

> For per-thread counters, stats_add() can store "counter" using memory_order_relaxed. Or, if the architecture prevents tearing of 64-bit variables, using volatile.
> 
> Counters shared by multiple threads must be atomically incremented using rte_atomic_fetch_add_explicit() with memory_order_relaxed.
> 
>>
>>>>
>>>>>        return count + offset;
>>>>> }
>>>>>
>>>>> static void
>>>>> counter_reset(struct counter *counter)
>>>>> {
>>>>>        uint64_t count = __atomic_load_n(&counter->count,
>>>> __ATOMIC_RELAXED);
>>>>>
>>>>>        __atomic_store_n(&counter->offset, count, __ATOMIC_RELAXED);
>>>>> }
>>>>>
>>>>> static void
>>>>> counter_add(struct counter *counter, uint64_t operand)
>>>>> {
>>>>>        __atomic_store_n(&counter->count, counter->count + operand,
>>>>> __ATOMIC_RELAXED);
>>>>> }
>>>

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC v2] net/af_packet: make stats reset reliable
  2024-05-07 16:00           ` Morten Brørup
  2024-05-07 16:54             ` Ferruh Yigit
@ 2024-05-08  7:48             ` Mattias Rönnblom
  1 sibling, 0 replies; 179+ messages in thread
From: Mattias Rönnblom @ 2024-05-08  7:48 UTC (permalink / raw)
  To: Morten Brørup, Stephen Hemminger, Ferruh Yigit
  Cc: John W. Linville, Thomas Monjalon, dev, Mattias Rönnblom

On 2024-05-07 18:00, Morten Brørup wrote:
>> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
>> Sent: Tuesday, 7 May 2024 16.51
> 
>> I would prefer that the SW statistics be handled generically by ethdev
>> layers and used by all such drivers.
> 
> I agree.
> 
> Please note that maintaining counters in the ethdev layer might cause more cache misses than maintaining them in the hot parts of the individual drivers' data structures, so it's not all that simple. ;-)
> 
> Until then, let's find a short term solution, viable to implement across all software NIC drivers without API/ABI breakage.
> 
>>
>> The most complete version of SW stats now is in the virtio driver.
> 
> It looks like the virtio PMD maintains the counters; they are not retrieved from the host.
> 
> Considering a DPDK application running as a virtual machine (guest) on a host server...
> 
> If the host is unable to put a packet onto the guest's virtio RX queue - like when a HW NIC is out of RX descriptors - is it counted somewhere visible to the guest?
> 
> Similarly, if the guest is unable to put a packet onto its virtio TX queue, is it counted somewhere visible to the host?
> 
>> If reset needs to be reliable (debatable), then it needs to be done without
>> atomics.
> 
> Let's modify that slightly: Without performance degradation in the fast path.
> I'm not sure that all atomic operations are slow.

Relaxed atomic loads from and stores to naturally aligned addresses are 
for free on ARM and x86_64 up to at least 64 bits.

"For free" is not entirely true, since both C11 relaxed stores and 
stores through volatile may prevent vectorization in GCC. I don't see 
why, but in practice that seems to be the case. That is very much a 
corner case.

Also, as mentioned before, C11 atomic store effectively has volatile 
semantics, which in turn may prevent some compiler optimizations.

On 32-bit x86, 64-bit atomic stores use xmm registers, but those are 
going to be used anyway, since you'll have a 64-bit add.

> But you are right that it needs to be done without _Atomic counters; they seem to be slow.
> 

_Atomic is not slower than atomics without _Atomic, when you actually 
need atomic operations.

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC v3] net/af_packet: make stats reset reliable
  2024-05-08  7:19     ` Mattias Rönnblom
@ 2024-05-08 15:23       ` Stephen Hemminger
  2024-05-08 19:48         ` Ferruh Yigit
  2024-05-26  7:21         ` Mattias Rönnblom
  0 siblings, 2 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-08 15:23 UTC (permalink / raw)
  To: Mattias Rönnblom
  Cc: Ferruh Yigit, John W. Linville, Thomas Monjalon, dev,
	Mattias Rönnblom, Morten Brørup

On Wed, 8 May 2024 09:19:02 +0200
Mattias Rönnblom <hofors@lysator.liu.se> wrote:

> On 2024-05-04 00:00, Stephen Hemminger wrote:
> > On Fri, 3 May 2024 16:45:47 +0100
> > Ferruh Yigit <ferruh.yigit@amd.com> wrote:
> >   
> >> For stats reset, use an offset instead of zeroing out actual stats values,
> >> get_stats() displays diff between stats and offset.
> >> This way stats only updated in datapath and offset only updated in stats
> >> reset function. This makes stats reset function more reliable.
> >>
> >> As stats only written by single thread, we can remove 'volatile' qualifier
> >> which should improve the performance in datapath.
> >>
> >> While updating around, 'igb_stats' parameter renamed as 'stats'.
> >>
> >> Signed-off-by: Ferruh Yigit <ferruh.yigit@amd.com>
> >> ---
> >> Cc: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
> >> Cc: Stephen Hemminger <stephen@networkplumber.org>
> >> Cc: Morten Brørup <mb@smartsharesystems.com>
> >>
> >> This update triggered by mail list discussion [1].
> >>
> >> [1]
> >> https://inbox.dpdk.org/dev/3b2cf48e-2293-4226-b6cd-5f4dd3969f99@lysator.liu.se/  
> > 
> > 
> > NAK
> > 
> > I did not hear a good argument why atomic or volatile was necessary in the first place.
> > Why?
> >   
> 
> On the reader side, loads should be atomic.
> On the writer side, stores should be atomic.
> 
> Updates (stores) should actually occur in a timely manner. The complete 
> read-modify-write cycle need not be atomic, since we only have a single 
> writer. All this for the per-lcore counter case.
> 
> If load or store tearing occurs, the counter values may occasionally 
> take totally bogus values. I think that should be avoided. Especially 
> since it will likely come at a very reasonable cost.
> 
>  From what it seems to me, load or store tearing may well occur. GCC may 
> generate two 32-bit stores for a program-level 64-bit store on 32-bit 
> x86. If you have constant and immediate-data store instructions, 
> constant writes may also be end up teared. The kernel documentation has 
> some example of this. Add LTO, it's not necessarily going to be all that 
> clear what is storing-a-constant and what is not.
> 
> Maybe you care a little less if statistics are occasionally broken, or 
> some transient, inconsistent state, but generally they should work, and 
> they should never have some totally bogus values. So, statistics aren't 
> snow flakes, mostly just business as usual.
> 
> We can't both have a culture that promotes C11-style parallel 
> programming, or, at the extreme, push the C11 APIs as-is, and the say 
> "and btw you don't have to care about the standard when it comes to 
> statistics".
> 
> We could adopt the Linux kernel's rules, programming model, and APIs 
> (ignoring legal issues). That would be very old school, maybe somewhat 
> over-engineered for our purpose, include a fair amount of inline 
> assembler, and also and may well depend on GCC or GCC-like compilers, 
> just like what I believe the kernel does.
> 
> We could use something in-between, heavily inspired by C11 but still 
> with an opportunity to work around compiler issues, library issues, and
> extend the API for our use case.
> 
> I agree we shouldn't have to mark statistics _Atomic, or RTE_ATOMIC(), 
> rte_atomic64_t, or rte_sometimes_atomic_and_sometimes_not64_t. Just 
> keeping the usual C integer types seems like a better option to me.
> 
> > Why is this driver special (a snowflake) compared to all the other drivers doing software
> > statistics (tap, virtio, xdp, ring, memif, netvsc, vmware)?  
> 
> If a broken piece of code has been copied around, one place is going to 
> be the first to be fixed.


I dislike when any driver does something completely different than valid precedent.
No other driver in DPDK, Vpp, FreeBSD, Linux (and probably Windows) uses atomic for
updating statistics. We even got performance benefit at MS from removing atomic
increment of staistics in internal layers.

The idea of load tearing is crazy talk of integral types. It would break so many things.
It is the kind of stupid compiler thing that would send Linus on a rant and get
the GCC compiler writers in trouble. 

The DPDK has always favored performance over strict safety guard rails everywhere.
Switching to making every statistic an atomic operation is not in the spirit of
what is required. There is no strict guarantee necessary here.


^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC v3] net/af_packet: make stats reset reliable
  2024-05-08 15:23       ` Stephen Hemminger
@ 2024-05-08 19:48         ` Ferruh Yigit
  2024-05-08 20:54           ` Stephen Hemminger
  2024-05-26  7:21         ` Mattias Rönnblom
  1 sibling, 1 reply; 179+ messages in thread
From: Ferruh Yigit @ 2024-05-08 19:48 UTC (permalink / raw)
  To: Stephen Hemminger, Mattias Rönnblom
  Cc: John W. Linville, Thomas Monjalon, dev, Mattias Rönnblom,
	Morten Brørup

On 5/8/2024 4:23 PM, Stephen Hemminger wrote:
> On Wed, 8 May 2024 09:19:02 +0200
> Mattias Rönnblom <hofors@lysator.liu.se> wrote:
> 
>> On 2024-05-04 00:00, Stephen Hemminger wrote:
>>> On Fri, 3 May 2024 16:45:47 +0100
>>> Ferruh Yigit <ferruh.yigit@amd.com> wrote:
>>>   
>>>> For stats reset, use an offset instead of zeroing out actual stats values,
>>>> get_stats() displays diff between stats and offset.
>>>> This way stats only updated in datapath and offset only updated in stats
>>>> reset function. This makes stats reset function more reliable.
>>>>
>>>> As stats only written by single thread, we can remove 'volatile' qualifier
>>>> which should improve the performance in datapath.
>>>>
>>>> While updating around, 'igb_stats' parameter renamed as 'stats'.
>>>>
>>>> Signed-off-by: Ferruh Yigit <ferruh.yigit@amd.com>
>>>> ---
>>>> Cc: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
>>>> Cc: Stephen Hemminger <stephen@networkplumber.org>
>>>> Cc: Morten Brørup <mb@smartsharesystems.com>
>>>>
>>>> This update triggered by mail list discussion [1].
>>>>
>>>> [1]
>>>> https://inbox.dpdk.org/dev/3b2cf48e-2293-4226-b6cd-5f4dd3969f99@lysator.liu.se/  
>>>
>>>
>>> NAK
>>>
>>> I did not hear a good argument why atomic or volatile was necessary in the first place.
>>> Why?
>>>   
>>
>> On the reader side, loads should be atomic.
>> On the writer side, stores should be atomic.
>>
>> Updates (stores) should actually occur in a timely manner. The complete 
>> read-modify-write cycle need not be atomic, since we only have a single 
>> writer. All this for the per-lcore counter case.
>>
>> If load or store tearing occurs, the counter values may occasionally 
>> take totally bogus values. I think that should be avoided. Especially 
>> since it will likely come at a very reasonable cost.
>>
>>  From what it seems to me, load or store tearing may well occur. GCC may 
>> generate two 32-bit stores for a program-level 64-bit store on 32-bit 
>> x86. If you have constant and immediate-data store instructions, 
>> constant writes may also be end up teared. The kernel documentation has 
>> some example of this. Add LTO, it's not necessarily going to be all that 
>> clear what is storing-a-constant and what is not.
>>
>> Maybe you care a little less if statistics are occasionally broken, or 
>> some transient, inconsistent state, but generally they should work, and 
>> they should never have some totally bogus values. So, statistics aren't 
>> snow flakes, mostly just business as usual.
>>
>> We can't both have a culture that promotes C11-style parallel 
>> programming, or, at the extreme, push the C11 APIs as-is, and the say 
>> "and btw you don't have to care about the standard when it comes to 
>> statistics".
>>
>> We could adopt the Linux kernel's rules, programming model, and APIs 
>> (ignoring legal issues). That would be very old school, maybe somewhat 
>> over-engineered for our purpose, include a fair amount of inline 
>> assembler, and also and may well depend on GCC or GCC-like compilers, 
>> just like what I believe the kernel does.
>>
>> We could use something in-between, heavily inspired by C11 but still 
>> with an opportunity to work around compiler issues, library issues, and
>> extend the API for our use case.
>>
>> I agree we shouldn't have to mark statistics _Atomic, or RTE_ATOMIC(), 
>> rte_atomic64_t, or rte_sometimes_atomic_and_sometimes_not64_t. Just 
>> keeping the usual C integer types seems like a better option to me.
>>
>>> Why is this driver special (a snowflake) compared to all the other drivers doing software
>>> statistics (tap, virtio, xdp, ring, memif, netvsc, vmware)?  
>>
>> If a broken piece of code has been copied around, one place is going to 
>> be the first to be fixed.
> 
> 
> I dislike when any driver does something completely different than valid precedent.
> No other driver in DPDK, Vpp, FreeBSD, Linux (and probably Windows) uses atomic for
> updating statistics. We even got performance benefit at MS from removing atomic
> increment of staistics in internal layers.
> 
> The idea of load tearing is crazy talk of integral types. It would break so many things.
> It is the kind of stupid compiler thing that would send Linus on a rant and get
> the GCC compiler writers in trouble. 
> 
> The DPDK has always favored performance over strict safety guard rails everywhere.
> Switching to making every statistic an atomic operation is not in the spirit of
> what is required. There is no strict guarantee necessary here.
> 

I kind of agree with Stephen.

Thanks Mattias, Morten & Stephen, it was informative discussion. But for
*SW drivers* stats update and reset is not core functionality and I
think we can be OK to get hit on corner cases, instead of
over-engineering or making code more complex.

I am for putting priority as following (from high to low):
- Datapath performance
- Stats get accuracy
- Stats reset accuracy

With the restriction that stat reset requires forwarding to stop, we can
even drop 'offset' logic.
And I am not sure if it is a real requirement that stats reset should be
supported during forwarding, although I can see it is convenient.
If we get this requirement in the future, we can focus on a solution.


As action,
I am planning to send a new version of this RFC that only removes the
'volatile' qualifier.
In next step we can remove atomic updates and volatile stat counters
from more SW drivers.

Thanks,
ferruh


^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC v3] net/af_packet: make stats reset reliable
  2024-05-08 19:48         ` Ferruh Yigit
@ 2024-05-08 20:54           ` Stephen Hemminger
  2024-05-09  7:43             ` Morten Brørup
  0 siblings, 1 reply; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-08 20:54 UTC (permalink / raw)
  To: Ferruh Yigit
  Cc: Mattias Rönnblom, John W. Linville, Thomas Monjalon, dev,
	Mattias Rönnblom, Morten Brørup

On Wed, 8 May 2024 20:48:06 +0100
Ferruh Yigit <ferruh.yigit@amd.com> wrote:

> > 
> > The idea of load tearing is crazy talk of integral types. It would break so many things.
> > It is the kind of stupid compiler thing that would send Linus on a rant and get
> > the GCC compiler writers in trouble. 
> > 
> > The DPDK has always favored performance over strict safety guard rails everywhere.
> > Switching to making every statistic an atomic operation is not in the spirit of
> > what is required. There is no strict guarantee necessary here.
> >   
> 
> I kind of agree with Stephen.
> 
> Thanks Mattias, Morten & Stephen, it was informative discussion. But for
> *SW drivers* stats update and reset is not core functionality and I
> think we can be OK to get hit on corner cases, instead of
> over-engineering or making code more complex.


I forgot the case of 64 bit values on 32 bit platforms!
Mostly because haven't cared about 32 bit for years...

The Linux kernel uses some wrappers to handle this.
On 64 bit platforms they become noop.
On 32 bit platform, they are protected by a seqlock and updates are
wrapped by the sequence count.

If we go this way, then doing similar Noop on 64 bit and atomic or seqlock
on 32 bit should be done, but in common helper.

Looking inside FreeBSD, it looks like that has changed over the years as well.

	if_inc_counter
		counter_u64_add
			atomic_add_64
But the counters are always per-cpu in this case. So although it does use
locked operation, will always be uncontended.
				

PS: Does DPDK still actually support 32 bit on x86? Can it be dropped this cycle?

^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [RFC v3] net/af_packet: make stats reset reliable
  2024-05-08 20:54           ` Stephen Hemminger
@ 2024-05-09  7:43             ` Morten Brørup
  2024-05-09  9:29               ` Bruce Richardson
  0 siblings, 1 reply; 179+ messages in thread
From: Morten Brørup @ 2024-05-09  7:43 UTC (permalink / raw)
  To: Stephen Hemminger, Ferruh Yigit
  Cc: Mattias Rönnblom, John W. Linville, Thomas Monjalon, dev,
	Mattias Rönnblom

> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Wednesday, 8 May 2024 22.54
> 
> On Wed, 8 May 2024 20:48:06 +0100
> Ferruh Yigit <ferruh.yigit@amd.com> wrote:
> 
> > >
> > > The idea of load tearing is crazy talk of integral types. It would
> break so many things.
> > > It is the kind of stupid compiler thing that would send Linus on a
> rant and get
> > > the GCC compiler writers in trouble.
> > >
> > > The DPDK has always favored performance over strict safety guard
> rails everywhere.
> > > Switching to making every statistic an atomic operation is not in
> the spirit of
> > > what is required. There is no strict guarantee necessary here.
> > >
> >
> > I kind of agree with Stephen.
> >
> > Thanks Mattias, Morten & Stephen, it was informative discussion. But
> for
> > *SW drivers* stats update and reset is not core functionality and I
> > think we can be OK to get hit on corner cases, instead of
> > over-engineering or making code more complex.
> 
> 
> I forgot the case of 64 bit values on 32 bit platforms!
> Mostly because haven't cared about 32 bit for years...
> 
> The Linux kernel uses some wrappers to handle this.
> On 64 bit platforms they become noop.
> On 32 bit platform, they are protected by a seqlock and updates are
> wrapped by the sequence count.
> 
> If we go this way, then doing similar Noop on 64 bit and atomic or
> seqlock
> on 32 bit should be done, but in common helper.
> 
> Looking inside FreeBSD, it looks like that has changed over the years as
> well.
> 
> 	if_inc_counter
> 		counter_u64_add
> 			atomic_add_64
> But the counters are always per-cpu in this case. So although it does
> use
> locked operation, will always be uncontended.
> 
> 
> PS: Does DPDK still actually support 32 bit on x86? Can it be dropped
> this cycle?

We cannot drop 32 bit architecture support altogether.

But, unlike the Linux kernel, DPDK doesn't need to support ancient 32 bit architectures.
If the few 32 bit architectures supported by DPDK provide non-tearing 64 bit loads/stores, we don't need locks (in the fast path) for 64 bit counters.

In addition to 32 bit x86, DPDK supports ARMv7-A (a 32 bit architecture) and 32 bit ARMv8.
I don't think DPDK support any other 32 bit architectures.

As Mattias mentioned, 32 bit x86 can use xmm registers to provide 64 bit non-tearing load/store.

Looking at ARMv7-A documentation, this architecture offers 64 bit non-tearing load/store by using two 32-bit registers and double-word Exclusive load and store instructions, LDREXD and STREXD. I don't know how costly they are, performance wise.

Supporting 64 bit counters has much broader scope than SW drivers.
Providing a "DPDK standard" design pattern with some utility functions would be useful.

The af_packet driver could serve as a reference use case.
It maintains both per-thread (per-queue) counters and the dev->data->rx_mbuf_alloc_failed counter shared by multiple threads.

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC v3] net/af_packet: make stats reset reliable
  2024-05-09  7:43             ` Morten Brørup
@ 2024-05-09  9:29               ` Bruce Richardson
  2024-05-09 11:37                 ` Morten Brørup
  0 siblings, 1 reply; 179+ messages in thread
From: Bruce Richardson @ 2024-05-09  9:29 UTC (permalink / raw)
  To: Morten Brørup
  Cc: Stephen Hemminger, Ferruh Yigit, Mattias Rönnblom,
	John W. Linville, Thomas Monjalon, dev, Mattias Rönnblom

On Thu, May 09, 2024 at 09:43:16AM +0200, Morten Brørup wrote:
> > From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> > Sent: Wednesday, 8 May 2024 22.54
> > 
> > On Wed, 8 May 2024 20:48:06 +0100
> > Ferruh Yigit <ferruh.yigit@amd.com> wrote:
> > 
> > > >
> > > > The idea of load tearing is crazy talk of integral types. It would
> > break so many things.
> > > > It is the kind of stupid compiler thing that would send Linus on a
> > rant and get
> > > > the GCC compiler writers in trouble.
> > > >
> > > > The DPDK has always favored performance over strict safety guard
> > rails everywhere.
> > > > Switching to making every statistic an atomic operation is not in
> > the spirit of
> > > > what is required. There is no strict guarantee necessary here.
> > > >
> > >
> > > I kind of agree with Stephen.
> > >
> > > Thanks Mattias, Morten & Stephen, it was informative discussion. But
> > for
> > > *SW drivers* stats update and reset is not core functionality and I
> > > think we can be OK to get hit on corner cases, instead of
> > > over-engineering or making code more complex.
> > 
> > 
> > I forgot the case of 64 bit values on 32 bit platforms!
> > Mostly because haven't cared about 32 bit for years...
> > 
> > The Linux kernel uses some wrappers to handle this.
> > On 64 bit platforms they become noop.
> > On 32 bit platform, they are protected by a seqlock and updates are
> > wrapped by the sequence count.
> > 
> > If we go this way, then doing similar Noop on 64 bit and atomic or
> > seqlock
> > on 32 bit should be done, but in common helper.
> > 
> > Looking inside FreeBSD, it looks like that has changed over the years as
> > well.
> > 
> > 	if_inc_counter
> > 		counter_u64_add
> > 			atomic_add_64
> > But the counters are always per-cpu in this case. So although it does
> > use
> > locked operation, will always be uncontended.
> > 
> > 
> > PS: Does DPDK still actually support 32 bit on x86? Can it be dropped
> > this cycle?
> 
> We cannot drop 32 bit architecture support altogether.
> 
> But, unlike the Linux kernel, DPDK doesn't need to support ancient 32 bit architectures.
> If the few 32 bit architectures supported by DPDK provide non-tearing 64 bit loads/stores, we don't need locks (in the fast path) for 64 bit counters.
> 
> In addition to 32 bit x86, DPDK supports ARMv7-A (a 32 bit architecture) and 32 bit ARMv8.
> I don't think DPDK support any other 32 bit architectures.
> 
> 
> As Mattias mentioned, 32 bit x86 can use xmm registers to provide 64 bit non-tearing load/store.
> 

Testing this a little in godbolt, I see gcc using xmm registers on 32-bit
when updating 64-bit counters, but clang doesn't seem to do so, but instead
does 2 stores when writing back the 64 value. (I tried with both volatile
and non-volatile 64-bit values, just to see if volatile would encourage
clang to do a single store).

GCC: https://godbolt.org/z/9eqKfT3hz
Clang: https://godbolt.org/z/PT5EqKn4c

/Bruce

^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [RFC v3] net/af_packet: make stats reset reliable
  2024-05-09  9:29               ` Bruce Richardson
@ 2024-05-09 11:37                 ` Morten Brørup
  2024-05-09 14:19                   ` Morten Brørup
  2024-05-26  7:03                   ` Mattias Rönnblom
  0 siblings, 2 replies; 179+ messages in thread
From: Morten Brørup @ 2024-05-09 11:37 UTC (permalink / raw)
  To: Bruce Richardson
  Cc: Stephen Hemminger, Ferruh Yigit, Mattias Rönnblom,
	John W. Linville, Thomas Monjalon, dev, Mattias Rönnblom

> From: Bruce Richardson [mailto:bruce.richardson@intel.com]
> Sent: Thursday, 9 May 2024 11.30
> 
> On Thu, May 09, 2024 at 09:43:16AM +0200, Morten Brørup wrote:
> > > From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> > > Sent: Wednesday, 8 May 2024 22.54
> > >
> > > On Wed, 8 May 2024 20:48:06 +0100
> > > Ferruh Yigit <ferruh.yigit@amd.com> wrote:
> > >
> > > > >
> > > > > The idea of load tearing is crazy talk of integral types. It
> would
> > > break so many things.
> > > > > It is the kind of stupid compiler thing that would send Linus on
> a
> > > rant and get
> > > > > the GCC compiler writers in trouble.
> > > > >
> > > > > The DPDK has always favored performance over strict safety guard
> > > rails everywhere.
> > > > > Switching to making every statistic an atomic operation is not
> in
> > > the spirit of
> > > > > what is required. There is no strict guarantee necessary here.
> > > > >
> > > >
> > > > I kind of agree with Stephen.
> > > >
> > > > Thanks Mattias, Morten & Stephen, it was informative discussion.
> But
> > > for
> > > > *SW drivers* stats update and reset is not core functionality and
> I
> > > > think we can be OK to get hit on corner cases, instead of
> > > > over-engineering or making code more complex.
> > >
> > >
> > > I forgot the case of 64 bit values on 32 bit platforms!
> > > Mostly because haven't cared about 32 bit for years...
> > >
> > > The Linux kernel uses some wrappers to handle this.
> > > On 64 bit platforms they become noop.
> > > On 32 bit platform, they are protected by a seqlock and updates are
> > > wrapped by the sequence count.
> > >
> > > If we go this way, then doing similar Noop on 64 bit and atomic or
> > > seqlock
> > > on 32 bit should be done, but in common helper.
> > >
> > > Looking inside FreeBSD, it looks like that has changed over the
> years as
> > > well.
> > >
> > > 	if_inc_counter
> > > 		counter_u64_add
> > > 			atomic_add_64
> > > But the counters are always per-cpu in this case. So although it
> does
> > > use
> > > locked operation, will always be uncontended.
> > >
> > >
> > > PS: Does DPDK still actually support 32 bit on x86? Can it be
> dropped
> > > this cycle?
> >
> > We cannot drop 32 bit architecture support altogether.
> >
> > But, unlike the Linux kernel, DPDK doesn't need to support ancient 32
> bit architectures.
> > If the few 32 bit architectures supported by DPDK provide non-tearing
> 64 bit loads/stores, we don't need locks (in the fast path) for 64 bit
> counters.
> >
> > In addition to 32 bit x86, DPDK supports ARMv7-A (a 32 bit
> architecture) and 32 bit ARMv8.
> > I don't think DPDK support any other 32 bit architectures.
> >
> >
> > As Mattias mentioned, 32 bit x86 can use xmm registers to provide 64
> bit non-tearing load/store.
> >
> 
> Testing this a little in godbolt, I see gcc using xmm registers on 32-
> bit
> when updating 64-bit counters, but clang doesn't seem to do so, but
> instead
> does 2 stores when writing back the 64 value. (I tried with both
> volatile
> and non-volatile 64-bit values, just to see if volatile would encourage
> clang to do a single store).
> 
> GCC: https://godbolt.org/z/9eqKfT3hz
> Clang: https://godbolt.org/z/PT5EqKn4c

Interesting.
I guess this can be fixed by manually implementing what GCC does.

I'm more concerned about finding a high-performance (in the fast path) 64 bit counter solution for 32 bit ARM.


^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [RFC v3] net/af_packet: make stats reset reliable
  2024-05-09 11:37                 ` Morten Brørup
@ 2024-05-09 14:19                   ` Morten Brørup
  2024-05-10  4:56                     ` Stephen Hemminger
  2024-05-26  7:07                     ` Mattias Rönnblom
  2024-05-26  7:03                   ` Mattias Rönnblom
  1 sibling, 2 replies; 179+ messages in thread
From: Morten Brørup @ 2024-05-09 14:19 UTC (permalink / raw)
  To: Bruce Richardson
  Cc: Stephen Hemminger, Ferruh Yigit, Mattias Rönnblom,
	John W. Linville, Thomas Monjalon, dev, Mattias Rönnblom

> From: Morten Brørup [mailto:mb@smartsharesystems.com]
> Sent: Thursday, 9 May 2024 13.37
> 
> > From: Bruce Richardson [mailto:bruce.richardson@intel.com]
> > Sent: Thursday, 9 May 2024 11.30
> >
> > On Thu, May 09, 2024 at 09:43:16AM +0200, Morten Brørup wrote:
> > > > From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> > > > Sent: Wednesday, 8 May 2024 22.54
> > > >
> > > > On Wed, 8 May 2024 20:48:06 +0100
> > > > Ferruh Yigit <ferruh.yigit@amd.com> wrote:
> > > >
> > > > > >
> > > > > > The idea of load tearing is crazy talk of integral types. It
> > would
> > > > break so many things.
> > > > > > It is the kind of stupid compiler thing that would send Linus
> on
> > a
> > > > rant and get
> > > > > > the GCC compiler writers in trouble.
> > > > > >
> > > > > > The DPDK has always favored performance over strict safety
> guard
> > > > rails everywhere.
> > > > > > Switching to making every statistic an atomic operation is not
> > in
> > > > the spirit of
> > > > > > what is required. There is no strict guarantee necessary here.
> > > > > >
> > > > >
> > > > > I kind of agree with Stephen.
> > > > >
> > > > > Thanks Mattias, Morten & Stephen, it was informative discussion.
> > But
> > > > for
> > > > > *SW drivers* stats update and reset is not core functionality
> and
> > I
> > > > > think we can be OK to get hit on corner cases, instead of
> > > > > over-engineering or making code more complex.
> > > >
> > > >
> > > > I forgot the case of 64 bit values on 32 bit platforms!
> > > > Mostly because haven't cared about 32 bit for years...
> > > >
> > > > The Linux kernel uses some wrappers to handle this.
> > > > On 64 bit platforms they become noop.
> > > > On 32 bit platform, they are protected by a seqlock and updates
> are
> > > > wrapped by the sequence count.
> > > >
> > > > If we go this way, then doing similar Noop on 64 bit and atomic or
> > > > seqlock
> > > > on 32 bit should be done, but in common helper.
> > > >
> > > > Looking inside FreeBSD, it looks like that has changed over the
> > years as
> > > > well.
> > > >
> > > > 	if_inc_counter
> > > > 		counter_u64_add
> > > > 			atomic_add_64
> > > > But the counters are always per-cpu in this case. So although it
> > does
> > > > use
> > > > locked operation, will always be uncontended.
> > > >
> > > >
> > > > PS: Does DPDK still actually support 32 bit on x86? Can it be
> > dropped
> > > > this cycle?
> > >
> > > We cannot drop 32 bit architecture support altogether.
> > >
> > > But, unlike the Linux kernel, DPDK doesn't need to support ancient
> 32
> > bit architectures.
> > > If the few 32 bit architectures supported by DPDK provide non-
> tearing
> > 64 bit loads/stores, we don't need locks (in the fast path) for 64 bit
> > counters.
> > >
> > > In addition to 32 bit x86, DPDK supports ARMv7-A (a 32 bit
> > architecture) and 32 bit ARMv8.
> > > I don't think DPDK support any other 32 bit architectures.
> > >
> > >
> > > As Mattias mentioned, 32 bit x86 can use xmm registers to provide 64
> > bit non-tearing load/store.
> > >
> >
> > Testing this a little in godbolt, I see gcc using xmm registers on 32-
> > bit
> > when updating 64-bit counters, but clang doesn't seem to do so, but
> > instead
> > does 2 stores when writing back the 64 value. (I tried with both
> > volatile
> > and non-volatile 64-bit values, just to see if volatile would
> encourage
> > clang to do a single store).
> >
> > GCC: https://godbolt.org/z/9eqKfT3hz
> > Clang: https://godbolt.org/z/PT5EqKn4c
> 
> Interesting.
> I guess this can be fixed by manually implementing what GCC does.
> 
> I'm more concerned about finding a high-performance (in the fast path)
> 64 bit counter solution for 32 bit ARM.

Reading up on the topic, and continuing Bruce's experiment on Godbolt, it is possible on 32 bit ARMv7-A too, using LDRD/STRD (Load/Store Register Dual) instructions, which load/store 64 bit from memory into two registers at once.

Clang is emits more efficient code without volatile.
GCC requires volatile to use STRD.

Clang: https://godbolt.org/z/WjdTq6EKh
GCC: https://godbolt.org/z/qq9j7d4Ea

Summing it up, it is possible to implement non-tearing 64 bit high-performance (lockless, barrier-free) counters on the 32 bit architectures supported by DPDK.

But the implementation is both architecture and compiler specific.
So it seems a "64 bit counters" library would be handy. (Or a "non-tearing 64 bit integers" library, for support of the signed variant too; but I don't think we need that.)
We can use uint64_t as the underlying type and type cast in the library (where needed by the specific compiler/architecture), or introduce a new rte_ctr64_t type to ensure that accessor functions are always used and the developer doesn't have to worry about tearing on 32 bit architectures.

The most simple variant of such a library only provides load and store functions. The API would look like this:

uint64_t inline
rte_ctr64_get(const rte_ctr64_t *const ctr);

void inline
rte_ctr64_set(rte_ctr64_t *const ctr, const uint64_t value);

And if some CPU offers special instructions for increment or addition, faster (regarding performance) and/or more compact (regarding instruction memory) than a sequence of load-add-store instructions:

void inline
rte_ctr64_inc(rte_ctr64_t *const ctr);

void inline
rte_ctr64_add(rte_ctr64_t *const ctr, const uint64_t value);

<feature creep>
And perhaps atomic variants of all these functions, with explicit and/or relaxed memory ordering, for counters shared by multiple threads.
</feature creep>


^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC v3] net/af_packet: make stats reset reliable
  2024-05-09 14:19                   ` Morten Brørup
@ 2024-05-10  4:56                     ` Stephen Hemminger
  2024-05-10  9:14                       ` Morten Brørup
  2024-05-26  7:07                     ` Mattias Rönnblom
  1 sibling, 1 reply; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-10  4:56 UTC (permalink / raw)
  To: Morten Brørup
  Cc: Bruce Richardson, Ferruh Yigit, Mattias Rönnblom,
	John W. Linville, Thomas Monjalon, dev, Mattias Rönnblom

On Thu, 9 May 2024 16:19:08 +0200
Morten Brørup <mb@smartsharesystems.com> wrote:

> > From: Morten Brørup [mailto:mb@smartsharesystems.com]
> > Sent: Thursday, 9 May 2024 13.37
> >   
> > > From: Bruce Richardson [mailto:bruce.richardson@intel.com]
> > > Sent: Thursday, 9 May 2024 11.30
> > >
> > > On Thu, May 09, 2024 at 09:43:16AM +0200, Morten Brørup wrote:  
> > > > > From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> > > > > Sent: Wednesday, 8 May 2024 22.54
> > > > >
> > > > > On Wed, 8 May 2024 20:48:06 +0100
> > > > > Ferruh Yigit <ferruh.yigit@amd.com> wrote:
> > > > >  
> > > > > > >
> > > > > > > The idea of load tearing is crazy talk of integral types. It  
> > > would  
> > > > > break so many things.  
> > > > > > > It is the kind of stupid compiler thing that would send Linus  
> > on  
> > > a  
> > > > > rant and get  
> > > > > > > the GCC compiler writers in trouble.
> > > > > > >
> > > > > > > The DPDK has always favored performance over strict safety  
> > guard  
> > > > > rails everywhere.  
> > > > > > > Switching to making every statistic an atomic operation is not  
> > > in  
> > > > > the spirit of  
> > > > > > > what is required. There is no strict guarantee necessary here.
> > > > > > >  
> > > > > >
> > > > > > I kind of agree with Stephen.
> > > > > >
> > > > > > Thanks Mattias, Morten & Stephen, it was informative discussion.  
> > > But  
> > > > > for  
> > > > > > *SW drivers* stats update and reset is not core functionality  
> > and  
> > > I  
> > > > > > think we can be OK to get hit on corner cases, instead of
> > > > > > over-engineering or making code more complex.  
> > > > >
> > > > >
> > > > > I forgot the case of 64 bit values on 32 bit platforms!
> > > > > Mostly because haven't cared about 32 bit for years...
> > > > >
> > > > > The Linux kernel uses some wrappers to handle this.
> > > > > On 64 bit platforms they become noop.
> > > > > On 32 bit platform, they are protected by a seqlock and updates  
> > are  
> > > > > wrapped by the sequence count.
> > > > >
> > > > > If we go this way, then doing similar Noop on 64 bit and atomic or
> > > > > seqlock
> > > > > on 32 bit should be done, but in common helper.
> > > > >
> > > > > Looking inside FreeBSD, it looks like that has changed over the  
> > > years as  
> > > > > well.
> > > > >
> > > > > 	if_inc_counter
> > > > > 		counter_u64_add
> > > > > 			atomic_add_64
> > > > > But the counters are always per-cpu in this case. So although it  
> > > does  
> > > > > use
> > > > > locked operation, will always be uncontended.
> > > > >
> > > > >
> > > > > PS: Does DPDK still actually support 32 bit on x86? Can it be  
> > > dropped  
> > > > > this cycle?  
> > > >
> > > > We cannot drop 32 bit architecture support altogether.
> > > >
> > > > But, unlike the Linux kernel, DPDK doesn't need to support ancient  
> > 32  
> > > bit architectures.  
> > > > If the few 32 bit architectures supported by DPDK provide non-  
> > tearing  
> > > 64 bit loads/stores, we don't need locks (in the fast path) for 64 bit
> > > counters.  
> > > >
> > > > In addition to 32 bit x86, DPDK supports ARMv7-A (a 32 bit  
> > > architecture) and 32 bit ARMv8.  
> > > > I don't think DPDK support any other 32 bit architectures.
> > > >
> > > >
> > > > As Mattias mentioned, 32 bit x86 can use xmm registers to provide 64  
> > > bit non-tearing load/store.  
> > > >  
> > >
> > > Testing this a little in godbolt, I see gcc using xmm registers on 32-
> > > bit
> > > when updating 64-bit counters, but clang doesn't seem to do so, but
> > > instead
> > > does 2 stores when writing back the 64 value. (I tried with both
> > > volatile
> > > and non-volatile 64-bit values, just to see if volatile would  
> > encourage  
> > > clang to do a single store).
> > >
> > > GCC: https://godbolt.org/z/9eqKfT3hz
> > > Clang: https://godbolt.org/z/PT5EqKn4c  
> > 
> > Interesting.
> > I guess this can be fixed by manually implementing what GCC does.
> > 
> > I'm more concerned about finding a high-performance (in the fast path)
> > 64 bit counter solution for 32 bit ARM.  
> 
> Reading up on the topic, and continuing Bruce's experiment on Godbolt, it is possible on 32 bit ARMv7-A too, using LDRD/STRD (Load/Store Register Dual) instructions, which load/store 64 bit from memory into two registers at once.
> 
> Clang is emits more efficient code without volatile.
> GCC requires volatile to use STRD.
> 
> Clang: https://godbolt.org/z/WjdTq6EKh
> GCC: https://godbolt.org/z/qq9j7d4Ea
> 
> Summing it up, it is possible to implement non-tearing 64 bit high-performance (lockless, barrier-free) counters on the 32 bit architectures supported by DPDK.
> 
> But the implementation is both architecture and compiler specific.
> So it seems a "64 bit counters" library would be handy. (Or a "non-tearing 64 bit integers" library, for support of the signed variant too; but I don't think we need that.)
> We can use uint64_t as the underlying type and type cast in the library (where needed by the specific compiler/architecture), or introduce a new rte_ctr64_t type to ensure that accessor functions are always used and the developer doesn't have to worry about tearing on 32 bit architectures.
> 
> The most simple variant of such a library only provides load and store functions. The API would look like this:
> 
> uint64_t inline
> rte_ctr64_get(const rte_ctr64_t *const ctr);
> 
> void inline
> rte_ctr64_set(rte_ctr64_t *const ctr, const uint64_t value);
> 
> And if some CPU offers special instructions for increment or addition, faster (regarding performance) and/or more compact (regarding instruction memory) than a sequence of load-add-store instructions:
> 
> void inline
> rte_ctr64_inc(rte_ctr64_t *const ctr);
> 
> void inline
> rte_ctr64_add(rte_ctr64_t *const ctr, const uint64_t value);
> 
> <feature creep>
> And perhaps atomic variants of all these functions, with explicit and/or relaxed memory ordering, for counters shared by multiple threads.
> </feature creep>
> 


This kind of what I am experimenting with but...
Intend to keep the details of the counters read and update in one file and not as inlines.

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [RFC 0/3] generic sw counters
  2024-04-25 17:46 [RFC] net/af_packet: make stats reset reliable Ferruh Yigit
                   ` (3 preceding siblings ...)
  2024-05-03 15:45 ` [RFC v3] " Ferruh Yigit
@ 2024-05-10  5:01 ` Stephen Hemminger
  2024-05-10  5:01   ` [RFC 1/3] ethdev: add internal helper of SW driver statistics Stephen Hemminger
                     ` (12 more replies)
  4 siblings, 13 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-10  5:01 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

This is my attempt to demonstrate:
  - generic counters for SW drivers, the example does af_packet and tap
    but same should be applied to af_xdp, virtio, etc.
  - counters are safe against 64 bit tearing on 32 bit platform
 
The naming and organization could be improved:
  - should this be in rte_ethdev?
  - better name for struct and vairables?
  - audit and handle errors better.

Stephen Hemminger (3):
  ethdev: add internal helper of SW driver statistics
  net/af_packet: use SW stats helper
  net/tap: use generic SW stats

 drivers/net/af_packet/rte_eth_af_packet.c |  97 ++-----
 drivers/net/tap/rte_eth_tap.c             | 100 ++------
 drivers/net/tap/rte_eth_tap.h             |  17 +-
 lib/ethdev/ethdev_swstats.c               | 294 ++++++++++++++++++++++
 lib/ethdev/ethdev_swstats.h               |  60 +++++
 lib/ethdev/meson.build                    |   2 +
 lib/ethdev/version.map                    |   7 +
 7 files changed, 400 insertions(+), 177 deletions(-)
 create mode 100644 lib/ethdev/ethdev_swstats.c
 create mode 100644 lib/ethdev/ethdev_swstats.h

-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [RFC 1/3] ethdev: add internal helper of SW driver statistics
  2024-05-10  5:01 ` [RFC 0/3] generic sw counters Stephen Hemminger
@ 2024-05-10  5:01   ` Stephen Hemminger
  2024-05-10  5:01   ` [RFC 2/3] net/af_packet: use SW stats helper Stephen Hemminger
                     ` (11 subsequent siblings)
  12 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-10  5:01 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

This clones the staistic update code from virtio for use
by other drivers. It also uses native uint64_t on 64 bit platform
but atomic operations on 32 bit platforms.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>

ethdev: use atomic on 32
---
 lib/ethdev/ethdev_swstats.c | 294 ++++++++++++++++++++++++++++++++++++
 lib/ethdev/ethdev_swstats.h |  60 ++++++++
 lib/ethdev/meson.build      |   2 +
 lib/ethdev/version.map      |   7 +
 4 files changed, 363 insertions(+)
 create mode 100644 lib/ethdev/ethdev_swstats.c
 create mode 100644 lib/ethdev/ethdev_swstats.h

diff --git a/lib/ethdev/ethdev_swstats.c b/lib/ethdev/ethdev_swstats.c
new file mode 100644
index 0000000000..81b9ac13b5
--- /dev/null
+++ b/lib/ethdev/ethdev_swstats.c
@@ -0,0 +1,294 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+#include <stdbool.h>
+
+#include <rte_common.h>
+#include <ethdev_driver.h>
+
+#include "rte_ethdev.h"
+#include "ethdev_swstats.h"
+
+/*
+ * Handling of 64 bit counters to problems with load/store tearing on 32 bit.
+ * Store of aligned 64 bit never gets seperated on 64 bit platform.
+ * But on 32 bit need to use atomic.
+ */
+#ifdef RTE_ARCH_64
+typedef uint64_t eth_counter_t;
+
+static inline void
+eth_counter_add(eth_counter_t *counter, uint32_t val)
+{
+	counter += val;
+}
+
+static inline uint64_t
+eth_counter_read(const eth_counter_t *counter)
+{
+	return *counter;
+}
+
+static inline void
+eth_counter_reset(eth_counter_t *counter)
+{
+	*counter = 0;
+}
+#else
+static inline void
+eth_counter_add(eth_counter_t *counter, uint32_t val)
+{
+	rte_atomic_fetch_add_explicit(counter, val, rte_memory_order_relaxed);
+}
+
+static inline uint64_t
+eth_counter_read(const eth_counter_t *counter)
+{
+	return rte_atomic_load_explicit(counter, rte_memory_order_relaxed);
+}
+
+static inline void
+eth_counter_reset(eth_counter_t *counter)
+{
+	rte_atomic_store_explicit(counter, 0, rte_memory_order_relaxed);
+}
+
+#endif
+
+static void
+eth_qsw_reset(struct rte_eth_qsw_stats *qstats)
+{
+	unsigned int i;
+
+	eth_counter_reset(&qstats->packets);
+	eth_counter_reset(&qstats->bytes);
+	eth_counter_reset(&qstats->multicast);
+	eth_counter_reset(&qstats->broadcast);
+
+	for (i = 0; i < RTE_DIM(qstats->size_bins); i++)
+		eth_counter_reset(&qstats->size_bins[i]);
+}
+
+void
+rte_eth_qsw_update(struct rte_eth_qsw_stats *qstats, const struct rte_mbuf *mbuf)
+{
+	uint32_t s = mbuf->pkt_len;
+	uint32_t bin;
+	const struct rte_ether_addr *ea;
+
+	if (s == 64) {
+		bin = 1;
+	} else if (s > 64 && s < 1024) {
+		/* count zeros, and offset into correct bin */
+		bin = (sizeof(s) * 8) - rte_clz32(s) - 5;
+	} else if (s < 64) {
+		bin = 0;
+	} else if (s < 1519) {
+		bin = 6;
+	} else {
+		bin = 7;
+	}
+
+	eth_counter_add(&qstats->packets, 1);
+	eth_counter_add(&qstats->bytes, s);
+	eth_counter_add(&qstats->size_bins[bin], 1);
+
+	ea = rte_pktmbuf_mtod(mbuf, const struct rte_ether_addr *);
+	if (rte_is_multicast_ether_addr(ea)) {
+		if (rte_is_broadcast_ether_addr(ea))
+			eth_counter_add(&qstats->broadcast, 1);
+		else
+			eth_counter_add(&qstats->multicast, 1);
+	}
+}
+
+void
+rte_eth_qsw_error_inc(struct rte_eth_qsw_stats *qstats)
+{
+	eth_counter_add(&qstats->errors, 1);
+}
+
+int
+rte_eth_qsw_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
+{
+	unsigned int i;
+	uint64_t packets, bytes, errors;
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		/* assumes that rte_eth_qsw_stats is at start of the queue structure */
+		const struct rte_eth_qsw_stats *qstats = dev->data->tx_queues[i];
+
+		if (qstats == NULL)
+			continue;
+
+		packets = eth_counter_read(&qstats->packets);
+		bytes = eth_counter_read(&qstats->bytes);
+		errors = eth_counter_read(&qstats->errors);
+
+		stats->opackets += packets;
+		stats->obytes += bytes;
+		stats->oerrors += errors;
+
+		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+			stats->q_opackets[i] = packets;
+			stats->q_obytes[i] = bytes;
+		}
+	}
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		/* assumes that rte_eth_qsw_stats is at start of the queue structure */
+		const struct rte_eth_qsw_stats *qstats = dev->data->rx_queues[i];
+
+		if (qstats == NULL)
+			continue;
+
+		packets = eth_counter_read(&qstats->packets);
+		bytes = eth_counter_read(&qstats->bytes);
+		errors = eth_counter_read(&qstats->errors);
+
+		stats->ipackets += packets;
+		stats->ibytes += bytes;
+		stats->ierrors += errors;
+
+		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+			stats->q_ipackets[i] = packets;
+			stats->q_ibytes[i] = bytes;
+		}
+	}
+
+	stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
+	return 0;
+}
+
+int
+rte_eth_qsw_stats_reset(struct rte_eth_dev *dev)
+{
+	unsigned int i;
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		struct rte_eth_qsw_stats *qstats = dev->data->tx_queues[i];
+
+		if (qstats != NULL)
+			eth_qsw_reset(qstats);
+	}
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		struct rte_eth_qsw_stats *qstats = dev->data->rx_queues[i];
+
+		if (qstats != NULL)
+			eth_qsw_reset(qstats);
+	}
+
+	return 0;
+}
+
+struct xstats_name_off {
+	char name[RTE_ETH_XSTATS_NAME_SIZE];
+	size_t offset;
+};
+
+/* [rt]x_qX_ is prepended to the name string here */
+static const struct xstats_name_off eth_swstats_strings[] = {
+	{"good_packets",           offsetof(struct rte_eth_qsw_stats, packets)},
+	{"good_bytes",             offsetof(struct rte_eth_qsw_stats, bytes)},
+	{"errors",                 offsetof(struct rte_eth_qsw_stats, errors)},
+	{"multicast_packets",      offsetof(struct rte_eth_qsw_stats, multicast)},
+	{"broadcast_packets",      offsetof(struct rte_eth_qsw_stats, broadcast)},
+	{"undersize_packets",      offsetof(struct rte_eth_qsw_stats, size_bins[0])},
+	{"size_64_packets",        offsetof(struct rte_eth_qsw_stats, size_bins[1])},
+	{"size_65_127_packets",    offsetof(struct rte_eth_qsw_stats, size_bins[2])},
+	{"size_128_255_packets",   offsetof(struct rte_eth_qsw_stats, size_bins[3])},
+	{"size_256_511_packets",   offsetof(struct rte_eth_qsw_stats, size_bins[4])},
+	{"size_512_1023_packets",  offsetof(struct rte_eth_qsw_stats, size_bins[5])},
+	{"size_1024_1518_packets", offsetof(struct rte_eth_qsw_stats, size_bins[6])},
+	{"size_1519_max_packets",  offsetof(struct rte_eth_qsw_stats, size_bins[7])},
+};
+#define NUM_SWSTATS_XSTATS RTE_DIM(eth_swstats_strings)
+
+
+int
+rte_eth_qsw_xstats_get_names(struct rte_eth_dev *dev,
+				struct rte_eth_xstat_name *xstats_names,
+				__rte_unused unsigned limit)
+{
+	unsigned int i, t, count = 0;
+
+	if (xstats_names == NULL)
+		return (dev->data->nb_tx_queues + dev->data->nb_rx_queues) * NUM_SWSTATS_XSTATS;
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		const void *rxq = dev->data->rx_queues[i];
+
+		if (rxq == NULL)
+			continue;
+
+		for (t = 0; t < NUM_SWSTATS_XSTATS; t++) {
+			snprintf(xstats_names[count].name, sizeof(xstats_names[count].name),
+				 "rx_q%u_%s", i, eth_swstats_strings[t].name);
+			count++;
+		}
+	}
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		const void *txq = dev->data->tx_queues[i];
+
+		if (txq == NULL)
+			continue;
+
+		for (t = 0; t < NUM_SWSTATS_XSTATS; t++) {
+			snprintf(xstats_names[count].name, sizeof(xstats_names[count].name),
+				 "tx_q%u_%s", i, eth_swstats_strings[t].name);
+			count++;
+		}
+	}
+	return count;
+}
+
+int
+rte_eth_qsw_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, unsigned int n)
+{
+	unsigned int i, t, count = 0;
+	const unsigned int nstats
+		= (dev->data->nb_tx_queues + dev->data->nb_rx_queues) * NUM_SWSTATS_XSTATS;
+
+	if (n < nstats)
+		return nstats;
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		/* assumes that rte_eth_qsw_stats is at start of the queue structure */
+		const struct rte_eth_qsw_stats *qstats = dev->data->rx_queues[i];
+
+		if (qstats == NULL)
+			continue;
+
+		for (t = 0; t < NUM_SWSTATS_XSTATS; t++) {
+			const uint64_t *valuep
+				= (const uint64_t *)((const char *)qstats
+						     + eth_swstats_strings[t].offset);
+
+			xstats[count].value = *valuep;
+			xstats[count].id = count;
+			++count;
+		}
+	}
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		const struct rte_eth_qsw_stats *qstats = dev->data->tx_queues[i];
+
+		if (qstats == NULL)
+			continue;
+
+		for (t = 0; t < NUM_SWSTATS_XSTATS; t++) {
+			const uint64_t *valuep
+				= (const uint64_t *)((const char *)qstats
+						     + eth_swstats_strings[t].offset);
+
+			xstats[count].value = *valuep;
+			xstats[count].id = count;
+			++count;
+		}
+	}
+
+	return count;
+}
diff --git a/lib/ethdev/ethdev_swstats.h b/lib/ethdev/ethdev_swstats.h
new file mode 100644
index 0000000000..6309107128
--- /dev/null
+++ b/lib/ethdev/ethdev_swstats.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+#ifndef _RTE_ETHDEV_SWSTATS_H_
+#define _RTE_ETHDEV_SWSTATS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_stdatomic.h>
+
+#ifdef RTE_ARCH_64
+typedef uint64_t eth_counter_t;
+#else
+typedef RTE_ATOMIC(uint64_t) eth_counter_t;
+#endif
+
+struct rte_eth_qsw_stats {
+	eth_counter_t	packets;
+	eth_counter_t	bytes;
+	eth_counter_t	errors;
+	eth_counter_t	multicast;
+	eth_counter_t	broadcast;
+	/* Size bins in array as RFC 2819, undersized [0], 64 [1], etc */
+	eth_counter_t	size_bins[8];
+};
+
+__rte_internal
+void
+rte_eth_qsw_update(struct rte_eth_qsw_stats *stats, const struct rte_mbuf *mbuf);
+
+__rte_internal
+void
+rte_eth_qsw_error_inc(struct rte_eth_qsw_stats *stats);
+
+__rte_internal
+int
+rte_eth_qsw_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats);
+
+__rte_internal
+int
+rte_eth_qsw_stats_reset(struct rte_eth_dev *dev);
+
+__rte_internal
+int
+rte_eth_qsw_xstats_get_names(struct rte_eth_dev *dev,
+				struct rte_eth_xstat_name *xstats_names,
+				unsigned int limit);
+__rte_internal
+int
+rte_eth_qsw_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
+			  unsigned int n);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ETHDEV_SWSTATS_H_ */
diff --git a/lib/ethdev/meson.build b/lib/ethdev/meson.build
index f1d2586591..7ce29a46d4 100644
--- a/lib/ethdev/meson.build
+++ b/lib/ethdev/meson.build
@@ -3,6 +3,7 @@
 
 sources = files(
         'ethdev_driver.c',
+        'ethdev_swstats.c',
         'ethdev_private.c',
         'ethdev_profile.c',
         'ethdev_trace_points.c',
@@ -42,6 +43,7 @@ driver_sdk_headers += files(
         'ethdev_driver.h',
         'ethdev_pci.h',
         'ethdev_vdev.h',
+        'ethdev_swstats.h',
 )
 
 if is_linux
diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
index 79f6f5293b..32ebe5ea09 100644
--- a/lib/ethdev/version.map
+++ b/lib/ethdev/version.map
@@ -358,4 +358,11 @@ INTERNAL {
 	rte_eth_switch_domain_alloc;
 	rte_eth_switch_domain_free;
 	rte_flow_fp_default_ops;
+
+	rte_eth_qsw_error_inc;
+	rte_eth_qsw_stats_get;
+	rte_eth_qsw_stats_reset;
+	rte_eth_qsw_update;
+	rte_eth_qsw_xstats_get;
+	rte_eth_qsw_xstats_get_names;
 };
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [RFC 2/3] net/af_packet: use SW stats helper
  2024-05-10  5:01 ` [RFC 0/3] generic sw counters Stephen Hemminger
  2024-05-10  5:01   ` [RFC 1/3] ethdev: add internal helper of SW driver statistics Stephen Hemminger
@ 2024-05-10  5:01   ` Stephen Hemminger
  2024-05-10  5:01   ` [RFC 3/3] net/tap: use generic SW stats Stephen Hemminger
                     ` (10 subsequent siblings)
  12 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-10  5:01 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

Use the new generic SW stats.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/af_packet/rte_eth_af_packet.c | 97 ++++-------------------
 1 file changed, 16 insertions(+), 81 deletions(-)

diff --git a/drivers/net/af_packet/rte_eth_af_packet.c b/drivers/net/af_packet/rte_eth_af_packet.c
index 397a32db58..8fac37a1b1 100644
--- a/drivers/net/af_packet/rte_eth_af_packet.c
+++ b/drivers/net/af_packet/rte_eth_af_packet.c
@@ -10,6 +10,7 @@
 #include <rte_mbuf.h>
 #include <ethdev_driver.h>
 #include <ethdev_vdev.h>
+#include <ethdev_swstats.h>
 #include <rte_malloc.h>
 #include <rte_kvargs.h>
 #include <bus_vdev_driver.h>
@@ -29,6 +30,7 @@
 #include <unistd.h>
 #include <poll.h>
 
+
 #define ETH_AF_PACKET_IFACE_ARG		"iface"
 #define ETH_AF_PACKET_NUM_Q_ARG		"qpairs"
 #define ETH_AF_PACKET_BLOCKSIZE_ARG	"blocksz"
@@ -40,6 +42,8 @@
 #define DFLT_FRAME_COUNT	(1 << 9)
 
 struct pkt_rx_queue {
+	struct rte_eth_qsw_stats stats;	/* must be first */
+
 	int sockfd;
 
 	struct iovec *rd;
@@ -50,12 +54,11 @@ struct pkt_rx_queue {
 	struct rte_mempool *mb_pool;
 	uint16_t in_port;
 	uint8_t vlan_strip;
-
-	volatile unsigned long rx_pkts;
-	volatile unsigned long rx_bytes;
 };
 
 struct pkt_tx_queue {
+	struct rte_eth_qsw_stats stats;	/* must be first */
+
 	int sockfd;
 	unsigned int frame_data_size;
 
@@ -63,10 +66,6 @@ struct pkt_tx_queue {
 	uint8_t *map;
 	unsigned int framecount;
 	unsigned int framenum;
-
-	volatile unsigned long tx_pkts;
-	volatile unsigned long err_pkts;
-	volatile unsigned long tx_bytes;
 };
 
 struct pmd_internals {
@@ -118,8 +117,6 @@ eth_af_packet_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct rte_mbuf *mbuf;
 	uint8_t *pbuf;
 	struct pkt_rx_queue *pkt_q = queue;
-	uint16_t num_rx = 0;
-	unsigned long num_rx_bytes = 0;
 	unsigned int framecount, framenum;
 
 	if (unlikely(nb_pkts == 0))
@@ -164,13 +161,11 @@ eth_af_packet_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		/* account for the receive frame */
 		bufs[i] = mbuf;
-		num_rx++;
-		num_rx_bytes += mbuf->pkt_len;
+		rte_eth_qsw_update(&pkt_q->stats, mbuf);
 	}
 	pkt_q->framenum = framenum;
-	pkt_q->rx_pkts += num_rx;
-	pkt_q->rx_bytes += num_rx_bytes;
-	return num_rx;
+
+	return i;
 }
 
 /*
@@ -205,8 +200,6 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	unsigned int framecount, framenum;
 	struct pollfd pfd;
 	struct pkt_tx_queue *pkt_q = queue;
-	uint16_t num_tx = 0;
-	unsigned long num_tx_bytes = 0;
 	int i;
 
 	if (unlikely(nb_pkts == 0))
@@ -285,8 +278,7 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			framenum = 0;
 		ppd = (struct tpacket2_hdr *) pkt_q->rd[framenum].iov_base;
 
-		num_tx++;
-		num_tx_bytes += mbuf->pkt_len;
+		rte_eth_qsw_update(&pkt_q->stats, mbuf);
 		rte_pktmbuf_free(mbuf);
 	}
 
@@ -298,15 +290,9 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		 * packets will be considered successful even though only some
 		 * are sent.
 		 */
-
-		num_tx = 0;
-		num_tx_bytes = 0;
 	}
 
 	pkt_q->framenum = framenum;
-	pkt_q->tx_pkts += num_tx;
-	pkt_q->err_pkts += i - num_tx;
-	pkt_q->tx_bytes += num_tx_bytes;
 	return i;
 }
 
@@ -385,61 +371,6 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 	return 0;
 }
 
-static int
-eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
-{
-	unsigned i, imax;
-	unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
-	unsigned long rx_bytes_total = 0, tx_bytes_total = 0;
-	const struct pmd_internals *internal = dev->data->dev_private;
-
-	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
-	        internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
-	for (i = 0; i < imax; i++) {
-		igb_stats->q_ipackets[i] = internal->rx_queue[i].rx_pkts;
-		igb_stats->q_ibytes[i] = internal->rx_queue[i].rx_bytes;
-		rx_total += igb_stats->q_ipackets[i];
-		rx_bytes_total += igb_stats->q_ibytes[i];
-	}
-
-	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
-	        internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
-	for (i = 0; i < imax; i++) {
-		igb_stats->q_opackets[i] = internal->tx_queue[i].tx_pkts;
-		igb_stats->q_obytes[i] = internal->tx_queue[i].tx_bytes;
-		tx_total += igb_stats->q_opackets[i];
-		tx_err_total += internal->tx_queue[i].err_pkts;
-		tx_bytes_total += igb_stats->q_obytes[i];
-	}
-
-	igb_stats->ipackets = rx_total;
-	igb_stats->ibytes = rx_bytes_total;
-	igb_stats->opackets = tx_total;
-	igb_stats->oerrors = tx_err_total;
-	igb_stats->obytes = tx_bytes_total;
-	return 0;
-}
-
-static int
-eth_stats_reset(struct rte_eth_dev *dev)
-{
-	unsigned i;
-	struct pmd_internals *internal = dev->data->dev_private;
-
-	for (i = 0; i < internal->nb_queues; i++) {
-		internal->rx_queue[i].rx_pkts = 0;
-		internal->rx_queue[i].rx_bytes = 0;
-	}
-
-	for (i = 0; i < internal->nb_queues; i++) {
-		internal->tx_queue[i].tx_pkts = 0;
-		internal->tx_queue[i].err_pkts = 0;
-		internal->tx_queue[i].tx_bytes = 0;
-	}
-
-	return 0;
-}
-
 static int
 eth_dev_close(struct rte_eth_dev *dev)
 {
@@ -634,8 +565,12 @@ static const struct eth_dev_ops ops = {
 	.rx_queue_setup = eth_rx_queue_setup,
 	.tx_queue_setup = eth_tx_queue_setup,
 	.link_update = eth_link_update,
-	.stats_get = eth_stats_get,
-	.stats_reset = eth_stats_reset,
+
+	.stats_get = rte_eth_qsw_stats_get,
+	.stats_reset = rte_eth_qsw_stats_reset,
+	.xstats_get = rte_eth_qsw_xstats_get,
+	.xstats_get_names = rte_eth_qsw_xstats_get_names,
+	.xstats_reset = rte_eth_qsw_stats_reset,
 };
 
 /*
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [RFC 3/3] net/tap: use generic SW stats
  2024-05-10  5:01 ` [RFC 0/3] generic sw counters Stephen Hemminger
  2024-05-10  5:01   ` [RFC 1/3] ethdev: add internal helper of SW driver statistics Stephen Hemminger
  2024-05-10  5:01   ` [RFC 2/3] net/af_packet: use SW stats helper Stephen Hemminger
@ 2024-05-10  5:01   ` Stephen Hemminger
  2024-05-10 17:29   ` [RFC 0/3] generic sw counters Morten Brørup
                     ` (9 subsequent siblings)
  12 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-10  5:01 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

Use new common sw statistics.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/tap/rte_eth_tap.c | 100 ++++++----------------------------
 drivers/net/tap/rte_eth_tap.h |  17 ++----
 2 files changed, 21 insertions(+), 96 deletions(-)

diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index 69d9da695b..faf978b59e 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -432,7 +432,6 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct rx_queue *rxq = queue;
 	struct pmd_process_private *process_private;
 	uint16_t num_rx;
-	unsigned long num_rx_bytes = 0;
 	uint32_t trigger = tap_trigger;
 
 	if (trigger == rxq->trigger_seen)
@@ -455,7 +454,7 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		/* Packet couldn't fit in the provided mbuf */
 		if (unlikely(rxq->pi.flags & TUN_PKT_STRIP)) {
-			rxq->stats.ierrors++;
+			rte_eth_qsw_error_inc(&rxq->stats);
 			continue;
 		}
 
@@ -467,7 +466,9 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			struct rte_mbuf *buf = rte_pktmbuf_alloc(rxq->mp);
 
 			if (unlikely(!buf)) {
-				rxq->stats.rx_nombuf++;
+				struct rte_eth_dev *dev = &rte_eth_devices[rxq->in_port];
+				++dev->data->rx_mbuf_alloc_failed;
+
 				/* No new buf has been allocated: do nothing */
 				if (!new_tail || !seg)
 					goto end;
@@ -509,11 +510,9 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		/* account for the receive frame */
 		bufs[num_rx++] = mbuf;
-		num_rx_bytes += mbuf->pkt_len;
+		rte_eth_qsw_update(&rxq->stats, mbuf);
 	}
 end:
-	rxq->stats.ipackets += num_rx;
-	rxq->stats.ibytes += num_rx_bytes;
 
 	if (trigger && num_rx < nb_pkts)
 		rxq->trigger_seen = trigger;
@@ -523,8 +522,7 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 static inline int
 tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs,
-			struct rte_mbuf **pmbufs,
-			uint16_t *num_packets, unsigned long *num_tx_bytes)
+		struct rte_mbuf **pmbufs)
 {
 	struct pmd_process_private *process_private;
 	int i;
@@ -647,8 +645,7 @@ tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs,
 		if (n <= 0)
 			return -1;
 
-		(*num_packets)++;
-		(*num_tx_bytes) += rte_pktmbuf_pkt_len(mbuf);
+		rte_eth_qsw_update(&txq->stats, mbuf);
 	}
 	return 0;
 }
@@ -660,8 +657,6 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
 	struct tx_queue *txq = queue;
 	uint16_t num_tx = 0;
-	uint16_t num_packets = 0;
-	unsigned long num_tx_bytes = 0;
 	uint32_t max_size;
 	int i;
 
@@ -693,7 +688,7 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			tso_segsz = mbuf_in->tso_segsz + hdrs_len;
 			if (unlikely(tso_segsz == hdrs_len) ||
 				tso_segsz > *txq->mtu) {
-				txq->stats.errs++;
+				rte_eth_qsw_error_inc(&txq->stats);
 				break;
 			}
 			gso_ctx->gso_size = tso_segsz;
@@ -728,10 +723,10 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			num_mbufs = 1;
 		}
 
-		ret = tap_write_mbufs(txq, num_mbufs, mbuf,
-				&num_packets, &num_tx_bytes);
+		ret = tap_write_mbufs(txq, num_mbufs, mbuf);
 		if (ret == -1) {
-			txq->stats.errs++;
+			rte_eth_qsw_error_inc(&txq->stats);
+
 			/* free tso mbufs */
 			if (num_tso_mbufs > 0)
 				rte_pktmbuf_free_bulk(mbuf, num_tso_mbufs);
@@ -749,10 +744,6 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		}
 	}
 
-	txq->stats.opackets += num_packets;
-	txq->stats.errs += nb_pkts - num_tx;
-	txq->stats.obytes += num_tx_bytes;
-
 	return num_tx;
 }
 
@@ -1052,68 +1043,6 @@ tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 	return 0;
 }
 
-static int
-tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats)
-{
-	unsigned int i, imax;
-	unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
-	unsigned long rx_bytes_total = 0, tx_bytes_total = 0;
-	unsigned long rx_nombuf = 0, ierrors = 0;
-	const struct pmd_internals *pmd = dev->data->dev_private;
-
-	/* rx queue statistics */
-	imax = (dev->data->nb_rx_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ?
-		dev->data->nb_rx_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS;
-	for (i = 0; i < imax; i++) {
-		tap_stats->q_ipackets[i] = pmd->rxq[i].stats.ipackets;
-		tap_stats->q_ibytes[i] = pmd->rxq[i].stats.ibytes;
-		rx_total += tap_stats->q_ipackets[i];
-		rx_bytes_total += tap_stats->q_ibytes[i];
-		rx_nombuf += pmd->rxq[i].stats.rx_nombuf;
-		ierrors += pmd->rxq[i].stats.ierrors;
-	}
-
-	/* tx queue statistics */
-	imax = (dev->data->nb_tx_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ?
-		dev->data->nb_tx_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS;
-
-	for (i = 0; i < imax; i++) {
-		tap_stats->q_opackets[i] = pmd->txq[i].stats.opackets;
-		tap_stats->q_obytes[i] = pmd->txq[i].stats.obytes;
-		tx_total += tap_stats->q_opackets[i];
-		tx_err_total += pmd->txq[i].stats.errs;
-		tx_bytes_total += tap_stats->q_obytes[i];
-	}
-
-	tap_stats->ipackets = rx_total;
-	tap_stats->ibytes = rx_bytes_total;
-	tap_stats->ierrors = ierrors;
-	tap_stats->rx_nombuf = rx_nombuf;
-	tap_stats->opackets = tx_total;
-	tap_stats->oerrors = tx_err_total;
-	tap_stats->obytes = tx_bytes_total;
-	return 0;
-}
-
-static int
-tap_stats_reset(struct rte_eth_dev *dev)
-{
-	int i;
-	struct pmd_internals *pmd = dev->data->dev_private;
-
-	for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
-		pmd->rxq[i].stats.ipackets = 0;
-		pmd->rxq[i].stats.ibytes = 0;
-		pmd->rxq[i].stats.ierrors = 0;
-		pmd->rxq[i].stats.rx_nombuf = 0;
-
-		pmd->txq[i].stats.opackets = 0;
-		pmd->txq[i].stats.errs = 0;
-		pmd->txq[i].stats.obytes = 0;
-	}
-
-	return 0;
-}
 
 static int
 tap_dev_close(struct rte_eth_dev *dev)
@@ -1917,8 +1846,11 @@ static const struct eth_dev_ops ops = {
 	.mac_addr_set           = tap_mac_set,
 	.mtu_set                = tap_mtu_set,
 	.set_mc_addr_list       = tap_set_mc_addr_list,
-	.stats_get              = tap_stats_get,
-	.stats_reset            = tap_stats_reset,
+	.stats_get              = rte_eth_qsw_stats_get,
+	.stats_reset            = rte_eth_qsw_stats_reset,
+	.xstats_get_names       = rte_eth_qsw_xstats_get_names,
+	.xstats_get             = rte_eth_qsw_xstats_get,
+	.xstats_reset            = rte_eth_qsw_stats_reset,
 	.dev_supported_ptypes_get = tap_dev_supported_ptypes_get,
 	.rss_hash_update        = tap_rss_hash_update,
 	.flow_ops_get           = tap_dev_flow_ops_get,
diff --git a/drivers/net/tap/rte_eth_tap.h b/drivers/net/tap/rte_eth_tap.h
index 5ac93f93e9..c05a89a6ab 100644
--- a/drivers/net/tap/rte_eth_tap.h
+++ b/drivers/net/tap/rte_eth_tap.h
@@ -14,6 +14,7 @@
 #include <linux/if_tun.h>
 
 #include <ethdev_driver.h>
+#include <ethdev_swstats.h>
 #include <rte_ether.h>
 #include <rte_gso.h>
 #include "tap_log.h"
@@ -32,22 +33,13 @@ enum rte_tuntap_type {
 	ETH_TUNTAP_TYPE_MAX,
 };
 
-struct pkt_stats {
-	uint64_t opackets;              /* Number of output packets */
-	uint64_t ipackets;              /* Number of input packets */
-	uint64_t obytes;                /* Number of bytes on output */
-	uint64_t ibytes;                /* Number of bytes on input */
-	uint64_t errs;                  /* Number of TX error packets */
-	uint64_t ierrors;               /* Number of RX error packets */
-	uint64_t rx_nombuf;             /* Nb of RX mbuf alloc failures */
-};
-
 struct rx_queue {
+	struct rte_eth_qsw_stats stats;	/* MUST BE FIRST */
+
 	struct rte_mempool *mp;         /* Mempool for RX packets */
 	uint32_t trigger_seen;          /* Last seen Rx trigger value */
 	uint16_t in_port;               /* Port ID */
 	uint16_t queue_id;		/* queue ID*/
-	struct pkt_stats stats;         /* Stats for this RX queue */
 	uint16_t nb_rx_desc;            /* max number of mbufs available */
 	struct rte_eth_rxmode *rxmode;  /* RX features */
 	struct rte_mbuf *pool;          /* mbufs pool for this queue */
@@ -56,10 +48,11 @@ struct rx_queue {
 };
 
 struct tx_queue {
+	struct rte_eth_qsw_stats stats;	/* MUST BE FIRST */
+
 	int type;                       /* Type field - TUN|TAP */
 	uint16_t *mtu;                  /* Pointer to MTU from dev_data */
 	uint16_t csum:1;                /* Enable checksum offloading */
-	struct pkt_stats stats;         /* Stats for this TX queue */
 	struct rte_gso_ctx gso_ctx;     /* GSO context */
 	uint16_t out_port;              /* Port ID */
 	uint16_t queue_id;		/* queue ID*/
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [RFC v3] net/af_packet: make stats reset reliable
  2024-05-10  4:56                     ` Stephen Hemminger
@ 2024-05-10  9:14                       ` Morten Brørup
  2024-05-26  7:10                         ` Mattias Rönnblom
  0 siblings, 1 reply; 179+ messages in thread
From: Morten Brørup @ 2024-05-10  9:14 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Bruce Richardson, Ferruh Yigit, Mattias Rönnblom,
	John W. Linville, Thomas Monjalon, dev, Mattias Rönnblom

> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Friday, 10 May 2024 06.56
> 
> On Thu, 9 May 2024 16:19:08 +0200
> Morten Brørup <mb@smartsharesystems.com> wrote:
> 
> > > From: Morten Brørup [mailto:mb@smartsharesystems.com]
> > > Sent: Thursday, 9 May 2024 13.37
> > >
> > > > From: Bruce Richardson [mailto:bruce.richardson@intel.com]
> > > > Sent: Thursday, 9 May 2024 11.30
> > > >
> > > > On Thu, May 09, 2024 at 09:43:16AM +0200, Morten Brørup wrote:
> > > > > > From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> > > > > > Sent: Wednesday, 8 May 2024 22.54
> > > > > >
> > > > > > On Wed, 8 May 2024 20:48:06 +0100
> > > > > > Ferruh Yigit <ferruh.yigit@amd.com> wrote:
> > > > > >
> > > > > > > >
> > > > > > > > The idea of load tearing is crazy talk of integral types.
> It
> > > > would
> > > > > > break so many things.
> > > > > > > > It is the kind of stupid compiler thing that would send
> Linus
> > > on
> > > > a
> > > > > > rant and get
> > > > > > > > the GCC compiler writers in trouble.
> > > > > > > >
> > > > > > > > The DPDK has always favored performance over strict safety
> > > guard
> > > > > > rails everywhere.
> > > > > > > > Switching to making every statistic an atomic operation is
> not
> > > > in
> > > > > > the spirit of
> > > > > > > > what is required. There is no strict guarantee necessary
> here.
> > > > > > > >
> > > > > > >
> > > > > > > I kind of agree with Stephen.
> > > > > > >
> > > > > > > Thanks Mattias, Morten & Stephen, it was informative
> discussion.
> > > > But
> > > > > > for
> > > > > > > *SW drivers* stats update and reset is not core
> functionality
> > > and
> > > > I
> > > > > > > think we can be OK to get hit on corner cases, instead of
> > > > > > > over-engineering or making code more complex.
> > > > > >
> > > > > >
> > > > > > I forgot the case of 64 bit values on 32 bit platforms!
> > > > > > Mostly because haven't cared about 32 bit for years...
> > > > > >
> > > > > > The Linux kernel uses some wrappers to handle this.
> > > > > > On 64 bit platforms they become noop.
> > > > > > On 32 bit platform, they are protected by a seqlock and
> updates
> > > are
> > > > > > wrapped by the sequence count.
> > > > > >
> > > > > > If we go this way, then doing similar Noop on 64 bit and
> atomic or
> > > > > > seqlock
> > > > > > on 32 bit should be done, but in common helper.
> > > > > >
> > > > > > Looking inside FreeBSD, it looks like that has changed over
> the
> > > > years as
> > > > > > well.
> > > > > >
> > > > > > 	if_inc_counter
> > > > > > 		counter_u64_add
> > > > > > 			atomic_add_64
> > > > > > But the counters are always per-cpu in this case. So although
> it
> > > > does
> > > > > > use
> > > > > > locked operation, will always be uncontended.
> > > > > >
> > > > > >
> > > > > > PS: Does DPDK still actually support 32 bit on x86? Can it be
> > > > dropped
> > > > > > this cycle?
> > > > >
> > > > > We cannot drop 32 bit architecture support altogether.
> > > > >
> > > > > But, unlike the Linux kernel, DPDK doesn't need to support
> ancient
> > > 32
> > > > bit architectures.
> > > > > If the few 32 bit architectures supported by DPDK provide non-
> > > tearing
> > > > 64 bit loads/stores, we don't need locks (in the fast path) for 64
> bit
> > > > counters.
> > > > >
> > > > > In addition to 32 bit x86, DPDK supports ARMv7-A (a 32 bit
> > > > architecture) and 32 bit ARMv8.
> > > > > I don't think DPDK support any other 32 bit architectures.
> > > > >
> > > > >
> > > > > As Mattias mentioned, 32 bit x86 can use xmm registers to
> provide 64
> > > > bit non-tearing load/store.
> > > > >
> > > >
> > > > Testing this a little in godbolt, I see gcc using xmm registers on
> 32-
> > > > bit
> > > > when updating 64-bit counters, but clang doesn't seem to do so,
> but
> > > > instead
> > > > does 2 stores when writing back the 64 value. (I tried with both
> > > > volatile
> > > > and non-volatile 64-bit values, just to see if volatile would
> > > encourage
> > > > clang to do a single store).
> > > >
> > > > GCC: https://godbolt.org/z/9eqKfT3hz
> > > > Clang: https://godbolt.org/z/PT5EqKn4c
> > >
> > > Interesting.
> > > I guess this can be fixed by manually implementing what GCC does.
> > >
> > > I'm more concerned about finding a high-performance (in the fast
> path)
> > > 64 bit counter solution for 32 bit ARM.
> >
> > Reading up on the topic, and continuing Bruce's experiment on Godbolt,
> it is possible on 32 bit ARMv7-A too, using LDRD/STRD (Load/Store
> Register Dual) instructions, which load/store 64 bit from memory into
> two registers at once.
> >
> > Clang is emits more efficient code without volatile.
> > GCC requires volatile to use STRD.
> >
> > Clang: https://godbolt.org/z/WjdTq6EKh
> > GCC: https://godbolt.org/z/qq9j7d4Ea
> >
> > Summing it up, it is possible to implement non-tearing 64 bit high-
> performance (lockless, barrier-free) counters on the 32 bit
> architectures supported by DPDK.
> >
> > But the implementation is both architecture and compiler specific.
> > So it seems a "64 bit counters" library would be handy. (Or a "non-
> tearing 64 bit integers" library, for support of the signed variant too;
> but I don't think we need that.)
> > We can use uint64_t as the underlying type and type cast in the
> library (where needed by the specific compiler/architecture), or
> introduce a new rte_ctr64_t type to ensure that accessor functions are
> always used and the developer doesn't have to worry about tearing on 32
> bit architectures.
> >
> > The most simple variant of such a library only provides load and store
> functions. The API would look like this:
> >
> > uint64_t inline
> > rte_ctr64_get(const rte_ctr64_t *const ctr);
> >
> > void inline
> > rte_ctr64_set(rte_ctr64_t *const ctr, const uint64_t value);
> >
> > And if some CPU offers special instructions for increment or addition,
> faster (regarding performance) and/or more compact (regarding
> instruction memory) than a sequence of load-add-store instructions:
> >
> > void inline
> > rte_ctr64_inc(rte_ctr64_t *const ctr);
> >
> > void inline
> > rte_ctr64_add(rte_ctr64_t *const ctr, const uint64_t value);

Note: 32 bit architectures might achieve higher performance if the "value" parameter to rte_ctr64_add() is unsigned long (or unsigned int) instead of uint64_t.

> >
> > <feature creep>
> > And perhaps atomic variants of all these functions, with explicit
> and/or relaxed memory ordering, for counters shared by multiple threads.
> > </feature creep>
> >
> 
> 
> This kind of what I am experimenting with but...

Excellent!

> Intend to keep the details of the counters read and update in one file
> and not as inlines.

Please note that traffic management applications maintain many counters (e.g. per-flow, per-subscriber and per-link packet and byte counters, some also per QoS class), so rte_ctr64_add() must have the highest performance technically possible.

For reference, the packet scheduling code in our application updates 28 statistics counters per burst of packets. (In addition to internal state variables for e.g. queue lenghts.)

Furthermore, our application processes and displays live statistics with one second granularity, using a separate thread. Although statistics processing is not part of the fast path, the sheer number of counters processed requires high performance read access to those counters.


<more feature creep>
Some groups of counters are maintained locally in the inner loop, and then added in bulk to the "public" statistics afterwards. Conceptually:

struct stats_per_prio {
	uint64_t	forwarded_bytes;
	uint64_t	forwarded_packets;
	uint64_t	marked_bytes;
	uint64_t	marked_packets;
	uint64_t	dropped_bytes;
	uint64_t	dropped_packets;
};

If this is a common design pattern in DPDK (drivers, libraries and/or applications), perhaps also provide a performance optimized function for architectures offering vector instructions:

void inline
rte_ctr64_add_bulk(
		rte_ctr64_t *const ctrs,
		const unsigned long *const values,
		const unsigned int n /* compile time constant */);

This slightly resembles the Linux kernel's design pattern, where counters are updated in bulk, protected by a common lock for the bulk update. (However, DPDK has no lock, so the motivation for optimizing for this design pattern is only "nice to have".)

PS:
Many DPDK applications are 64 bit only, including the SmartShare appliances, and can easily manage 64 bit counters without all this.
However, if the DPDK project still considers 32 bit architectures first class citizens, 64 bit counters should have high performance on 32 bit architectures too.


^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [RFC 0/3] generic sw counters
  2024-05-10  5:01 ` [RFC 0/3] generic sw counters Stephen Hemminger
                     ` (2 preceding siblings ...)
  2024-05-10  5:01   ` [RFC 3/3] net/tap: use generic SW stats Stephen Hemminger
@ 2024-05-10 17:29   ` Morten Brørup
  2024-05-10 19:30     ` Stephen Hemminger
  2024-05-13 18:52   ` [RFC v2 0/7] generic SW counters Stephen Hemminger
                     ` (8 subsequent siblings)
  12 siblings, 1 reply; 179+ messages in thread
From: Morten Brørup @ 2024-05-10 17:29 UTC (permalink / raw)
  To: Stephen Hemminger, dev

> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Friday, 10 May 2024 07.01
> 
> This is my attempt to demonstrate:
>   - generic counters for SW drivers, the example does af_packet and tap
>     but same should be applied to af_xdp, virtio, etc.
>   - counters are safe against 64 bit tearing on 32 bit platform

+1 for this RFC

> 
> The naming and organization could be improved:
>   - should this be in rte_ethdev?

I think the part for handling 64 bit counters should be moved out into a generic library, so it can also be used by libraries and applications. It can be extended with more features (feature creep) later. And if some architectures offer (more feature creep) optimized implementations, all 64 bit counters for those architectures will benefit from such improvements.

I like the concept of joining and generalizing the stats handling for SW drivers, to avoid copy-paste of detailed stats handing between SW drivers. Copy-paste is bad, common functions are good.

I'm somewhat skeptical about putting the stats structure first in the rte_eth_dev_data's tx_queues and rx_queues.
These are void* because their types are private to the PMD. Putting the stats structure first is somewhat a hack, partially removing that privacy.
Perhaps we can make it look less like a hack. After all, it is still a private structure type, only the first part is public and must be the same across drivers using the SW stats counters. Overlapping certain parts of a private structure with a public structure is a common design pattern; I've seen it used elsewhere in DPDK too.
If we get used to it for SW ethdev drivers, we might not consider it a hack anymore. ;-)

>   - better name for struct and vairables?
>   - audit and handle errors better.
> 
> Stephen Hemminger (3):
>   ethdev: add internal helper of SW driver statistics
>   net/af_packet: use SW stats helper
>   net/tap: use generic SW stats
> 
>  drivers/net/af_packet/rte_eth_af_packet.c |  97 ++-----
>  drivers/net/tap/rte_eth_tap.c             | 100 ++------
>  drivers/net/tap/rte_eth_tap.h             |  17 +-
>  lib/ethdev/ethdev_swstats.c               | 294 ++++++++++++++++++++++
>  lib/ethdev/ethdev_swstats.h               |  60 +++++
>  lib/ethdev/meson.build                    |   2 +
>  lib/ethdev/version.map                    |   7 +
>  7 files changed, 400 insertions(+), 177 deletions(-)
>  create mode 100644 lib/ethdev/ethdev_swstats.c
>  create mode 100644 lib/ethdev/ethdev_swstats.h
> 
> --
> 2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC 0/3] generic sw counters
  2024-05-10 17:29   ` [RFC 0/3] generic sw counters Morten Brørup
@ 2024-05-10 19:30     ` Stephen Hemminger
  0 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-10 19:30 UTC (permalink / raw)
  To: Morten Brørup; +Cc: dev

On Fri, 10 May 2024 19:29:48 +0200
Morten Brørup <mb@smartsharesystems.com> wrote:

> ad, common functions are good.
> 
> I'm somewhat skeptical about putting the stats structure first in the rte_eth_dev_data's tx_queues and rx_queues.
> These are void* because their types are private to the PMD. Putting the stats structure first is somewhat a hack, partially removing that privacy.
> Perhaps we can make it look less like a hack. After all, it is still a private structure type, only the first part is public and must be the same across drivers using the SW stats counters. Overlapping certain parts of a private structure with a public structure is a common design pattern; I've seen it used elsewhere in DPDK too.
> If we get used to it for SW ethdev drivers, we might not consider it a hack anymore. ;-)

The tradeoff here, is putting them first allows for writing generic version of eth_stats_get and eth_stats_reset.
Other option is having generic code take offset for tx/rx to find the stats.

C doesn't have RTI like C++ so this was best I could think of.

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [RFC v2 0/7] generic SW counters
  2024-05-10  5:01 ` [RFC 0/3] generic sw counters Stephen Hemminger
                     ` (3 preceding siblings ...)
  2024-05-10 17:29   ` [RFC 0/3] generic sw counters Morten Brørup
@ 2024-05-13 18:52   ` Stephen Hemminger
  2024-05-13 18:52     ` [RFC v2 1/7] eal: generic 64 bit counter Stephen Hemminger
                       ` (6 more replies)
  2024-05-14 15:35   ` [PATCH v3 0/7] Generic SW counters Stephen Hemminger
                     ` (7 subsequent siblings)
  12 siblings, 7 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-13 18:52 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

This is second attempt to demonstrate:
  - generic counters for SW drivers, the example does covers:
    af_packet, af_xdp, tap, ring but could be applied to virtio, etc.
  - counters are safe against 64 bit tearing on 32 bit platform
 
v2 - renamed and reorganized
   - put counters in eal, stats in ethdev internal functions
   - use offset to find stats in queue struct

Stephen Hemminger (3):
  ethdev: add internal helper of SW driver statistics
  net/af_packet: use SW stats helper
  net/tap: use generic SW stats

Stephen Hemminger (7):
  eal: generic 64 bit counter
  ethdev: add internal helper of SW driver statistics
  net/af_packet: use SW stats helper
  net/tap: use generic SW stats
  net/pcap: use generic SW stats
  net/af_xdp: use generic SW stats
  net/ring: use generic SW stats

 drivers/net/af_packet/rte_eth_af_packet.c |  95 +++-----
 drivers/net/af_xdp/rte_eth_af_xdp.c       | 115 ++++-----
 drivers/net/pcap/pcap_ethdev.c            | 146 ++++--------
 drivers/net/ring/rte_eth_ring.c           |  85 +++----
 drivers/net/tap/rte_eth_tap.c             | 102 +++-----
 drivers/net/tap/rte_eth_tap.h             |  15 +-
 lib/eal/include/meson.build               |   1 +
 lib/eal/include/rte_counter.h             |  91 ++++++++
 lib/ethdev/ethdev_swstats.c               | 270 ++++++++++++++++++++++
 lib/ethdev/ethdev_swstats.h               |  54 +++++
 lib/ethdev/meson.build                    |   2 +
 lib/ethdev/version.map                    |   8 +
 12 files changed, 624 insertions(+), 360 deletions(-)
 create mode 100644 lib/eal/include/rte_counter.h
 create mode 100644 lib/ethdev/ethdev_swstats.c
 create mode 100644 lib/ethdev/ethdev_swstats.h

-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [RFC v2 1/7] eal: generic 64 bit counter
  2024-05-13 18:52   ` [RFC v2 0/7] generic SW counters Stephen Hemminger
@ 2024-05-13 18:52     ` Stephen Hemminger
  2024-05-13 19:36       ` Morten Brørup
  2024-05-13 18:52     ` [RFC v2 2/7] ethdev: add internal helper of SW driver statistics Stephen Hemminger
                       ` (5 subsequent siblings)
  6 siblings, 1 reply; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-13 18:52 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

This header implements 64 bit counters that are NOT atomic
but are safe against load/store splits on 32 bit platforms.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 lib/eal/include/meson.build   |  1 +
 lib/eal/include/rte_counter.h | 91 +++++++++++++++++++++++++++++++++++
 2 files changed, 92 insertions(+)
 create mode 100644 lib/eal/include/rte_counter.h

diff --git a/lib/eal/include/meson.build b/lib/eal/include/meson.build
index e94b056d46..c070dd0079 100644
--- a/lib/eal/include/meson.build
+++ b/lib/eal/include/meson.build
@@ -12,6 +12,7 @@ headers += files(
         'rte_class.h',
         'rte_common.h',
         'rte_compat.h',
+        'rte_counter.h',
         'rte_debug.h',
         'rte_dev.h',
         'rte_devargs.h',
diff --git a/lib/eal/include/rte_counter.h b/lib/eal/include/rte_counter.h
new file mode 100644
index 0000000000..1c1c34c2fb
--- /dev/null
+++ b/lib/eal/include/rte_counter.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+#ifndef _RTE_COUNTER_H_
+#define _RTE_COUNTER_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Counter
+ *
+ * A counter is 64 bit value that is safe from split read/write
+ * on 32 bit platforms. It assumes that only one cpu at a time
+ * will update the counter, and another CPU may want to read it.
+ *
+ * This is a much weaker guarantee than @rte_atomic but is faster
+ * since no locked operations are required for update.
+ */
+
+#include <stdatomic.h>
+
+#ifdef RTE_ARCH_64
+/*
+ * On a platform that can support native 64 bit type, no special handling.
+ * These are just wrapper around 64 bit value.
+ */
+typedef uint64_t rte_counter64_t;
+
+/**
+ * Add value to counter.
+ */
+__rte_experimental
+static inline void
+rte_counter64_add(rte_counter64_t *counter, uint32_t val)
+{
+	*counter += val;
+}
+
+__rte_experimental
+static inline uint64_t
+rte_counter64_fetch(const rte_counter64_t *counter)
+{
+	return *counter;
+}
+
+__rte_experimental
+static inline void
+rte_counter64_reset(rte_counter64_t *counter)
+{
+	*counter = 0;
+}
+
+#else
+/*
+ * On a 32 bit platform need to use atomic to force the compler to not
+ * split 64 bit read/write.
+ */
+typedef RTE_ATOMIC(uint64_t) rte_counter64_t;
+
+__rte_experimental
+static inline void
+rte_counter64_add(rte_counter64_t *counter, uint32_t val)
+{
+	rte_atomic_fetch_add_explicit(counter, val, rte_memory_order_relaxed);
+}
+
+__rte_experimental
+static inline uint64_t
+rte_counter64_fetch(rte_counter64_t *counter)
+{
+	return rte_atomic_load_explicit(counter, rte_memory_order_relaxed);
+}
+
+__rte_experimental
+static inline void
+rte_counter64_reset(rte_counter64_t *counter)
+{
+	rte_atomic_store_explicit(counter, 0, rte_memory_order_relaxed);
+}
+#endif
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_COUNTER_H_ */
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [RFC v2 2/7] ethdev: add internal helper of SW driver statistics
  2024-05-13 18:52   ` [RFC v2 0/7] generic SW counters Stephen Hemminger
  2024-05-13 18:52     ` [RFC v2 1/7] eal: generic 64 bit counter Stephen Hemminger
@ 2024-05-13 18:52     ` Stephen Hemminger
  2024-05-13 18:52     ` [RFC v2 3/7] net/af_packet: use SW stats helper Stephen Hemminger
                       ` (4 subsequent siblings)
  6 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-13 18:52 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Thomas Monjalon, Ferruh Yigit, Andrew Rybchenko

This clones the staistic update code from virtio for use
by other drivers. It also uses native uint64_t on 64 bit platform
but atomic operations on 32 bit platforms.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>

ethdev: use atomic on 32
---
 lib/ethdev/ethdev_swstats.c | 270 ++++++++++++++++++++++++++++++++++++
 lib/ethdev/ethdev_swstats.h |  54 ++++++++
 lib/ethdev/meson.build      |   2 +
 lib/ethdev/version.map      |   8 ++
 4 files changed, 334 insertions(+)
 create mode 100644 lib/ethdev/ethdev_swstats.c
 create mode 100644 lib/ethdev/ethdev_swstats.h

diff --git a/lib/ethdev/ethdev_swstats.c b/lib/ethdev/ethdev_swstats.c
new file mode 100644
index 0000000000..4c0fa36ac3
--- /dev/null
+++ b/lib/ethdev/ethdev_swstats.c
@@ -0,0 +1,270 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+#include <stdbool.h>
+
+#include <rte_common.h>
+#include <ethdev_driver.h>
+
+#include "rte_ethdev.h"
+#include "ethdev_swstats.h"
+
+static void
+eth_counters_reset(struct rte_eth_counters *counters)
+{
+	unsigned int i;
+
+	rte_counter64_reset(&counters->packets);
+	rte_counter64_reset(&counters->bytes);
+	rte_counter64_reset(&counters->multicast);
+	rte_counter64_reset(&counters->broadcast);
+
+	for (i = 0; i < RTE_DIM(counters->size_bins); i++)
+		rte_counter64_reset(&counters->size_bins[i]);
+}
+
+void
+rte_eth_count_packet(struct rte_eth_counters *counters,  uint32_t sz)
+{
+	uint32_t bin;
+
+	if (sz == 64) {
+		bin = 1;
+	} else if (sz > 64 && sz < 1024) {
+		/* count zeros, and offset into correct bin */
+		bin = (sizeof(sz) * 8) - rte_clz32(sz) - 5;
+	} else if (sz < 64) {
+		bin = 0;
+	} else if (sz < 1519) {
+		bin = 6;
+	} else {
+		bin = 7;
+	}
+
+	rte_counter64_add(&counters->packets, 1);
+	rte_counter64_add(&counters->bytes, sz);
+	rte_counter64_add(&counters->size_bins[bin], 1);
+}
+
+void
+rte_eth_count_mbuf(struct rte_eth_counters *counters, const struct rte_mbuf *mbuf)
+{
+	const struct rte_ether_addr *ea;
+
+	rte_eth_count_packet(counters, rte_pktmbuf_pkt_len(mbuf));
+
+	ea = rte_pktmbuf_mtod(mbuf, const struct rte_ether_addr *);
+	if (rte_is_multicast_ether_addr(ea)) {
+		if (rte_is_broadcast_ether_addr(ea))
+			rte_counter64_add(&counters->broadcast, 1);
+		else
+			rte_counter64_add(&counters->multicast, 1);
+	}
+}
+
+void
+rte_eth_count_error(struct rte_eth_counters *counters)
+{
+	rte_counter64_add(&counters->errors, 1);
+}
+
+int
+rte_eth_counters_stats_get(const struct rte_eth_dev *dev,
+			   size_t tx_offset, size_t rx_offset,
+			   struct rte_eth_stats *stats)
+{
+	unsigned int i;
+	uint64_t packets, bytes, errors;
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		const void *txq = dev->data->tx_queues[i];
+		const struct rte_eth_counters *counters;
+
+		if (txq == NULL)
+			continue;
+
+		counters = (const struct rte_eth_counters *)((const char *)txq + tx_offset);
+		packets = rte_counter64_fetch(&counters->packets);
+		bytes = rte_counter64_fetch(&counters->bytes);
+		errors = rte_counter64_fetch(&counters->errors);
+
+		stats->opackets += packets;
+		stats->obytes += bytes;
+		stats->oerrors += errors;
+
+		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+			stats->q_opackets[i] = packets;
+			stats->q_obytes[i] = bytes;
+		}
+	}
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		const void *rxq = dev->data->rx_queues[i];
+		const struct rte_eth_counters *counters;
+
+		if (rxq == NULL)
+			continue;
+
+		counters = (const struct rte_eth_counters *)((const char *)rxq + rx_offset);
+		packets = rte_counter64_fetch(&counters->packets);
+		bytes = rte_counter64_fetch(&counters->bytes);
+		errors = rte_counter64_fetch(&counters->errors);
+
+		stats->ipackets += packets;
+		stats->ibytes += bytes;
+		stats->ierrors += errors;
+
+		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+			stats->q_ipackets[i] = packets;
+			stats->q_ibytes[i] = bytes;
+		}
+	}
+
+	stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
+	return 0;
+}
+
+int
+rte_eth_counters_reset(struct rte_eth_dev *dev, size_t tx_offset, size_t rx_offset)
+{
+	unsigned int i;
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		void *txq  = dev->data->tx_queues[i];
+		struct rte_eth_counters *counters;
+
+		if (txq == NULL)
+			continue;
+
+		counters = (struct rte_eth_counters *)((char *)txq + tx_offset);
+		eth_counters_reset(counters);
+	}
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		void *rxq  = dev->data->rx_queues[i];
+		struct rte_eth_counters *counters;
+
+		if (rxq == NULL)
+			continue;
+
+		counters = (struct rte_eth_counters *)((char *)rxq + rx_offset);
+		eth_counters_reset(counters);
+	}
+
+	return 0;
+}
+
+struct xstats_name_off {
+	char name[RTE_ETH_XSTATS_NAME_SIZE];
+	size_t offset;
+};
+
+/* [rt]x_qX_ is prepended to the name string here */
+static const struct xstats_name_off eth_swstats_strings[] = {
+	{"good_packets",           offsetof(struct rte_eth_counters, packets)},
+	{"good_bytes",             offsetof(struct rte_eth_counters, bytes)},
+	{"errors",                 offsetof(struct rte_eth_counters, errors)},
+	{"multicast_packets",      offsetof(struct rte_eth_counters, multicast)},
+	{"broadcast_packets",      offsetof(struct rte_eth_counters, broadcast)},
+	{"undersize_packets",      offsetof(struct rte_eth_counters, size_bins[0])},
+	{"size_64_packets",        offsetof(struct rte_eth_counters, size_bins[1])},
+	{"size_65_127_packets",    offsetof(struct rte_eth_counters, size_bins[2])},
+	{"size_128_255_packets",   offsetof(struct rte_eth_counters, size_bins[3])},
+	{"size_256_511_packets",   offsetof(struct rte_eth_counters, size_bins[4])},
+	{"size_512_1023_packets",  offsetof(struct rte_eth_counters, size_bins[5])},
+	{"size_1024_1518_packets", offsetof(struct rte_eth_counters, size_bins[6])},
+	{"size_1519_max_packets",  offsetof(struct rte_eth_counters, size_bins[7])},
+};
+#define NUM_SWSTATS_XSTATS RTE_DIM(eth_swstats_strings)
+
+
+int
+rte_eth_counters_xstats_get_names(struct rte_eth_dev *dev,
+				  struct rte_eth_xstat_name *xstats_names)
+{
+	unsigned int i, t, count = 0;
+
+	if (xstats_names == NULL)
+		return (dev->data->nb_tx_queues + dev->data->nb_rx_queues) * NUM_SWSTATS_XSTATS;
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		const void *rxq = dev->data->rx_queues[i];
+
+		if (rxq == NULL)
+			continue;
+
+		for (t = 0; t < NUM_SWSTATS_XSTATS; t++) {
+			snprintf(xstats_names[count].name, sizeof(xstats_names[count].name),
+				 "rx_q%u_%s", i, eth_swstats_strings[t].name);
+			count++;
+		}
+	}
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		const void *txq = dev->data->tx_queues[i];
+
+		if (txq == NULL)
+			continue;
+
+		for (t = 0; t < NUM_SWSTATS_XSTATS; t++) {
+			snprintf(xstats_names[count].name, sizeof(xstats_names[count].name),
+				 "tx_q%u_%s", i, eth_swstats_strings[t].name);
+			count++;
+		}
+	}
+	return count;
+}
+
+int
+rte_eth_counters_xstats_get(struct rte_eth_dev *dev,
+			    size_t tx_offset, size_t rx_offset,
+			    struct rte_eth_xstat *xstats, unsigned int n)
+{
+	unsigned int i, t, count = 0;
+	const unsigned int nstats
+		= (dev->data->nb_tx_queues + dev->data->nb_rx_queues) * NUM_SWSTATS_XSTATS;
+
+	if (n < nstats)
+		return nstats;
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		const void *rxq = dev->data->rx_queues[i];
+		const struct rte_eth_counters *counters;
+
+		if (rxq == NULL)
+			continue;
+
+		counters = (const struct rte_eth_counters *)((const char *)rxq + rx_offset);
+		for (t = 0; t < NUM_SWSTATS_XSTATS; t++) {
+			const uint64_t *valuep
+				= (const uint64_t *)((const char *)counters
+						     + eth_swstats_strings[t].offset);
+
+			xstats[count].value = *valuep;
+			xstats[count].id = count;
+			++count;
+		}
+	}
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		const void *txq = dev->data->tx_queues[i];
+		const struct rte_eth_counters *counters;
+
+		if (txq == NULL)
+			continue;
+
+		counters = (const struct rte_eth_counters *)((const char *)txq + tx_offset);
+		for (t = 0; t < NUM_SWSTATS_XSTATS; t++) {
+			const uint64_t *valuep
+				= (const uint64_t *)((const char *)counters
+						     + eth_swstats_strings[t].offset);
+
+			xstats[count].value = *valuep;
+			xstats[count].id = count;
+			++count;
+		}
+	}
+
+	return count;
+}
diff --git a/lib/ethdev/ethdev_swstats.h b/lib/ethdev/ethdev_swstats.h
new file mode 100644
index 0000000000..45b419b887
--- /dev/null
+++ b/lib/ethdev/ethdev_swstats.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+#ifndef _RTE_ETHDEV_SWSTATS_H_
+#define _RTE_ETHDEV_SWSTATS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_counter.h>
+
+struct rte_eth_counters {
+	rte_counter64_t	packets;
+	rte_counter64_t	bytes;
+	rte_counter64_t	errors;
+	rte_counter64_t	multicast;
+	rte_counter64_t	broadcast;
+	/* Size bins in array as RFC 2819, undersized [0], 64 [1], etc */
+	rte_counter64_t	size_bins[8];
+};
+
+__rte_internal
+void rte_eth_count_packet(struct rte_eth_counters *counters, uint32_t size);
+
+__rte_internal
+void rte_eth_count_mbuf(struct rte_eth_counters *counters, const struct rte_mbuf *mbuf);
+
+__rte_internal
+void rte_eth_count_error(struct rte_eth_counters *stats);
+
+__rte_internal
+int rte_eth_counters_stats_get(const struct rte_eth_dev *dev,
+			       size_t tx_offset, size_t rx_offset,
+			       struct rte_eth_stats *stats);
+
+__rte_internal
+int rte_eth_counters_reset(struct rte_eth_dev *dev,
+				size_t tx_offset, size_t rx_offset);
+
+__rte_internal
+int rte_eth_counters_xstats_get_names(struct rte_eth_dev *dev,
+				      struct rte_eth_xstat_name *xstats_names);
+__rte_internal
+int rte_eth_counters_xstats_get(struct rte_eth_dev *dev,
+				size_t tx_offset, size_t rx_offset,
+				struct rte_eth_xstat *xstats, unsigned int n);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ETHDEV_SWSTATS_H_ */
diff --git a/lib/ethdev/meson.build b/lib/ethdev/meson.build
index f1d2586591..7ce29a46d4 100644
--- a/lib/ethdev/meson.build
+++ b/lib/ethdev/meson.build
@@ -3,6 +3,7 @@
 
 sources = files(
         'ethdev_driver.c',
+        'ethdev_swstats.c',
         'ethdev_private.c',
         'ethdev_profile.c',
         'ethdev_trace_points.c',
@@ -42,6 +43,7 @@ driver_sdk_headers += files(
         'ethdev_driver.h',
         'ethdev_pci.h',
         'ethdev_vdev.h',
+        'ethdev_swstats.h',
 )
 
 if is_linux
diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
index 79f6f5293b..1ca53e2c5d 100644
--- a/lib/ethdev/version.map
+++ b/lib/ethdev/version.map
@@ -358,4 +358,12 @@ INTERNAL {
 	rte_eth_switch_domain_alloc;
 	rte_eth_switch_domain_free;
 	rte_flow_fp_default_ops;
+
+	rte_eth_count_error;
+	rte_eth_count_mbuf;
+	rte_eth_count_packet;
+	rte_eth_counters_reset;
+	rte_eth_counters_stats_get;
+	rte_eth_counters_xstats_get;
+	rte_eth_counters_xstats_get_names;
 };
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [RFC v2 3/7] net/af_packet: use SW stats helper
  2024-05-13 18:52   ` [RFC v2 0/7] generic SW counters Stephen Hemminger
  2024-05-13 18:52     ` [RFC v2 1/7] eal: generic 64 bit counter Stephen Hemminger
  2024-05-13 18:52     ` [RFC v2 2/7] ethdev: add internal helper of SW driver statistics Stephen Hemminger
@ 2024-05-13 18:52     ` Stephen Hemminger
  2024-05-13 18:52     ` [RFC v2 4/7] net/tap: use generic SW stats Stephen Hemminger
                       ` (3 subsequent siblings)
  6 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-13 18:52 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, John W. Linville

Use the new generic SW stats.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/af_packet/rte_eth_af_packet.c | 95 +++++++----------------
 1 file changed, 29 insertions(+), 66 deletions(-)

diff --git a/drivers/net/af_packet/rte_eth_af_packet.c b/drivers/net/af_packet/rte_eth_af_packet.c
index 397a32db58..2d42f3e723 100644
--- a/drivers/net/af_packet/rte_eth_af_packet.c
+++ b/drivers/net/af_packet/rte_eth_af_packet.c
@@ -10,6 +10,7 @@
 #include <rte_mbuf.h>
 #include <ethdev_driver.h>
 #include <ethdev_vdev.h>
+#include <ethdev_swstats.h>
 #include <rte_malloc.h>
 #include <rte_kvargs.h>
 #include <bus_vdev_driver.h>
@@ -29,6 +30,7 @@
 #include <unistd.h>
 #include <poll.h>
 
+
 #define ETH_AF_PACKET_IFACE_ARG		"iface"
 #define ETH_AF_PACKET_NUM_Q_ARG		"qpairs"
 #define ETH_AF_PACKET_BLOCKSIZE_ARG	"blocksz"
@@ -51,8 +53,7 @@ struct pkt_rx_queue {
 	uint16_t in_port;
 	uint8_t vlan_strip;
 
-	volatile unsigned long rx_pkts;
-	volatile unsigned long rx_bytes;
+	struct rte_eth_counters stats;
 };
 
 struct pkt_tx_queue {
@@ -64,11 +65,10 @@ struct pkt_tx_queue {
 	unsigned int framecount;
 	unsigned int framenum;
 
-	volatile unsigned long tx_pkts;
-	volatile unsigned long err_pkts;
-	volatile unsigned long tx_bytes;
+	struct rte_eth_counters stats;
 };
 
+
 struct pmd_internals {
 	unsigned nb_queues;
 
@@ -118,8 +118,6 @@ eth_af_packet_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct rte_mbuf *mbuf;
 	uint8_t *pbuf;
 	struct pkt_rx_queue *pkt_q = queue;
-	uint16_t num_rx = 0;
-	unsigned long num_rx_bytes = 0;
 	unsigned int framecount, framenum;
 
 	if (unlikely(nb_pkts == 0))
@@ -164,13 +162,11 @@ eth_af_packet_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		/* account for the receive frame */
 		bufs[i] = mbuf;
-		num_rx++;
-		num_rx_bytes += mbuf->pkt_len;
+		rte_eth_count_mbuf(&pkt_q->stats, mbuf);
 	}
 	pkt_q->framenum = framenum;
-	pkt_q->rx_pkts += num_rx;
-	pkt_q->rx_bytes += num_rx_bytes;
-	return num_rx;
+
+	return i;
 }
 
 /*
@@ -205,8 +201,6 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	unsigned int framecount, framenum;
 	struct pollfd pfd;
 	struct pkt_tx_queue *pkt_q = queue;
-	uint16_t num_tx = 0;
-	unsigned long num_tx_bytes = 0;
 	int i;
 
 	if (unlikely(nb_pkts == 0))
@@ -285,8 +279,7 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			framenum = 0;
 		ppd = (struct tpacket2_hdr *) pkt_q->rd[framenum].iov_base;
 
-		num_tx++;
-		num_tx_bytes += mbuf->pkt_len;
+		rte_eth_count_mbuf(&pkt_q->stats, mbuf);
 		rte_pktmbuf_free(mbuf);
 	}
 
@@ -298,15 +291,9 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		 * packets will be considered successful even though only some
 		 * are sent.
 		 */
-
-		num_tx = 0;
-		num_tx_bytes = 0;
 	}
 
 	pkt_q->framenum = framenum;
-	pkt_q->tx_pkts += num_tx;
-	pkt_q->err_pkts += i - num_tx;
-	pkt_q->tx_bytes += num_tx_bytes;
 	return i;
 }
 
@@ -386,58 +373,31 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 }
 
 static int
-eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
+eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-	unsigned i, imax;
-	unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
-	unsigned long rx_bytes_total = 0, tx_bytes_total = 0;
-	const struct pmd_internals *internal = dev->data->dev_private;
-
-	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
-	        internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
-	for (i = 0; i < imax; i++) {
-		igb_stats->q_ipackets[i] = internal->rx_queue[i].rx_pkts;
-		igb_stats->q_ibytes[i] = internal->rx_queue[i].rx_bytes;
-		rx_total += igb_stats->q_ipackets[i];
-		rx_bytes_total += igb_stats->q_ibytes[i];
-	}
-
-	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
-	        internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
-	for (i = 0; i < imax; i++) {
-		igb_stats->q_opackets[i] = internal->tx_queue[i].tx_pkts;
-		igb_stats->q_obytes[i] = internal->tx_queue[i].tx_bytes;
-		tx_total += igb_stats->q_opackets[i];
-		tx_err_total += internal->tx_queue[i].err_pkts;
-		tx_bytes_total += igb_stats->q_obytes[i];
-	}
-
-	igb_stats->ipackets = rx_total;
-	igb_stats->ibytes = rx_bytes_total;
-	igb_stats->opackets = tx_total;
-	igb_stats->oerrors = tx_err_total;
-	igb_stats->obytes = tx_bytes_total;
-	return 0;
+	return rte_eth_counters_stats_get(dev, offsetof(struct pkt_tx_queue, stats),
+					  offsetof(struct pkt_rx_queue, stats), stats);
 }
 
 static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
-	unsigned i;
-	struct pmd_internals *internal = dev->data->dev_private;
-
-	for (i = 0; i < internal->nb_queues; i++) {
-		internal->rx_queue[i].rx_pkts = 0;
-		internal->rx_queue[i].rx_bytes = 0;
-	}
+	return rte_eth_counters_reset(dev, offsetof(struct pkt_tx_queue, stats),
+				      offsetof(struct pkt_rx_queue, stats));
+}
 
-	for (i = 0; i < internal->nb_queues; i++) {
-		internal->tx_queue[i].tx_pkts = 0;
-		internal->tx_queue[i].err_pkts = 0;
-		internal->tx_queue[i].tx_bytes = 0;
-	}
+static int eth_xstats_get_names(struct rte_eth_dev *dev,
+				struct rte_eth_xstat_name *names,
+				__rte_unused unsigned int limit)
+{
+	return rte_eth_counters_xstats_get_names(dev, names);
+}
 
-	return 0;
+static int
+eth_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, unsigned int n)
+{
+	return rte_eth_counters_xstats_get(dev, offsetof(struct pkt_tx_queue, stats),
+					   offsetof(struct pkt_rx_queue, stats), xstats, n);
 }
 
 static int
@@ -636,6 +596,9 @@ static const struct eth_dev_ops ops = {
 	.link_update = eth_link_update,
 	.stats_get = eth_stats_get,
 	.stats_reset = eth_stats_reset,
+	.xstats_get = eth_xstats_get,
+	.xstats_get_names = eth_xstats_get_names,
+	.xstats_reset = eth_stats_reset,
 };
 
 /*
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [RFC v2 4/7] net/tap: use generic SW stats
  2024-05-13 18:52   ` [RFC v2 0/7] generic SW counters Stephen Hemminger
                       ` (2 preceding siblings ...)
  2024-05-13 18:52     ` [RFC v2 3/7] net/af_packet: use SW stats helper Stephen Hemminger
@ 2024-05-13 18:52     ` Stephen Hemminger
  2024-05-13 18:52     ` [RFC v2 5/7] net/pcap: " Stephen Hemminger
                       ` (2 subsequent siblings)
  6 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-13 18:52 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

Use new common sw statistics.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/tap/rte_eth_tap.c | 102 +++++++++++-----------------------
 drivers/net/tap/rte_eth_tap.h |  15 +----
 2 files changed, 34 insertions(+), 83 deletions(-)

diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index 69d9da695b..ae1000a088 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -432,7 +432,6 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct rx_queue *rxq = queue;
 	struct pmd_process_private *process_private;
 	uint16_t num_rx;
-	unsigned long num_rx_bytes = 0;
 	uint32_t trigger = tap_trigger;
 
 	if (trigger == rxq->trigger_seen)
@@ -455,7 +454,7 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		/* Packet couldn't fit in the provided mbuf */
 		if (unlikely(rxq->pi.flags & TUN_PKT_STRIP)) {
-			rxq->stats.ierrors++;
+			rte_eth_count_error(&rxq->stats);
 			continue;
 		}
 
@@ -467,7 +466,9 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			struct rte_mbuf *buf = rte_pktmbuf_alloc(rxq->mp);
 
 			if (unlikely(!buf)) {
-				rxq->stats.rx_nombuf++;
+				struct rte_eth_dev *dev = &rte_eth_devices[rxq->in_port];
+				++dev->data->rx_mbuf_alloc_failed;
+
 				/* No new buf has been allocated: do nothing */
 				if (!new_tail || !seg)
 					goto end;
@@ -509,11 +510,9 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		/* account for the receive frame */
 		bufs[num_rx++] = mbuf;
-		num_rx_bytes += mbuf->pkt_len;
+		rte_eth_count_mbuf(&rxq->stats, mbuf);
 	}
 end:
-	rxq->stats.ipackets += num_rx;
-	rxq->stats.ibytes += num_rx_bytes;
 
 	if (trigger && num_rx < nb_pkts)
 		rxq->trigger_seen = trigger;
@@ -523,8 +522,7 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 static inline int
 tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs,
-			struct rte_mbuf **pmbufs,
-			uint16_t *num_packets, unsigned long *num_tx_bytes)
+		struct rte_mbuf **pmbufs)
 {
 	struct pmd_process_private *process_private;
 	int i;
@@ -647,8 +645,7 @@ tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs,
 		if (n <= 0)
 			return -1;
 
-		(*num_packets)++;
-		(*num_tx_bytes) += rte_pktmbuf_pkt_len(mbuf);
+		rte_eth_count_mbuf(&txq->stats, mbuf);
 	}
 	return 0;
 }
@@ -660,8 +657,6 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
 	struct tx_queue *txq = queue;
 	uint16_t num_tx = 0;
-	uint16_t num_packets = 0;
-	unsigned long num_tx_bytes = 0;
 	uint32_t max_size;
 	int i;
 
@@ -693,7 +688,7 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			tso_segsz = mbuf_in->tso_segsz + hdrs_len;
 			if (unlikely(tso_segsz == hdrs_len) ||
 				tso_segsz > *txq->mtu) {
-				txq->stats.errs++;
+				rte_eth_count_error(&txq->stats);
 				break;
 			}
 			gso_ctx->gso_size = tso_segsz;
@@ -728,10 +723,10 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			num_mbufs = 1;
 		}
 
-		ret = tap_write_mbufs(txq, num_mbufs, mbuf,
-				&num_packets, &num_tx_bytes);
+		ret = tap_write_mbufs(txq, num_mbufs, mbuf);
 		if (ret == -1) {
-			txq->stats.errs++;
+			rte_eth_count_error(&txq->stats);
+
 			/* free tso mbufs */
 			if (num_tso_mbufs > 0)
 				rte_pktmbuf_free_bulk(mbuf, num_tso_mbufs);
@@ -749,10 +744,6 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		}
 	}
 
-	txq->stats.opackets += num_packets;
-	txq->stats.errs += nb_pkts - num_tx;
-	txq->stats.obytes += num_tx_bytes;
-
 	return num_tx;
 }
 
@@ -1055,64 +1046,30 @@ tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 static int
 tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats)
 {
-	unsigned int i, imax;
-	unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
-	unsigned long rx_bytes_total = 0, tx_bytes_total = 0;
-	unsigned long rx_nombuf = 0, ierrors = 0;
-	const struct pmd_internals *pmd = dev->data->dev_private;
-
-	/* rx queue statistics */
-	imax = (dev->data->nb_rx_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ?
-		dev->data->nb_rx_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS;
-	for (i = 0; i < imax; i++) {
-		tap_stats->q_ipackets[i] = pmd->rxq[i].stats.ipackets;
-		tap_stats->q_ibytes[i] = pmd->rxq[i].stats.ibytes;
-		rx_total += tap_stats->q_ipackets[i];
-		rx_bytes_total += tap_stats->q_ibytes[i];
-		rx_nombuf += pmd->rxq[i].stats.rx_nombuf;
-		ierrors += pmd->rxq[i].stats.ierrors;
-	}
-
-	/* tx queue statistics */
-	imax = (dev->data->nb_tx_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ?
-		dev->data->nb_tx_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS;
-
-	for (i = 0; i < imax; i++) {
-		tap_stats->q_opackets[i] = pmd->txq[i].stats.opackets;
-		tap_stats->q_obytes[i] = pmd->txq[i].stats.obytes;
-		tx_total += tap_stats->q_opackets[i];
-		tx_err_total += pmd->txq[i].stats.errs;
-		tx_bytes_total += tap_stats->q_obytes[i];
-	}
-
-	tap_stats->ipackets = rx_total;
-	tap_stats->ibytes = rx_bytes_total;
-	tap_stats->ierrors = ierrors;
-	tap_stats->rx_nombuf = rx_nombuf;
-	tap_stats->opackets = tx_total;
-	tap_stats->oerrors = tx_err_total;
-	tap_stats->obytes = tx_bytes_total;
-	return 0;
+	return rte_eth_counters_stats_get(dev, offsetof(struct tx_queue, stats),
+					  offsetof(struct rx_queue, stats), tap_stats);
 }
 
 static int
 tap_stats_reset(struct rte_eth_dev *dev)
 {
-	int i;
-	struct pmd_internals *pmd = dev->data->dev_private;
-
-	for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
-		pmd->rxq[i].stats.ipackets = 0;
-		pmd->rxq[i].stats.ibytes = 0;
-		pmd->rxq[i].stats.ierrors = 0;
-		pmd->rxq[i].stats.rx_nombuf = 0;
+	return rte_eth_counters_reset(dev, offsetof(struct tx_queue, stats),
+				      offsetof(struct rx_queue, stats));
+}
 
-		pmd->txq[i].stats.opackets = 0;
-		pmd->txq[i].stats.errs = 0;
-		pmd->txq[i].stats.obytes = 0;
-	}
+static int
+tap_xstats_get_names(struct rte_eth_dev *dev,
+				struct rte_eth_xstat_name *names,
+				__rte_unused unsigned int limit)
+{
+	return rte_eth_counters_xstats_get_names(dev, names);
+}
 
-	return 0;
+static int
+tap_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, unsigned int n)
+{
+	return rte_eth_counters_xstats_get(dev, offsetof(struct tx_queue, stats),
+					   offsetof(struct rx_queue, stats), xstats, n);
 }
 
 static int
@@ -1919,6 +1876,9 @@ static const struct eth_dev_ops ops = {
 	.set_mc_addr_list       = tap_set_mc_addr_list,
 	.stats_get              = tap_stats_get,
 	.stats_reset            = tap_stats_reset,
+	.xstats_get_names       = tap_xstats_get_names,
+	.xstats_get             = tap_xstats_get,
+	.xstats_reset            = tap_stats_reset,
 	.dev_supported_ptypes_get = tap_dev_supported_ptypes_get,
 	.rss_hash_update        = tap_rss_hash_update,
 	.flow_ops_get           = tap_dev_flow_ops_get,
diff --git a/drivers/net/tap/rte_eth_tap.h b/drivers/net/tap/rte_eth_tap.h
index 5ac93f93e9..8cba9ea410 100644
--- a/drivers/net/tap/rte_eth_tap.h
+++ b/drivers/net/tap/rte_eth_tap.h
@@ -14,6 +14,7 @@
 #include <linux/if_tun.h>
 
 #include <ethdev_driver.h>
+#include <ethdev_swstats.h>
 #include <rte_ether.h>
 #include <rte_gso.h>
 #include "tap_log.h"
@@ -32,23 +33,13 @@ enum rte_tuntap_type {
 	ETH_TUNTAP_TYPE_MAX,
 };
 
-struct pkt_stats {
-	uint64_t opackets;              /* Number of output packets */
-	uint64_t ipackets;              /* Number of input packets */
-	uint64_t obytes;                /* Number of bytes on output */
-	uint64_t ibytes;                /* Number of bytes on input */
-	uint64_t errs;                  /* Number of TX error packets */
-	uint64_t ierrors;               /* Number of RX error packets */
-	uint64_t rx_nombuf;             /* Nb of RX mbuf alloc failures */
-};
-
 struct rx_queue {
 	struct rte_mempool *mp;         /* Mempool for RX packets */
 	uint32_t trigger_seen;          /* Last seen Rx trigger value */
 	uint16_t in_port;               /* Port ID */
 	uint16_t queue_id;		/* queue ID*/
-	struct pkt_stats stats;         /* Stats for this RX queue */
 	uint16_t nb_rx_desc;            /* max number of mbufs available */
+	struct rte_eth_counters stats;  /* Stats for this RX queue */
 	struct rte_eth_rxmode *rxmode;  /* RX features */
 	struct rte_mbuf *pool;          /* mbufs pool for this queue */
 	struct iovec (*iovecs)[];       /* descriptors for this queue */
@@ -59,7 +50,7 @@ struct tx_queue {
 	int type;                       /* Type field - TUN|TAP */
 	uint16_t *mtu;                  /* Pointer to MTU from dev_data */
 	uint16_t csum:1;                /* Enable checksum offloading */
-	struct pkt_stats stats;         /* Stats for this TX queue */
+	struct rte_eth_counters stats;	/* Stats for this TX queue */
 	struct rte_gso_ctx gso_ctx;     /* GSO context */
 	uint16_t out_port;              /* Port ID */
 	uint16_t queue_id;		/* queue ID*/
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [RFC v2 5/7] net/pcap: use generic SW stats
  2024-05-13 18:52   ` [RFC v2 0/7] generic SW counters Stephen Hemminger
                       ` (3 preceding siblings ...)
  2024-05-13 18:52     ` [RFC v2 4/7] net/tap: use generic SW stats Stephen Hemminger
@ 2024-05-13 18:52     ` Stephen Hemminger
  2024-05-13 18:52     ` [RFC v2 6/7] net/af_xdp: " Stephen Hemminger
  2024-05-13 18:52     ` [RFC v2 7/7] net/ring: " Stephen Hemminger
  6 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-13 18:52 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

Use common statistics for SW drivers.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/pcap/pcap_ethdev.c | 146 +++++++++++----------------------
 1 file changed, 47 insertions(+), 99 deletions(-)

diff --git a/drivers/net/pcap/pcap_ethdev.c b/drivers/net/pcap/pcap_ethdev.c
index bfec085045..872a3ed9a4 100644
--- a/drivers/net/pcap/pcap_ethdev.c
+++ b/drivers/net/pcap/pcap_ethdev.c
@@ -11,6 +11,7 @@
 
 #include <rte_cycles.h>
 #include <ethdev_driver.h>
+#include <ethdev_swstats.h>
 #include <ethdev_vdev.h>
 #include <rte_kvargs.h>
 #include <rte_malloc.h>
@@ -48,13 +49,6 @@ static uint8_t iface_idx;
 static uint64_t timestamp_rx_dynflag;
 static int timestamp_dynfield_offset = -1;
 
-struct queue_stat {
-	volatile unsigned long pkts;
-	volatile unsigned long bytes;
-	volatile unsigned long err_pkts;
-	volatile unsigned long rx_nombuf;
-};
-
 struct queue_missed_stat {
 	/* last value retrieved from pcap */
 	unsigned int pcap;
@@ -68,7 +62,7 @@ struct pcap_rx_queue {
 	uint16_t port_id;
 	uint16_t queue_id;
 	struct rte_mempool *mb_pool;
-	struct queue_stat rx_stat;
+	struct rte_eth_counters rx_stat;
 	struct queue_missed_stat missed_stat;
 	char name[PATH_MAX];
 	char type[ETH_PCAP_ARG_MAXLEN];
@@ -80,7 +74,7 @@ struct pcap_rx_queue {
 struct pcap_tx_queue {
 	uint16_t port_id;
 	uint16_t queue_id;
-	struct queue_stat tx_stat;
+	struct rte_eth_counters tx_stat;
 	char name[PATH_MAX];
 	char type[ETH_PCAP_ARG_MAXLEN];
 };
@@ -238,7 +232,6 @@ eth_pcap_rx_infinite(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
 	int i;
 	struct pcap_rx_queue *pcap_q = queue;
-	uint32_t rx_bytes = 0;
 
 	if (unlikely(nb_pkts == 0))
 		return 0;
@@ -252,39 +245,35 @@ eth_pcap_rx_infinite(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		if (err)
 			return i;
 
+		rte_eth_count_mbuf(&pcap_q->rx_stat, pcap_buf);
+
 		rte_memcpy(rte_pktmbuf_mtod(bufs[i], void *),
 				rte_pktmbuf_mtod(pcap_buf, void *),
 				pcap_buf->data_len);
 		bufs[i]->data_len = pcap_buf->data_len;
 		bufs[i]->pkt_len = pcap_buf->pkt_len;
 		bufs[i]->port = pcap_q->port_id;
-		rx_bytes += pcap_buf->data_len;
+
 
 		/* Enqueue packet back on ring to allow infinite rx. */
 		rte_ring_enqueue(pcap_q->pkts, pcap_buf);
 	}
 
-	pcap_q->rx_stat.pkts += i;
-	pcap_q->rx_stat.bytes += rx_bytes;
-
 	return i;
 }
 
 static uint16_t
 eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
+	struct pcap_rx_queue *pcap_q = queue;
+	struct rte_eth_dev *dev = &rte_eth_devices[pcap_q->port_id];
+	struct pmd_process_private *pp = dev->process_private;
+	pcap_t *pcap = pp->rx_pcap[pcap_q->queue_id];
 	unsigned int i;
 	struct pcap_pkthdr header;
-	struct pmd_process_private *pp;
 	const u_char *packet;
 	struct rte_mbuf *mbuf;
-	struct pcap_rx_queue *pcap_q = queue;
 	uint16_t num_rx = 0;
-	uint32_t rx_bytes = 0;
-	pcap_t *pcap;
-
-	pp = rte_eth_devices[pcap_q->port_id].process_private;
-	pcap = pp->rx_pcap[pcap_q->queue_id];
 
 	if (unlikely(pcap == NULL || nb_pkts == 0))
 		return 0;
@@ -300,7 +289,7 @@ eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		mbuf = rte_pktmbuf_alloc(pcap_q->mb_pool);
 		if (unlikely(mbuf == NULL)) {
-			pcap_q->rx_stat.rx_nombuf++;
+			++dev->data->rx_mbuf_alloc_failed;
 			break;
 		}
 
@@ -315,7 +304,7 @@ eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 						       mbuf,
 						       packet,
 						       header.caplen) == -1)) {
-				pcap_q->rx_stat.err_pkts++;
+				rte_eth_count_error(&pcap_q->rx_stat);
 				rte_pktmbuf_free(mbuf);
 				break;
 			}
@@ -329,11 +318,10 @@ eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		mbuf->ol_flags |= timestamp_rx_dynflag;
 		mbuf->port = pcap_q->port_id;
 		bufs[num_rx] = mbuf;
+
+		rte_eth_count_mbuf(&pcap_q->rx_stat, mbuf);
 		num_rx++;
-		rx_bytes += header.caplen;
 	}
-	pcap_q->rx_stat.pkts += num_rx;
-	pcap_q->rx_stat.bytes += rx_bytes;
 
 	return num_rx;
 }
@@ -379,8 +367,6 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct rte_mbuf *mbuf;
 	struct pmd_process_private *pp;
 	struct pcap_tx_queue *dumper_q = queue;
-	uint16_t num_tx = 0;
-	uint32_t tx_bytes = 0;
 	struct pcap_pkthdr header;
 	pcap_dumper_t *dumper;
 	unsigned char temp_data[RTE_ETH_PCAP_SNAPLEN];
@@ -412,8 +398,7 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		pcap_dump((u_char *)dumper, &header,
 			rte_pktmbuf_read(mbuf, 0, caplen, temp_data));
 
-		num_tx++;
-		tx_bytes += caplen;
+		rte_eth_count_mbuf(&dumper_q->tx_stat, mbuf);
 		rte_pktmbuf_free(mbuf);
 	}
 
@@ -423,9 +408,6 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	 * we flush the pcap dumper within each burst.
 	 */
 	pcap_dump_flush(dumper);
-	dumper_q->tx_stat.pkts += num_tx;
-	dumper_q->tx_stat.bytes += tx_bytes;
-	dumper_q->tx_stat.err_pkts += nb_pkts - num_tx;
 
 	return nb_pkts;
 }
@@ -437,20 +419,16 @@ static uint16_t
 eth_tx_drop(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
 	unsigned int i;
-	uint32_t tx_bytes = 0;
 	struct pcap_tx_queue *tx_queue = queue;
 
 	if (unlikely(nb_pkts == 0))
 		return 0;
 
 	for (i = 0; i < nb_pkts; i++) {
-		tx_bytes += bufs[i]->pkt_len;
+		rte_eth_count_mbuf(&tx_queue->tx_stat, bufs[i]);
 		rte_pktmbuf_free(bufs[i]);
 	}
 
-	tx_queue->tx_stat.pkts += nb_pkts;
-	tx_queue->tx_stat.bytes += tx_bytes;
-
 	return i;
 }
 
@@ -465,8 +443,6 @@ eth_pcap_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct rte_mbuf *mbuf;
 	struct pmd_process_private *pp;
 	struct pcap_tx_queue *tx_queue = queue;
-	uint16_t num_tx = 0;
-	uint32_t tx_bytes = 0;
 	pcap_t *pcap;
 	unsigned char temp_data[RTE_ETH_PCAP_SNAPLEN];
 	size_t len;
@@ -497,15 +473,11 @@ eth_pcap_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			rte_pktmbuf_read(mbuf, 0, len, temp_data), len);
 		if (unlikely(ret != 0))
 			break;
-		num_tx++;
-		tx_bytes += len;
+
+		rte_eth_count_mbuf(&tx_queue->tx_stat, mbuf);
 		rte_pktmbuf_free(mbuf);
 	}
 
-	tx_queue->tx_stat.pkts += num_tx;
-	tx_queue->tx_stat.bytes += tx_bytes;
-	tx_queue->tx_stat.err_pkts += i - num_tx;
-
 	return i;
 }
 
@@ -746,41 +718,12 @@ static int
 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
 	unsigned int i;
-	unsigned long rx_packets_total = 0, rx_bytes_total = 0;
-	unsigned long rx_missed_total = 0;
-	unsigned long rx_nombuf_total = 0, rx_err_total = 0;
-	unsigned long tx_packets_total = 0, tx_bytes_total = 0;
-	unsigned long tx_packets_err_total = 0;
-	const struct pmd_internals *internal = dev->data->dev_private;
-
-	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
-			i < dev->data->nb_rx_queues; i++) {
-		stats->q_ipackets[i] = internal->rx_queue[i].rx_stat.pkts;
-		stats->q_ibytes[i] = internal->rx_queue[i].rx_stat.bytes;
-		rx_nombuf_total += internal->rx_queue[i].rx_stat.rx_nombuf;
-		rx_err_total += internal->rx_queue[i].rx_stat.err_pkts;
-		rx_packets_total += stats->q_ipackets[i];
-		rx_bytes_total += stats->q_ibytes[i];
-		rx_missed_total += queue_missed_stat_get(dev, i);
-	}
 
-	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
-			i < dev->data->nb_tx_queues; i++) {
-		stats->q_opackets[i] = internal->tx_queue[i].tx_stat.pkts;
-		stats->q_obytes[i] = internal->tx_queue[i].tx_stat.bytes;
-		tx_packets_total += stats->q_opackets[i];
-		tx_bytes_total += stats->q_obytes[i];
-		tx_packets_err_total += internal->tx_queue[i].tx_stat.err_pkts;
-	}
+	rte_eth_counters_stats_get(dev, offsetof(struct pcap_tx_queue, tx_stat),
+				   offsetof(struct pcap_rx_queue, rx_stat), stats);
 
-	stats->ipackets = rx_packets_total;
-	stats->ibytes = rx_bytes_total;
-	stats->imissed = rx_missed_total;
-	stats->ierrors = rx_err_total;
-	stats->rx_nombuf = rx_nombuf_total;
-	stats->opackets = tx_packets_total;
-	stats->obytes = tx_bytes_total;
-	stats->oerrors = tx_packets_err_total;
+	for (i = 0; i < dev->data->nb_rx_queues; i++)
+		stats->imissed += queue_missed_stat_get(dev, i);
 
 	return 0;
 }
@@ -789,25 +732,34 @@ static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
 	unsigned int i;
-	struct pmd_internals *internal = dev->data->dev_private;
 
-	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		internal->rx_queue[i].rx_stat.pkts = 0;
-		internal->rx_queue[i].rx_stat.bytes = 0;
-		internal->rx_queue[i].rx_stat.err_pkts = 0;
-		internal->rx_queue[i].rx_stat.rx_nombuf = 0;
-		queue_missed_stat_reset(dev, i);
-	}
+	rte_eth_counters_reset(dev, offsetof(struct pcap_tx_queue, tx_stat),
+			       offsetof(struct pcap_rx_queue, rx_stat));
 
-	for (i = 0; i < dev->data->nb_tx_queues; i++) {
-		internal->tx_queue[i].tx_stat.pkts = 0;
-		internal->tx_queue[i].tx_stat.bytes = 0;
-		internal->tx_queue[i].tx_stat.err_pkts = 0;
-	}
+	for (i = 0; i < dev->data->nb_rx_queues; i++)
+		queue_missed_stat_reset(dev, i);
 
 	return 0;
 }
 
+static int
+eth_xstats_get_names(struct rte_eth_dev *dev,
+		     struct rte_eth_xstat_name *names,
+		     __rte_unused unsigned int limit)
+{
+	return rte_eth_counters_xstats_get_names(dev, names);
+}
+
+static int
+eth_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
+	       unsigned int n)
+{
+	return rte_eth_counters_xstats_get(dev, offsetof(struct pcap_tx_queue, tx_stat),
+					   offsetof(struct pcap_rx_queue, rx_stat),
+					   xstats, n);
+}
+
+
 static inline void
 infinite_rx_ring_free(struct rte_ring *pkts)
 {
@@ -929,13 +881,6 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 				pcap_pkt_count);
 			return -EINVAL;
 		}
-
-		/*
-		 * Reset the stats for this queue since eth_pcap_rx calls above
-		 * didn't result in the application receiving packets.
-		 */
-		pcap_q->rx_stat.pkts = 0;
-		pcap_q->rx_stat.bytes = 0;
 	}
 
 	return 0;
@@ -1005,6 +950,9 @@ static const struct eth_dev_ops ops = {
 	.link_update = eth_link_update,
 	.stats_get = eth_stats_get,
 	.stats_reset = eth_stats_reset,
+	.xstats_get_names = eth_xstats_get_names,
+	.xstats_get = eth_xstats_get,
+	.xstats_reset = eth_stats_reset,
 };
 
 static int
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [RFC v2 6/7] net/af_xdp: use generic SW stats
  2024-05-13 18:52   ` [RFC v2 0/7] generic SW counters Stephen Hemminger
                       ` (4 preceding siblings ...)
  2024-05-13 18:52     ` [RFC v2 5/7] net/pcap: " Stephen Hemminger
@ 2024-05-13 18:52     ` Stephen Hemminger
  2024-05-13 18:52     ` [RFC v2 7/7] net/ring: " Stephen Hemminger
  6 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-13 18:52 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Ciara Loftus

Use common code for all SW stats.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/af_xdp/rte_eth_af_xdp.c | 115 +++++++++++-----------------
 1 file changed, 44 insertions(+), 71 deletions(-)

diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 268a130c49..9420420aa4 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -20,6 +20,7 @@
 #include <rte_ethdev.h>
 #include <ethdev_driver.h>
 #include <ethdev_vdev.h>
+#include <ethdev_swstats.h>
 #include <rte_kvargs.h>
 #include <bus_vdev_driver.h>
 #include <rte_string_fns.h>
@@ -120,19 +121,13 @@ struct xsk_umem_info {
 	uint32_t max_xsks;
 };
 
-struct rx_stats {
-	uint64_t rx_pkts;
-	uint64_t rx_bytes;
-	uint64_t rx_dropped;
-};
-
 struct pkt_rx_queue {
 	struct xsk_ring_cons rx;
 	struct xsk_umem_info *umem;
 	struct xsk_socket *xsk;
 	struct rte_mempool *mb_pool;
 
-	struct rx_stats stats;
+	struct rte_eth_counters stats;
 
 	struct xsk_ring_prod fq;
 	struct xsk_ring_cons cq;
@@ -143,17 +138,11 @@ struct pkt_rx_queue {
 	int busy_budget;
 };
 
-struct tx_stats {
-	uint64_t tx_pkts;
-	uint64_t tx_bytes;
-	uint64_t tx_dropped;
-};
-
 struct pkt_tx_queue {
 	struct xsk_ring_prod tx;
 	struct xsk_umem_info *umem;
 
-	struct tx_stats stats;
+	struct rte_eth_counters stats;
 
 	struct pkt_rx_queue *pair;
 	int xsk_queue_idx;
@@ -308,7 +297,6 @@ af_xdp_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct xsk_ring_prod *fq = &rxq->fq;
 	struct xsk_umem_info *umem = rxq->umem;
 	uint32_t idx_rx = 0;
-	unsigned long rx_bytes = 0;
 	int i;
 	struct rte_mbuf *fq_bufs[ETH_AF_XDP_RX_BATCH_SIZE];
 
@@ -363,16 +351,13 @@ af_xdp_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		rte_pktmbuf_pkt_len(bufs[i]) = len;
 		rte_pktmbuf_data_len(bufs[i]) = len;
-		rx_bytes += len;
+
+		rte_eth_count_mbuf(&rxq->stats, bufs[i]);
 	}
 
 	xsk_ring_cons__release(rx, nb_pkts);
 	(void)reserve_fill_queue(umem, nb_pkts, fq_bufs, fq);
 
-	/* statistics */
-	rxq->stats.rx_pkts += nb_pkts;
-	rxq->stats.rx_bytes += rx_bytes;
-
 	return nb_pkts;
 }
 #else
@@ -384,7 +369,6 @@ af_xdp_rx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct xsk_umem_info *umem = rxq->umem;
 	struct xsk_ring_prod *fq = &rxq->fq;
 	uint32_t idx_rx = 0;
-	unsigned long rx_bytes = 0;
 	int i;
 	uint32_t free_thresh = fq->size >> 1;
 	struct rte_mbuf *mbufs[ETH_AF_XDP_RX_BATCH_SIZE];
@@ -424,16 +408,13 @@ af_xdp_rx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		rte_ring_enqueue(umem->buf_ring, (void *)addr);
 		rte_pktmbuf_pkt_len(mbufs[i]) = len;
 		rte_pktmbuf_data_len(mbufs[i]) = len;
-		rx_bytes += len;
+		rte_eth_count_mbuf(&rxq->stats, mbufs[i]);
+
 		bufs[i] = mbufs[i];
 	}
 
 	xsk_ring_cons__release(rx, nb_pkts);
 
-	/* statistics */
-	rxq->stats.rx_pkts += nb_pkts;
-	rxq->stats.rx_bytes += rx_bytes;
-
 	return nb_pkts;
 }
 #endif
@@ -527,9 +508,8 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct pkt_tx_queue *txq = queue;
 	struct xsk_umem_info *umem = txq->umem;
 	struct rte_mbuf *mbuf;
-	unsigned long tx_bytes = 0;
 	int i;
-	uint32_t idx_tx;
+	uint32_t idx_tx, pkt_len;
 	uint16_t count = 0;
 	struct xdp_desc *desc;
 	uint64_t addr, offset;
@@ -541,6 +521,7 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 	for (i = 0; i < nb_pkts; i++) {
 		mbuf = bufs[i];
+		pkt_len = rte_pktmbuf_pkt_len(mbuf);
 
 		if (mbuf->pool == umem->mb_pool) {
 			if (!xsk_ring_prod__reserve(&txq->tx, 1, &idx_tx)) {
@@ -589,17 +570,13 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			count++;
 		}
 
-		tx_bytes += mbuf->pkt_len;
+		rte_eth_count_packet(&txq->stats, pkt_len);
 	}
 
 out:
 	xsk_ring_prod__submit(&txq->tx, count);
 	kick_tx(txq, cq);
 
-	txq->stats.tx_pkts += count;
-	txq->stats.tx_bytes += tx_bytes;
-	txq->stats.tx_dropped += nb_pkts - count;
-
 	return count;
 }
 #else
@@ -610,7 +587,6 @@ af_xdp_tx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct xsk_umem_info *umem = txq->umem;
 	struct rte_mbuf *mbuf;
 	void *addrs[ETH_AF_XDP_TX_BATCH_SIZE];
-	unsigned long tx_bytes = 0;
 	int i;
 	uint32_t idx_tx;
 	struct xsk_ring_cons *cq = &txq->pair->cq;
@@ -640,7 +616,8 @@ af_xdp_tx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		pkt = xsk_umem__get_data(umem->mz->addr,
 					 desc->addr);
 		rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *), desc->len);
-		tx_bytes += mbuf->pkt_len;
+		rte_eth_qsw_update(&txq->stats, mbuf);
+
 		rte_pktmbuf_free(mbuf);
 	}
 
@@ -648,9 +625,6 @@ af_xdp_tx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 	kick_tx(txq, cq);
 
-	txq->stats.tx_pkts += nb_pkts;
-	txq->stats.tx_bytes += tx_bytes;
-
 	return nb_pkts;
 }
 
@@ -847,39 +821,26 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 static int
 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-	struct pmd_internals *internals = dev->data->dev_private;
 	struct pmd_process_private *process_private = dev->process_private;
-	struct xdp_statistics xdp_stats;
-	struct pkt_rx_queue *rxq;
-	struct pkt_tx_queue *txq;
-	socklen_t optlen;
-	int i, ret, fd;
+	unsigned int i;
 
-	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		optlen = sizeof(struct xdp_statistics);
-		rxq = &internals->rx_queues[i];
-		txq = rxq->pair;
-		stats->q_ipackets[i] = rxq->stats.rx_pkts;
-		stats->q_ibytes[i] = rxq->stats.rx_bytes;
+	rte_eth_counters_stats_get(dev, offsetof(struct pkt_tx_queue, stats),
+				   offsetof(struct pkt_rx_queue, stats), stats);
 
-		stats->q_opackets[i] = txq->stats.tx_pkts;
-		stats->q_obytes[i] = txq->stats.tx_bytes;
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		struct xdp_statistics xdp_stats;
+		socklen_t optlen = sizeof(xdp_stats);
+		int fd;
 
-		stats->ipackets += stats->q_ipackets[i];
-		stats->ibytes += stats->q_ibytes[i];
-		stats->imissed += rxq->stats.rx_dropped;
-		stats->oerrors += txq->stats.tx_dropped;
 		fd = process_private->rxq_xsk_fds[i];
-		ret = fd >= 0 ? getsockopt(fd, SOL_XDP, XDP_STATISTICS,
-					   &xdp_stats, &optlen) : -1;
-		if (ret != 0) {
+		if (fd < 0)
+			continue;
+		if (getsockopt(fd, SOL_XDP, XDP_STATISTICS,
+			       &xdp_stats, &optlen)  < 0) {
 			AF_XDP_LOG(ERR, "getsockopt() failed for XDP_STATISTICS.\n");
 			return -1;
 		}
 		stats->imissed += xdp_stats.rx_dropped;
-
-		stats->opackets += stats->q_opackets[i];
-		stats->obytes += stats->q_obytes[i];
 	}
 
 	return 0;
@@ -888,19 +849,28 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
-	struct pmd_internals *internals = dev->data->dev_private;
-	int i;
+	return rte_eth_counters_reset(dev, offsetof(struct pkt_tx_queue, stats),
+				      offsetof(struct pkt_rx_queue, stats));
+}
 
-	for (i = 0; i < internals->queue_cnt; i++) {
-		memset(&internals->rx_queues[i].stats, 0,
-					sizeof(struct rx_stats));
-		memset(&internals->tx_queues[i].stats, 0,
-					sizeof(struct tx_stats));
-	}
+static int
+eth_xstats_get_names(struct rte_eth_dev *dev,
+		     struct rte_eth_xstat_name *names,
+		     __rte_unused unsigned int limit)
+{
+	return rte_eth_counters_xstats_get_names(dev, names);
+}
 
-	return 0;
+static int
+eth_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
+	       unsigned int n)
+{
+	return rte_eth_counters_xstats_get(dev, offsetof(struct pkt_tx_queue, stats),
+					   offsetof(struct pkt_rx_queue, stats),
+					   xstats, n);
 }
 
+
 #ifdef RTE_NET_AF_XDP_LIBBPF_XDP_ATTACH
 
 static int link_xdp_prog_with_dev(int ifindex, int fd, __u32 flags)
@@ -1899,6 +1869,9 @@ static const struct eth_dev_ops ops_cni = {
 	.link_update = eth_link_update,
 	.stats_get = eth_stats_get,
 	.stats_reset = eth_stats_reset,
+	.xstats_get_names = eth_xstats_get_names,
+	.xstats_get = eth_xstats_get,
+	.xstats_reset = eth_stats_reset,
 	.get_monitor_addr = eth_get_monitor_addr,
 };
 
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [RFC v2 7/7] net/ring: use generic SW stats
  2024-05-13 18:52   ` [RFC v2 0/7] generic SW counters Stephen Hemminger
                       ` (5 preceding siblings ...)
  2024-05-13 18:52     ` [RFC v2 6/7] net/af_xdp: " Stephen Hemminger
@ 2024-05-13 18:52     ` Stephen Hemminger
  6 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-13 18:52 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Bruce Richardson

Use generic per-queue infrastructure.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/ring/rte_eth_ring.c | 85 +++++++++++++++++----------------
 1 file changed, 44 insertions(+), 41 deletions(-)

diff --git a/drivers/net/ring/rte_eth_ring.c b/drivers/net/ring/rte_eth_ring.c
index 48953dd7a0..550b927392 100644
--- a/drivers/net/ring/rte_eth_ring.c
+++ b/drivers/net/ring/rte_eth_ring.c
@@ -7,6 +7,7 @@
 #include "rte_eth_ring.h"
 #include <rte_mbuf.h>
 #include <ethdev_driver.h>
+#include <ethdev_swstats.h>
 #include <rte_malloc.h>
 #include <rte_memcpy.h>
 #include <rte_os_shim.h>
@@ -44,8 +45,8 @@ enum dev_action {
 
 struct ring_queue {
 	struct rte_ring *rng;
-	uint64_t rx_pkts;
-	uint64_t tx_pkts;
+
+	struct rte_eth_counters stats;
 };
 
 struct pmd_internals {
@@ -77,12 +78,13 @@ eth_ring_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 {
 	void **ptrs = (void *)&bufs[0];
 	struct ring_queue *r = q;
-	const uint16_t nb_rx = (uint16_t)rte_ring_dequeue_burst(r->rng,
-			ptrs, nb_bufs, NULL);
-	if (r->rng->flags & RING_F_SC_DEQ)
-		r->rx_pkts += nb_rx;
-	else
-		__atomic_fetch_add(&r->rx_pkts, nb_rx, __ATOMIC_RELAXED);
+	uint16_t i, nb_rx;
+
+	nb_rx = (uint16_t)rte_ring_dequeue_burst(r->rng, ptrs, nb_bufs, NULL);
+
+	for (i = 0; i < nb_rx; i++)
+		rte_eth_count_mbuf(&r->stats, bufs[i]);
+
 	return nb_rx;
 }
 
@@ -90,13 +92,20 @@ static uint16_t
 eth_ring_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 {
 	void **ptrs = (void *)&bufs[0];
+	uint32_t *sizes;
 	struct ring_queue *r = q;
-	const uint16_t nb_tx = (uint16_t)rte_ring_enqueue_burst(r->rng,
-			ptrs, nb_bufs, NULL);
-	if (r->rng->flags & RING_F_SP_ENQ)
-		r->tx_pkts += nb_tx;
-	else
-		__atomic_fetch_add(&r->tx_pkts, nb_tx, __ATOMIC_RELAXED);
+	uint16_t i, nb_tx;
+
+	sizes = alloca(sizeof(uint32_t) * nb_bufs);
+
+	for (i = 0; i < nb_bufs; i++)
+		sizes[i] = rte_pktmbuf_pkt_len(bufs[i]);
+
+	nb_tx = (uint16_t)rte_ring_enqueue_burst(r->rng, ptrs, nb_bufs, NULL);
+
+	for (i = 0; i < nb_tx; i++)
+		rte_eth_count_packet(&r->stats, sizes[i]);
+
 	return nb_tx;
 }
 
@@ -193,42 +202,33 @@ eth_dev_info(struct rte_eth_dev *dev,
 static int
 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-	unsigned int i;
-	unsigned long rx_total = 0, tx_total = 0;
-	const struct pmd_internals *internal = dev->data->dev_private;
-
-	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
-			i < dev->data->nb_rx_queues; i++) {
-		stats->q_ipackets[i] = internal->rx_ring_queues[i].rx_pkts;
-		rx_total += stats->q_ipackets[i];
-	}
-
-	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
-			i < dev->data->nb_tx_queues; i++) {
-		stats->q_opackets[i] = internal->tx_ring_queues[i].tx_pkts;
-		tx_total += stats->q_opackets[i];
-	}
-
-	stats->ipackets = rx_total;
-	stats->opackets = tx_total;
-
-	return 0;
+	return rte_eth_counters_stats_get(dev, offsetof(struct ring_queue, stats),
+					  offsetof(struct ring_queue, stats),
+					  stats);
 }
 
 static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
-	unsigned int i;
-	struct pmd_internals *internal = dev->data->dev_private;
+	return rte_eth_counters_reset(dev, offsetof(struct ring_queue, stats),
+				      offsetof(struct ring_queue, stats));
+}
 
-	for (i = 0; i < dev->data->nb_rx_queues; i++)
-		internal->rx_ring_queues[i].rx_pkts = 0;
-	for (i = 0; i < dev->data->nb_tx_queues; i++)
-		internal->tx_ring_queues[i].tx_pkts = 0;
+static int
+eth_xstats_get_names(struct rte_eth_dev *dev, struct rte_eth_xstat_name *names,
+		     __rte_unused unsigned int limit)
+{
+	return rte_eth_counters_xstats_get_names(dev, names);
+}
 
-	return 0;
+static int
+eth_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, unsigned int n)
+{
+	return rte_eth_counters_xstats_get(dev, offsetof(struct ring_queue, stats),
+					   offsetof(struct ring_queue, stats), xstats, n);
 }
 
+
 static void
 eth_mac_addr_remove(struct rte_eth_dev *dev __rte_unused,
 	uint32_t index __rte_unused)
@@ -339,6 +339,9 @@ static const struct eth_dev_ops ops = {
 	.link_update = eth_link_update,
 	.stats_get = eth_stats_get,
 	.stats_reset = eth_stats_reset,
+	.xstats_get_names = eth_xstats_get_names,
+	.xstats_get = eth_xstats_get,
+	.xstats_reset = eth_stats_reset,
 	.mac_addr_remove = eth_mac_addr_remove,
 	.mac_addr_add = eth_mac_addr_add,
 	.promiscuous_enable = eth_promiscuous_enable,
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [RFC v2 1/7] eal: generic 64 bit counter
  2024-05-13 18:52     ` [RFC v2 1/7] eal: generic 64 bit counter Stephen Hemminger
@ 2024-05-13 19:36       ` Morten Brørup
  0 siblings, 0 replies; 179+ messages in thread
From: Morten Brørup @ 2024-05-13 19:36 UTC (permalink / raw)
  To: Stephen Hemminger, dev

> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Monday, 13 May 2024 20.52
> 
> This header implements 64 bit counters that are NOT atomic
> but are safe against load/store splits on 32 bit platforms.
> 
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> ---

As an initial reaction I would have preferred get() instead of fetch(); get() being more common in DPDK function naming.
However, atomic functions use the name fetch(), not get(), and these are functions are slightly related to atomic functions.
So, after further consideration, it seems fetch() is the better choice.

Acked-by: Morten Brørup <mb@smartsharesystems.com>

PS:

Although I'm usually strongly opposed to EAL bloat... As long as the EAL also serves as a the library for simple utility functions, this belongs in the EAL.

Specific optimizations for various 32 bit architectures may be added later.

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v3 0/7] Generic SW counters
  2024-05-10  5:01 ` [RFC 0/3] generic sw counters Stephen Hemminger
                     ` (4 preceding siblings ...)
  2024-05-13 18:52   ` [RFC v2 0/7] generic SW counters Stephen Hemminger
@ 2024-05-14 15:35   ` Stephen Hemminger
  2024-05-14 15:35     ` [PATCH v3 1/7] eal: generic 64 bit counter Stephen Hemminger
                       ` (6 more replies)
  2024-05-15 23:40   ` [PATCH v4 0/8] Generic 64 bit counters for SW PMD's Stephen Hemminger
                     ` (6 subsequent siblings)
  12 siblings, 7 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-14 15:35 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

Adds common code for collecting basic statistics used by many
SW based PMD's such as af_packet, af_xdp, tap and ring.

Solves the problem where counters could tear because of
32 bit load/store on 32 bit platforms.

v3 - fix missing const on 32 bit
   - add some docbook comments
   - get rid of unused statistics from previous version

Stephen Hemminger (7):
  eal: generic 64 bit counter
  ethdev: add internal helper of SW driver statistics
  net/af_packet: use SW stats helper
  net/af_xdp: use generic SW stats
  net/pcap: use generic SW stats
  net/ring: use generic SW stats
  net/tap: use generic SW stats

 drivers/net/af_packet/rte_eth_af_packet.c |  82 +++-----------
 drivers/net/af_xdp/rte_eth_af_xdp.c       |  98 +++++------------
 drivers/net/pcap/pcap_ethdev.c            | 125 +++++-----------------
 drivers/net/ring/rte_eth_ring.c           |  71 +++++-------
 drivers/net/tap/rte_eth_tap.c             |  88 +++------------
 drivers/net/tap/rte_eth_tap.h             |  15 +--
 lib/eal/include/meson.build               |   1 +
 lib/eal/include/rte_counter.h             |  91 ++++++++++++++++
 lib/ethdev/ethdev_swstats.c               | 106 ++++++++++++++++++
 lib/ethdev/ethdev_swstats.h               | 124 +++++++++++++++++++++
 lib/ethdev/meson.build                    |   2 +
 lib/ethdev/version.map                    |   3 +
 12 files changed, 438 insertions(+), 368 deletions(-)
 create mode 100644 lib/eal/include/rte_counter.h
 create mode 100644 lib/ethdev/ethdev_swstats.c
 create mode 100644 lib/ethdev/ethdev_swstats.h

-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v3 1/7] eal: generic 64 bit counter
  2024-05-14 15:35   ` [PATCH v3 0/7] Generic SW counters Stephen Hemminger
@ 2024-05-14 15:35     ` Stephen Hemminger
  2024-05-15  9:30       ` Morten Brørup
  2024-05-26  6:45       ` Mattias Rönnblom
  2024-05-14 15:35     ` [PATCH v3 2/7] ethdev: add internal helper of SW driver statistics Stephen Hemminger
                       ` (5 subsequent siblings)
  6 siblings, 2 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-14 15:35 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Morten Brørup

This header implements 64 bit counters that are NOT atomic
but are safe against load/store splits on 32 bit platforms.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
---
 lib/eal/include/meson.build   |  1 +
 lib/eal/include/rte_counter.h | 91 +++++++++++++++++++++++++++++++++++
 2 files changed, 92 insertions(+)
 create mode 100644 lib/eal/include/rte_counter.h

diff --git a/lib/eal/include/meson.build b/lib/eal/include/meson.build
index e94b056d46..c070dd0079 100644
--- a/lib/eal/include/meson.build
+++ b/lib/eal/include/meson.build
@@ -12,6 +12,7 @@ headers += files(
         'rte_class.h',
         'rte_common.h',
         'rte_compat.h',
+        'rte_counter.h',
         'rte_debug.h',
         'rte_dev.h',
         'rte_devargs.h',
diff --git a/lib/eal/include/rte_counter.h b/lib/eal/include/rte_counter.h
new file mode 100644
index 0000000000..8068d6d26e
--- /dev/null
+++ b/lib/eal/include/rte_counter.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+#ifndef _RTE_COUNTER_H_
+#define _RTE_COUNTER_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Counter
+ *
+ * A counter is 64 bit value that is safe from split read/write
+ * on 32 bit platforms. It assumes that only one cpu at a time
+ * will update the counter, and another CPU may want to read it.
+ *
+ * This is a much weaker guarantee than full atomic variables
+ * but is faster since no locked operations are required for update.
+ */
+
+#include <stdatomic.h>
+
+#ifdef RTE_ARCH_64
+/*
+ * On a platform that can support native 64 bit type, no special handling.
+ * These are just wrapper around 64 bit value.
+ */
+typedef uint64_t rte_counter64_t;
+
+/**
+ * Add value to counter.
+ */
+__rte_experimental
+static inline void
+rte_counter64_add(rte_counter64_t *counter, uint32_t val)
+{
+	*counter += val;
+}
+
+__rte_experimental
+static inline uint64_t
+rte_counter64_fetch(const rte_counter64_t *counter)
+{
+	return *counter;
+}
+
+__rte_experimental
+static inline void
+rte_counter64_reset(rte_counter64_t *counter)
+{
+	*counter = 0;
+}
+
+#else
+/*
+ * On a 32 bit platform need to use atomic to force the compler to not
+ * split 64 bit read/write.
+ */
+typedef RTE_ATOMIC(uint64_t) rte_counter64_t;
+
+__rte_experimental
+static inline void
+rte_counter64_add(rte_counter64_t *counter, uint32_t val)
+{
+	rte_atomic_fetch_add_explicit(counter, val, rte_memory_order_relaxed);
+}
+
+__rte_experimental
+static inline uint64_t
+rte_counter64_fetch(const rte_counter64_t *counter)
+{
+	return rte_atomic_load_explicit(counter, rte_memory_order_relaxed);
+}
+
+__rte_experimental
+static inline void
+rte_counter64_reset(rte_counter64_t *counter)
+{
+	rte_atomic_store_explicit(counter, 0, rte_memory_order_relaxed);
+}
+#endif
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_COUNTER_H_ */
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v3 2/7] ethdev: add internal helper of SW driver statistics
  2024-05-14 15:35   ` [PATCH v3 0/7] Generic SW counters Stephen Hemminger
  2024-05-14 15:35     ` [PATCH v3 1/7] eal: generic 64 bit counter Stephen Hemminger
@ 2024-05-14 15:35     ` Stephen Hemminger
  2024-05-14 15:35     ` [PATCH v3 3/7] net/af_packet: use SW stats helper Stephen Hemminger
                       ` (4 subsequent siblings)
  6 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-14 15:35 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Thomas Monjalon, Ferruh Yigit, Andrew Rybchenko

Introduce common helper routines for keeping track of per-queue
statistics in SW PMD's. The code in several drivers had
copy/pasted the same code for this, but had common issues
with 64 bit counters on 32 bit platforms.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 lib/ethdev/ethdev_swstats.c | 106 ++++++++++++++++++++++++++++++
 lib/ethdev/ethdev_swstats.h | 124 ++++++++++++++++++++++++++++++++++++
 lib/ethdev/meson.build      |   2 +
 lib/ethdev/version.map      |   3 +
 4 files changed, 235 insertions(+)
 create mode 100644 lib/ethdev/ethdev_swstats.c
 create mode 100644 lib/ethdev/ethdev_swstats.h

diff --git a/lib/ethdev/ethdev_swstats.c b/lib/ethdev/ethdev_swstats.c
new file mode 100644
index 0000000000..4421b59930
--- /dev/null
+++ b/lib/ethdev/ethdev_swstats.c
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+#include <stdbool.h>
+
+#include <rte_common.h>
+#include <ethdev_driver.h>
+
+#include "rte_ethdev.h"
+#include "ethdev_swstats.h"
+
+int
+rte_eth_counters_stats_get(const struct rte_eth_dev *dev,
+			   size_t tx_offset, size_t rx_offset,
+			   struct rte_eth_stats *stats)
+{
+	unsigned int i;
+	uint64_t packets, bytes, errors;
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		const void *txq = dev->data->tx_queues[i];
+		const struct rte_eth_counters *counters;
+
+		if (txq == NULL)
+			continue;
+
+		counters = (const struct rte_eth_counters *)((const char *)txq + tx_offset);
+		packets = rte_counter64_fetch(&counters->packets);
+		bytes = rte_counter64_fetch(&counters->bytes);
+		errors = rte_counter64_fetch(&counters->errors);
+
+		/* Prevent compiler for fetching these values twice */
+		rte_compiler_barrier();
+
+		stats->opackets += packets;
+		stats->obytes += bytes;
+		stats->oerrors += errors;
+
+		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+			stats->q_opackets[i] = packets;
+			stats->q_obytes[i] = bytes;
+		}
+	}
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		const void *rxq = dev->data->rx_queues[i];
+		const struct rte_eth_counters *counters;
+
+		if (rxq == NULL)
+			continue;
+
+		counters = (const struct rte_eth_counters *)((const char *)rxq + rx_offset);
+		packets = rte_counter64_fetch(&counters->packets);
+		bytes = rte_counter64_fetch(&counters->bytes);
+		errors = rte_counter64_fetch(&counters->errors);
+
+		/* Prevent compiler for fetching these values twice */
+		rte_compiler_barrier();
+
+		stats->ipackets += packets;
+		stats->ibytes += bytes;
+		stats->ierrors += errors;
+
+		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+			stats->q_ipackets[i] = packets;
+			stats->q_ibytes[i] = bytes;
+		}
+	}
+
+	stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
+	return 0;
+}
+
+int
+rte_eth_counters_reset(struct rte_eth_dev *dev, size_t tx_offset, size_t rx_offset)
+{
+	struct rte_eth_counters *counters;
+	unsigned int i;
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		void *txq  = dev->data->tx_queues[i];
+
+		if (txq == NULL)
+			continue;
+
+		counters = (struct rte_eth_counters *)((char *)txq + tx_offset);
+		rte_counter64_reset(&counters->packets);
+		rte_counter64_reset(&counters->bytes);
+		rte_counter64_reset(&counters->errors);
+	}
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		void *rxq  = dev->data->rx_queues[i];
+
+		if (rxq == NULL)
+			continue;
+
+		counters = (struct rte_eth_counters *)((char *)rxq + rx_offset);
+		rte_counter64_reset(&counters->packets);
+		rte_counter64_reset(&counters->bytes);
+		rte_counter64_reset(&counters->errors);
+	}
+
+	return 0;
+}
diff --git a/lib/ethdev/ethdev_swstats.h b/lib/ethdev/ethdev_swstats.h
new file mode 100644
index 0000000000..808c540640
--- /dev/null
+++ b/lib/ethdev/ethdev_swstats.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+#ifndef _RTE_ETHDEV_SWSTATS_H_
+#define _RTE_ETHDEV_SWSTATS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ *
+ * Internal statistics counters for software based devices.
+ * Hardware PMD's should use the hardware counters instead.
+ *
+ * This provides a library for PMD's to keep track of packets and bytes.
+ * It is assumed that this will be used per queue and queues are not
+ * shared by lcores.
+ */
+
+#include <rte_counter.h>
+
+/**
+ * A structure to be embedded in the device driver per-queue data.
+ */
+struct rte_eth_counters {
+	rte_counter64_t	packets;	/**< Total number of packets. */
+	rte_counter64_t	bytes;		/**< Total number of bytes. */
+	rte_counter64_t	errors;		/**< Total number of packets with errors. */
+};
+
+/**
+ * @internal
+ * Increment counters for a single packet.
+ *
+ * @param counters
+ *    Pointer to queue structure containing counters.
+ * @param sz
+ *    Size of the packet in bytes.
+ */
+__rte_internal
+static inline void
+rte_eth_count_packet(struct rte_eth_counters *counters, uint32_t sz)
+{
+	rte_counter64_add(&counters->packets, 1);
+	rte_counter64_add(&counters->bytes, sz);
+}
+
+/**
+ * @internal
+ * Increment counters based on mbuf.
+ *
+ * @param counters
+ *    Pointer to queue structure containing counters.
+ * @param mbuf
+ *    Received or transmitted mbuf.
+ */
+__rte_internal
+static inline void
+rte_eth_count_mbuf(struct rte_eth_counters *counters, const struct rte_mbuf *mbuf)
+{
+	rte_eth_count_packet(counters, rte_pktmbuf_pkt_len(mbuf));
+}
+
+/**
+ * @internal
+ * Increment error counter.
+ *
+ * @param counters
+ *    Pointer to queue structure containing counters.
+ */
+__rte_internal
+static inline void
+rte_eth_count_error(struct rte_eth_counters *counters)
+{
+	rte_counter64_add(&counters->errors, 1);
+}
+
+/**
+ * @internal
+ * Retrieve the general statistics for all queues.
+ * @see rte_eth_stats_get.
+ *
+ * @param dev
+ *    Pointer to the Ethernet device structure.
+ * @param tx_offset
+ *    Offset from the tx_queue structure where stats are located.
+ * @param rx_offset
+ *    Offset from the rx_queue structure where stats are located.
+ * @param stats
+ *   A pointer to a structure of type *rte_eth_stats* to be filled
+ * @return
+ *   Zero if successful. Non-zero otherwise.
+ */
+__rte_internal
+int rte_eth_counters_stats_get(const struct rte_eth_dev *dev,
+			       size_t tx_offset, size_t rx_offset,
+			       struct rte_eth_stats *stats);
+
+/**
+ * @internal
+ * Reset the statistics for all queues.
+ * @see rte_eth_stats_reset.
+ *
+ * @param dev
+ *    Pointer to the Ethernet device structure.
+ * @param tx_offset
+ *    Offset from the tx_queue structure where stats are located.
+ * @param rx_offset
+ *    Offset from the rx_queue structure where stats are located.
+ * @return
+ *   Zero if successful. Non-zero otherwise.
+ */
+__rte_internal
+int rte_eth_counters_reset(struct rte_eth_dev *dev,
+			   size_t tx_offset, size_t rx_offset);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ETHDEV_SWSTATS_H_ */
diff --git a/lib/ethdev/meson.build b/lib/ethdev/meson.build
index f1d2586591..7ce29a46d4 100644
--- a/lib/ethdev/meson.build
+++ b/lib/ethdev/meson.build
@@ -3,6 +3,7 @@
 
 sources = files(
         'ethdev_driver.c',
+        'ethdev_swstats.c',
         'ethdev_private.c',
         'ethdev_profile.c',
         'ethdev_trace_points.c',
@@ -42,6 +43,7 @@ driver_sdk_headers += files(
         'ethdev_driver.h',
         'ethdev_pci.h',
         'ethdev_vdev.h',
+        'ethdev_swstats.h',
 )
 
 if is_linux
diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
index 79f6f5293b..fc595be278 100644
--- a/lib/ethdev/version.map
+++ b/lib/ethdev/version.map
@@ -358,4 +358,7 @@ INTERNAL {
 	rte_eth_switch_domain_alloc;
 	rte_eth_switch_domain_free;
 	rte_flow_fp_default_ops;
+
+	rte_eth_counters_reset;
+	rte_eth_counters_stats_get;
 };
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v3 3/7] net/af_packet: use SW stats helper
  2024-05-14 15:35   ` [PATCH v3 0/7] Generic SW counters Stephen Hemminger
  2024-05-14 15:35     ` [PATCH v3 1/7] eal: generic 64 bit counter Stephen Hemminger
  2024-05-14 15:35     ` [PATCH v3 2/7] ethdev: add internal helper of SW driver statistics Stephen Hemminger
@ 2024-05-14 15:35     ` Stephen Hemminger
  2024-05-14 15:35     ` [PATCH v3 4/7] net/af_xdp: use generic SW stats Stephen Hemminger
                       ` (3 subsequent siblings)
  6 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-14 15:35 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, John W. Linville

Use the new generic SW stats.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/af_packet/rte_eth_af_packet.c | 82 ++++-------------------
 1 file changed, 14 insertions(+), 68 deletions(-)

diff --git a/drivers/net/af_packet/rte_eth_af_packet.c b/drivers/net/af_packet/rte_eth_af_packet.c
index 397a32db58..89b737e7dc 100644
--- a/drivers/net/af_packet/rte_eth_af_packet.c
+++ b/drivers/net/af_packet/rte_eth_af_packet.c
@@ -10,6 +10,7 @@
 #include <rte_mbuf.h>
 #include <ethdev_driver.h>
 #include <ethdev_vdev.h>
+#include <ethdev_swstats.h>
 #include <rte_malloc.h>
 #include <rte_kvargs.h>
 #include <bus_vdev_driver.h>
@@ -29,6 +30,7 @@
 #include <unistd.h>
 #include <poll.h>
 
+
 #define ETH_AF_PACKET_IFACE_ARG		"iface"
 #define ETH_AF_PACKET_NUM_Q_ARG		"qpairs"
 #define ETH_AF_PACKET_BLOCKSIZE_ARG	"blocksz"
@@ -51,8 +53,7 @@ struct pkt_rx_queue {
 	uint16_t in_port;
 	uint8_t vlan_strip;
 
-	volatile unsigned long rx_pkts;
-	volatile unsigned long rx_bytes;
+	struct rte_eth_counters stats;
 };
 
 struct pkt_tx_queue {
@@ -64,11 +65,10 @@ struct pkt_tx_queue {
 	unsigned int framecount;
 	unsigned int framenum;
 
-	volatile unsigned long tx_pkts;
-	volatile unsigned long err_pkts;
-	volatile unsigned long tx_bytes;
+	struct rte_eth_counters stats;
 };
 
+
 struct pmd_internals {
 	unsigned nb_queues;
 
@@ -118,8 +118,6 @@ eth_af_packet_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct rte_mbuf *mbuf;
 	uint8_t *pbuf;
 	struct pkt_rx_queue *pkt_q = queue;
-	uint16_t num_rx = 0;
-	unsigned long num_rx_bytes = 0;
 	unsigned int framecount, framenum;
 
 	if (unlikely(nb_pkts == 0))
@@ -164,13 +162,11 @@ eth_af_packet_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		/* account for the receive frame */
 		bufs[i] = mbuf;
-		num_rx++;
-		num_rx_bytes += mbuf->pkt_len;
+		rte_eth_count_mbuf(&pkt_q->stats, mbuf);
 	}
 	pkt_q->framenum = framenum;
-	pkt_q->rx_pkts += num_rx;
-	pkt_q->rx_bytes += num_rx_bytes;
-	return num_rx;
+
+	return i;
 }
 
 /*
@@ -205,8 +201,6 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	unsigned int framecount, framenum;
 	struct pollfd pfd;
 	struct pkt_tx_queue *pkt_q = queue;
-	uint16_t num_tx = 0;
-	unsigned long num_tx_bytes = 0;
 	int i;
 
 	if (unlikely(nb_pkts == 0))
@@ -285,8 +279,7 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			framenum = 0;
 		ppd = (struct tpacket2_hdr *) pkt_q->rd[framenum].iov_base;
 
-		num_tx++;
-		num_tx_bytes += mbuf->pkt_len;
+		rte_eth_count_mbuf(&pkt_q->stats, mbuf);
 		rte_pktmbuf_free(mbuf);
 	}
 
@@ -298,15 +291,9 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		 * packets will be considered successful even though only some
 		 * are sent.
 		 */
-
-		num_tx = 0;
-		num_tx_bytes = 0;
 	}
 
 	pkt_q->framenum = framenum;
-	pkt_q->tx_pkts += num_tx;
-	pkt_q->err_pkts += i - num_tx;
-	pkt_q->tx_bytes += num_tx_bytes;
 	return i;
 }
 
@@ -386,58 +373,17 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 }
 
 static int
-eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
+eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-	unsigned i, imax;
-	unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
-	unsigned long rx_bytes_total = 0, tx_bytes_total = 0;
-	const struct pmd_internals *internal = dev->data->dev_private;
-
-	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
-	        internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
-	for (i = 0; i < imax; i++) {
-		igb_stats->q_ipackets[i] = internal->rx_queue[i].rx_pkts;
-		igb_stats->q_ibytes[i] = internal->rx_queue[i].rx_bytes;
-		rx_total += igb_stats->q_ipackets[i];
-		rx_bytes_total += igb_stats->q_ibytes[i];
-	}
-
-	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
-	        internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
-	for (i = 0; i < imax; i++) {
-		igb_stats->q_opackets[i] = internal->tx_queue[i].tx_pkts;
-		igb_stats->q_obytes[i] = internal->tx_queue[i].tx_bytes;
-		tx_total += igb_stats->q_opackets[i];
-		tx_err_total += internal->tx_queue[i].err_pkts;
-		tx_bytes_total += igb_stats->q_obytes[i];
-	}
-
-	igb_stats->ipackets = rx_total;
-	igb_stats->ibytes = rx_bytes_total;
-	igb_stats->opackets = tx_total;
-	igb_stats->oerrors = tx_err_total;
-	igb_stats->obytes = tx_bytes_total;
-	return 0;
+	return rte_eth_counters_stats_get(dev, offsetof(struct pkt_tx_queue, stats),
+					  offsetof(struct pkt_rx_queue, stats), stats);
 }
 
 static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
-	unsigned i;
-	struct pmd_internals *internal = dev->data->dev_private;
-
-	for (i = 0; i < internal->nb_queues; i++) {
-		internal->rx_queue[i].rx_pkts = 0;
-		internal->rx_queue[i].rx_bytes = 0;
-	}
-
-	for (i = 0; i < internal->nb_queues; i++) {
-		internal->tx_queue[i].tx_pkts = 0;
-		internal->tx_queue[i].err_pkts = 0;
-		internal->tx_queue[i].tx_bytes = 0;
-	}
-
-	return 0;
+	return rte_eth_counters_reset(dev, offsetof(struct pkt_tx_queue, stats),
+				      offsetof(struct pkt_rx_queue, stats));
 }
 
 static int
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v3 4/7] net/af_xdp: use generic SW stats
  2024-05-14 15:35   ` [PATCH v3 0/7] Generic SW counters Stephen Hemminger
                       ` (2 preceding siblings ...)
  2024-05-14 15:35     ` [PATCH v3 3/7] net/af_packet: use SW stats helper Stephen Hemminger
@ 2024-05-14 15:35     ` Stephen Hemminger
  2024-05-14 15:35     ` [PATCH v3 5/7] net/pcap: " Stephen Hemminger
                       ` (2 subsequent siblings)
  6 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-14 15:35 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Ciara Loftus

Use common code for all SW stats.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/af_xdp/rte_eth_af_xdp.c | 98 ++++++++---------------------
 1 file changed, 25 insertions(+), 73 deletions(-)

diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 268a130c49..65fc2f478f 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -20,6 +20,7 @@
 #include <rte_ethdev.h>
 #include <ethdev_driver.h>
 #include <ethdev_vdev.h>
+#include <ethdev_swstats.h>
 #include <rte_kvargs.h>
 #include <bus_vdev_driver.h>
 #include <rte_string_fns.h>
@@ -120,19 +121,13 @@ struct xsk_umem_info {
 	uint32_t max_xsks;
 };
 
-struct rx_stats {
-	uint64_t rx_pkts;
-	uint64_t rx_bytes;
-	uint64_t rx_dropped;
-};
-
 struct pkt_rx_queue {
 	struct xsk_ring_cons rx;
 	struct xsk_umem_info *umem;
 	struct xsk_socket *xsk;
 	struct rte_mempool *mb_pool;
 
-	struct rx_stats stats;
+	struct rte_eth_counters stats;
 
 	struct xsk_ring_prod fq;
 	struct xsk_ring_cons cq;
@@ -143,17 +138,11 @@ struct pkt_rx_queue {
 	int busy_budget;
 };
 
-struct tx_stats {
-	uint64_t tx_pkts;
-	uint64_t tx_bytes;
-	uint64_t tx_dropped;
-};
-
 struct pkt_tx_queue {
 	struct xsk_ring_prod tx;
 	struct xsk_umem_info *umem;
 
-	struct tx_stats stats;
+	struct rte_eth_counters stats;
 
 	struct pkt_rx_queue *pair;
 	int xsk_queue_idx;
@@ -308,7 +297,6 @@ af_xdp_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct xsk_ring_prod *fq = &rxq->fq;
 	struct xsk_umem_info *umem = rxq->umem;
 	uint32_t idx_rx = 0;
-	unsigned long rx_bytes = 0;
 	int i;
 	struct rte_mbuf *fq_bufs[ETH_AF_XDP_RX_BATCH_SIZE];
 
@@ -363,16 +351,13 @@ af_xdp_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		rte_pktmbuf_pkt_len(bufs[i]) = len;
 		rte_pktmbuf_data_len(bufs[i]) = len;
-		rx_bytes += len;
+
+		rte_eth_count_mbuf(&rxq->stats, bufs[i]);
 	}
 
 	xsk_ring_cons__release(rx, nb_pkts);
 	(void)reserve_fill_queue(umem, nb_pkts, fq_bufs, fq);
 
-	/* statistics */
-	rxq->stats.rx_pkts += nb_pkts;
-	rxq->stats.rx_bytes += rx_bytes;
-
 	return nb_pkts;
 }
 #else
@@ -384,7 +369,6 @@ af_xdp_rx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct xsk_umem_info *umem = rxq->umem;
 	struct xsk_ring_prod *fq = &rxq->fq;
 	uint32_t idx_rx = 0;
-	unsigned long rx_bytes = 0;
 	int i;
 	uint32_t free_thresh = fq->size >> 1;
 	struct rte_mbuf *mbufs[ETH_AF_XDP_RX_BATCH_SIZE];
@@ -424,16 +408,13 @@ af_xdp_rx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		rte_ring_enqueue(umem->buf_ring, (void *)addr);
 		rte_pktmbuf_pkt_len(mbufs[i]) = len;
 		rte_pktmbuf_data_len(mbufs[i]) = len;
-		rx_bytes += len;
+		rte_eth_count_mbuf(&rxq->stats, mbufs[i]);
+
 		bufs[i] = mbufs[i];
 	}
 
 	xsk_ring_cons__release(rx, nb_pkts);
 
-	/* statistics */
-	rxq->stats.rx_pkts += nb_pkts;
-	rxq->stats.rx_bytes += rx_bytes;
-
 	return nb_pkts;
 }
 #endif
@@ -527,9 +508,8 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct pkt_tx_queue *txq = queue;
 	struct xsk_umem_info *umem = txq->umem;
 	struct rte_mbuf *mbuf;
-	unsigned long tx_bytes = 0;
 	int i;
-	uint32_t idx_tx;
+	uint32_t idx_tx, pkt_len;
 	uint16_t count = 0;
 	struct xdp_desc *desc;
 	uint64_t addr, offset;
@@ -541,6 +521,7 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 	for (i = 0; i < nb_pkts; i++) {
 		mbuf = bufs[i];
+		pkt_len = rte_pktmbuf_pkt_len(mbuf);
 
 		if (mbuf->pool == umem->mb_pool) {
 			if (!xsk_ring_prod__reserve(&txq->tx, 1, &idx_tx)) {
@@ -589,17 +570,13 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			count++;
 		}
 
-		tx_bytes += mbuf->pkt_len;
+		rte_eth_count_packet(&txq->stats, pkt_len);
 	}
 
 out:
 	xsk_ring_prod__submit(&txq->tx, count);
 	kick_tx(txq, cq);
 
-	txq->stats.tx_pkts += count;
-	txq->stats.tx_bytes += tx_bytes;
-	txq->stats.tx_dropped += nb_pkts - count;
-
 	return count;
 }
 #else
@@ -610,7 +587,6 @@ af_xdp_tx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct xsk_umem_info *umem = txq->umem;
 	struct rte_mbuf *mbuf;
 	void *addrs[ETH_AF_XDP_TX_BATCH_SIZE];
-	unsigned long tx_bytes = 0;
 	int i;
 	uint32_t idx_tx;
 	struct xsk_ring_cons *cq = &txq->pair->cq;
@@ -640,7 +616,8 @@ af_xdp_tx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		pkt = xsk_umem__get_data(umem->mz->addr,
 					 desc->addr);
 		rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *), desc->len);
-		tx_bytes += mbuf->pkt_len;
+		rte_eth_qsw_update(&txq->stats, mbuf);
+
 		rte_pktmbuf_free(mbuf);
 	}
 
@@ -648,9 +625,6 @@ af_xdp_tx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 	kick_tx(txq, cq);
 
-	txq->stats.tx_pkts += nb_pkts;
-	txq->stats.tx_bytes += tx_bytes;
-
 	return nb_pkts;
 }
 
@@ -847,39 +821,26 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 static int
 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-	struct pmd_internals *internals = dev->data->dev_private;
 	struct pmd_process_private *process_private = dev->process_private;
-	struct xdp_statistics xdp_stats;
-	struct pkt_rx_queue *rxq;
-	struct pkt_tx_queue *txq;
-	socklen_t optlen;
-	int i, ret, fd;
+	unsigned int i;
 
-	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		optlen = sizeof(struct xdp_statistics);
-		rxq = &internals->rx_queues[i];
-		txq = rxq->pair;
-		stats->q_ipackets[i] = rxq->stats.rx_pkts;
-		stats->q_ibytes[i] = rxq->stats.rx_bytes;
+	rte_eth_counters_stats_get(dev, offsetof(struct pkt_tx_queue, stats),
+				   offsetof(struct pkt_rx_queue, stats), stats);
 
-		stats->q_opackets[i] = txq->stats.tx_pkts;
-		stats->q_obytes[i] = txq->stats.tx_bytes;
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		struct xdp_statistics xdp_stats;
+		socklen_t optlen = sizeof(xdp_stats);
+		int fd;
 
-		stats->ipackets += stats->q_ipackets[i];
-		stats->ibytes += stats->q_ibytes[i];
-		stats->imissed += rxq->stats.rx_dropped;
-		stats->oerrors += txq->stats.tx_dropped;
 		fd = process_private->rxq_xsk_fds[i];
-		ret = fd >= 0 ? getsockopt(fd, SOL_XDP, XDP_STATISTICS,
-					   &xdp_stats, &optlen) : -1;
-		if (ret != 0) {
+		if (fd < 0)
+			continue;
+		if (getsockopt(fd, SOL_XDP, XDP_STATISTICS,
+			       &xdp_stats, &optlen)  < 0) {
 			AF_XDP_LOG(ERR, "getsockopt() failed for XDP_STATISTICS.\n");
 			return -1;
 		}
 		stats->imissed += xdp_stats.rx_dropped;
-
-		stats->opackets += stats->q_opackets[i];
-		stats->obytes += stats->q_obytes[i];
 	}
 
 	return 0;
@@ -888,17 +849,8 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
-	struct pmd_internals *internals = dev->data->dev_private;
-	int i;
-
-	for (i = 0; i < internals->queue_cnt; i++) {
-		memset(&internals->rx_queues[i].stats, 0,
-					sizeof(struct rx_stats));
-		memset(&internals->tx_queues[i].stats, 0,
-					sizeof(struct tx_stats));
-	}
-
-	return 0;
+	return rte_eth_counters_reset(dev, offsetof(struct pkt_tx_queue, stats),
+				      offsetof(struct pkt_rx_queue, stats));
 }
 
 #ifdef RTE_NET_AF_XDP_LIBBPF_XDP_ATTACH
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v3 5/7] net/pcap: use generic SW stats
  2024-05-14 15:35   ` [PATCH v3 0/7] Generic SW counters Stephen Hemminger
                       ` (3 preceding siblings ...)
  2024-05-14 15:35     ` [PATCH v3 4/7] net/af_xdp: use generic SW stats Stephen Hemminger
@ 2024-05-14 15:35     ` Stephen Hemminger
  2024-05-14 15:35     ` [PATCH v3 6/7] net/ring: " Stephen Hemminger
  2024-05-14 15:35     ` [PATCH v3 7/7] net/tap: " Stephen Hemminger
  6 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-14 15:35 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

Use common statistics for SW drivers.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/pcap/pcap_ethdev.c | 125 +++++++--------------------------
 1 file changed, 26 insertions(+), 99 deletions(-)

diff --git a/drivers/net/pcap/pcap_ethdev.c b/drivers/net/pcap/pcap_ethdev.c
index bfec085045..b1a983f871 100644
--- a/drivers/net/pcap/pcap_ethdev.c
+++ b/drivers/net/pcap/pcap_ethdev.c
@@ -11,6 +11,7 @@
 
 #include <rte_cycles.h>
 #include <ethdev_driver.h>
+#include <ethdev_swstats.h>
 #include <ethdev_vdev.h>
 #include <rte_kvargs.h>
 #include <rte_malloc.h>
@@ -48,13 +49,6 @@ static uint8_t iface_idx;
 static uint64_t timestamp_rx_dynflag;
 static int timestamp_dynfield_offset = -1;
 
-struct queue_stat {
-	volatile unsigned long pkts;
-	volatile unsigned long bytes;
-	volatile unsigned long err_pkts;
-	volatile unsigned long rx_nombuf;
-};
-
 struct queue_missed_stat {
 	/* last value retrieved from pcap */
 	unsigned int pcap;
@@ -68,7 +62,7 @@ struct pcap_rx_queue {
 	uint16_t port_id;
 	uint16_t queue_id;
 	struct rte_mempool *mb_pool;
-	struct queue_stat rx_stat;
+	struct rte_eth_counters rx_stat;
 	struct queue_missed_stat missed_stat;
 	char name[PATH_MAX];
 	char type[ETH_PCAP_ARG_MAXLEN];
@@ -80,7 +74,7 @@ struct pcap_rx_queue {
 struct pcap_tx_queue {
 	uint16_t port_id;
 	uint16_t queue_id;
-	struct queue_stat tx_stat;
+	struct rte_eth_counters tx_stat;
 	char name[PATH_MAX];
 	char type[ETH_PCAP_ARG_MAXLEN];
 };
@@ -238,7 +232,6 @@ eth_pcap_rx_infinite(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
 	int i;
 	struct pcap_rx_queue *pcap_q = queue;
-	uint32_t rx_bytes = 0;
 
 	if (unlikely(nb_pkts == 0))
 		return 0;
@@ -252,39 +245,35 @@ eth_pcap_rx_infinite(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		if (err)
 			return i;
 
+		rte_eth_count_mbuf(&pcap_q->rx_stat, pcap_buf);
+
 		rte_memcpy(rte_pktmbuf_mtod(bufs[i], void *),
 				rte_pktmbuf_mtod(pcap_buf, void *),
 				pcap_buf->data_len);
 		bufs[i]->data_len = pcap_buf->data_len;
 		bufs[i]->pkt_len = pcap_buf->pkt_len;
 		bufs[i]->port = pcap_q->port_id;
-		rx_bytes += pcap_buf->data_len;
+
 
 		/* Enqueue packet back on ring to allow infinite rx. */
 		rte_ring_enqueue(pcap_q->pkts, pcap_buf);
 	}
 
-	pcap_q->rx_stat.pkts += i;
-	pcap_q->rx_stat.bytes += rx_bytes;
-
 	return i;
 }
 
 static uint16_t
 eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
+	struct pcap_rx_queue *pcap_q = queue;
+	struct rte_eth_dev *dev = &rte_eth_devices[pcap_q->port_id];
+	struct pmd_process_private *pp = dev->process_private;
+	pcap_t *pcap = pp->rx_pcap[pcap_q->queue_id];
 	unsigned int i;
 	struct pcap_pkthdr header;
-	struct pmd_process_private *pp;
 	const u_char *packet;
 	struct rte_mbuf *mbuf;
-	struct pcap_rx_queue *pcap_q = queue;
 	uint16_t num_rx = 0;
-	uint32_t rx_bytes = 0;
-	pcap_t *pcap;
-
-	pp = rte_eth_devices[pcap_q->port_id].process_private;
-	pcap = pp->rx_pcap[pcap_q->queue_id];
 
 	if (unlikely(pcap == NULL || nb_pkts == 0))
 		return 0;
@@ -300,7 +289,7 @@ eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		mbuf = rte_pktmbuf_alloc(pcap_q->mb_pool);
 		if (unlikely(mbuf == NULL)) {
-			pcap_q->rx_stat.rx_nombuf++;
+			++dev->data->rx_mbuf_alloc_failed;
 			break;
 		}
 
@@ -315,7 +304,7 @@ eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 						       mbuf,
 						       packet,
 						       header.caplen) == -1)) {
-				pcap_q->rx_stat.err_pkts++;
+				rte_eth_count_error(&pcap_q->rx_stat);
 				rte_pktmbuf_free(mbuf);
 				break;
 			}
@@ -329,11 +318,10 @@ eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		mbuf->ol_flags |= timestamp_rx_dynflag;
 		mbuf->port = pcap_q->port_id;
 		bufs[num_rx] = mbuf;
+
+		rte_eth_count_mbuf(&pcap_q->rx_stat, mbuf);
 		num_rx++;
-		rx_bytes += header.caplen;
 	}
-	pcap_q->rx_stat.pkts += num_rx;
-	pcap_q->rx_stat.bytes += rx_bytes;
 
 	return num_rx;
 }
@@ -379,8 +367,6 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct rte_mbuf *mbuf;
 	struct pmd_process_private *pp;
 	struct pcap_tx_queue *dumper_q = queue;
-	uint16_t num_tx = 0;
-	uint32_t tx_bytes = 0;
 	struct pcap_pkthdr header;
 	pcap_dumper_t *dumper;
 	unsigned char temp_data[RTE_ETH_PCAP_SNAPLEN];
@@ -412,8 +398,7 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		pcap_dump((u_char *)dumper, &header,
 			rte_pktmbuf_read(mbuf, 0, caplen, temp_data));
 
-		num_tx++;
-		tx_bytes += caplen;
+		rte_eth_count_mbuf(&dumper_q->tx_stat, mbuf);
 		rte_pktmbuf_free(mbuf);
 	}
 
@@ -423,9 +408,6 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	 * we flush the pcap dumper within each burst.
 	 */
 	pcap_dump_flush(dumper);
-	dumper_q->tx_stat.pkts += num_tx;
-	dumper_q->tx_stat.bytes += tx_bytes;
-	dumper_q->tx_stat.err_pkts += nb_pkts - num_tx;
 
 	return nb_pkts;
 }
@@ -437,20 +419,16 @@ static uint16_t
 eth_tx_drop(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
 	unsigned int i;
-	uint32_t tx_bytes = 0;
 	struct pcap_tx_queue *tx_queue = queue;
 
 	if (unlikely(nb_pkts == 0))
 		return 0;
 
 	for (i = 0; i < nb_pkts; i++) {
-		tx_bytes += bufs[i]->pkt_len;
+		rte_eth_count_mbuf(&tx_queue->tx_stat, bufs[i]);
 		rte_pktmbuf_free(bufs[i]);
 	}
 
-	tx_queue->tx_stat.pkts += nb_pkts;
-	tx_queue->tx_stat.bytes += tx_bytes;
-
 	return i;
 }
 
@@ -465,8 +443,6 @@ eth_pcap_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct rte_mbuf *mbuf;
 	struct pmd_process_private *pp;
 	struct pcap_tx_queue *tx_queue = queue;
-	uint16_t num_tx = 0;
-	uint32_t tx_bytes = 0;
 	pcap_t *pcap;
 	unsigned char temp_data[RTE_ETH_PCAP_SNAPLEN];
 	size_t len;
@@ -497,15 +473,11 @@ eth_pcap_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			rte_pktmbuf_read(mbuf, 0, len, temp_data), len);
 		if (unlikely(ret != 0))
 			break;
-		num_tx++;
-		tx_bytes += len;
+
+		rte_eth_count_mbuf(&tx_queue->tx_stat, mbuf);
 		rte_pktmbuf_free(mbuf);
 	}
 
-	tx_queue->tx_stat.pkts += num_tx;
-	tx_queue->tx_stat.bytes += tx_bytes;
-	tx_queue->tx_stat.err_pkts += i - num_tx;
-
 	return i;
 }
 
@@ -746,41 +718,12 @@ static int
 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
 	unsigned int i;
-	unsigned long rx_packets_total = 0, rx_bytes_total = 0;
-	unsigned long rx_missed_total = 0;
-	unsigned long rx_nombuf_total = 0, rx_err_total = 0;
-	unsigned long tx_packets_total = 0, tx_bytes_total = 0;
-	unsigned long tx_packets_err_total = 0;
-	const struct pmd_internals *internal = dev->data->dev_private;
-
-	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
-			i < dev->data->nb_rx_queues; i++) {
-		stats->q_ipackets[i] = internal->rx_queue[i].rx_stat.pkts;
-		stats->q_ibytes[i] = internal->rx_queue[i].rx_stat.bytes;
-		rx_nombuf_total += internal->rx_queue[i].rx_stat.rx_nombuf;
-		rx_err_total += internal->rx_queue[i].rx_stat.err_pkts;
-		rx_packets_total += stats->q_ipackets[i];
-		rx_bytes_total += stats->q_ibytes[i];
-		rx_missed_total += queue_missed_stat_get(dev, i);
-	}
 
-	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
-			i < dev->data->nb_tx_queues; i++) {
-		stats->q_opackets[i] = internal->tx_queue[i].tx_stat.pkts;
-		stats->q_obytes[i] = internal->tx_queue[i].tx_stat.bytes;
-		tx_packets_total += stats->q_opackets[i];
-		tx_bytes_total += stats->q_obytes[i];
-		tx_packets_err_total += internal->tx_queue[i].tx_stat.err_pkts;
-	}
+	rte_eth_counters_stats_get(dev, offsetof(struct pcap_tx_queue, tx_stat),
+				   offsetof(struct pcap_rx_queue, rx_stat), stats);
 
-	stats->ipackets = rx_packets_total;
-	stats->ibytes = rx_bytes_total;
-	stats->imissed = rx_missed_total;
-	stats->ierrors = rx_err_total;
-	stats->rx_nombuf = rx_nombuf_total;
-	stats->opackets = tx_packets_total;
-	stats->obytes = tx_bytes_total;
-	stats->oerrors = tx_packets_err_total;
+	for (i = 0; i < dev->data->nb_rx_queues; i++)
+		stats->imissed += queue_missed_stat_get(dev, i);
 
 	return 0;
 }
@@ -789,21 +732,12 @@ static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
 	unsigned int i;
-	struct pmd_internals *internal = dev->data->dev_private;
 
-	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		internal->rx_queue[i].rx_stat.pkts = 0;
-		internal->rx_queue[i].rx_stat.bytes = 0;
-		internal->rx_queue[i].rx_stat.err_pkts = 0;
-		internal->rx_queue[i].rx_stat.rx_nombuf = 0;
-		queue_missed_stat_reset(dev, i);
-	}
+	rte_eth_counters_reset(dev, offsetof(struct pcap_tx_queue, tx_stat),
+			       offsetof(struct pcap_rx_queue, rx_stat));
 
-	for (i = 0; i < dev->data->nb_tx_queues; i++) {
-		internal->tx_queue[i].tx_stat.pkts = 0;
-		internal->tx_queue[i].tx_stat.bytes = 0;
-		internal->tx_queue[i].tx_stat.err_pkts = 0;
-	}
+	for (i = 0; i < dev->data->nb_rx_queues; i++)
+		queue_missed_stat_reset(dev, i);
 
 	return 0;
 }
@@ -929,13 +863,6 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 				pcap_pkt_count);
 			return -EINVAL;
 		}
-
-		/*
-		 * Reset the stats for this queue since eth_pcap_rx calls above
-		 * didn't result in the application receiving packets.
-		 */
-		pcap_q->rx_stat.pkts = 0;
-		pcap_q->rx_stat.bytes = 0;
 	}
 
 	return 0;
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v3 6/7] net/ring: use generic SW stats
  2024-05-14 15:35   ` [PATCH v3 0/7] Generic SW counters Stephen Hemminger
                       ` (4 preceding siblings ...)
  2024-05-14 15:35     ` [PATCH v3 5/7] net/pcap: " Stephen Hemminger
@ 2024-05-14 15:35     ` Stephen Hemminger
  2024-05-14 15:35     ` [PATCH v3 7/7] net/tap: " Stephen Hemminger
  6 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-14 15:35 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Bruce Richardson

Use generic per-queue infrastructure.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/ring/rte_eth_ring.c | 71 +++++++++++++--------------------
 1 file changed, 28 insertions(+), 43 deletions(-)

diff --git a/drivers/net/ring/rte_eth_ring.c b/drivers/net/ring/rte_eth_ring.c
index 48953dd7a0..85f14dd679 100644
--- a/drivers/net/ring/rte_eth_ring.c
+++ b/drivers/net/ring/rte_eth_ring.c
@@ -7,6 +7,7 @@
 #include "rte_eth_ring.h"
 #include <rte_mbuf.h>
 #include <ethdev_driver.h>
+#include <ethdev_swstats.h>
 #include <rte_malloc.h>
 #include <rte_memcpy.h>
 #include <rte_os_shim.h>
@@ -44,8 +45,8 @@ enum dev_action {
 
 struct ring_queue {
 	struct rte_ring *rng;
-	uint64_t rx_pkts;
-	uint64_t tx_pkts;
+
+	struct rte_eth_counters stats;
 };
 
 struct pmd_internals {
@@ -77,12 +78,13 @@ eth_ring_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 {
 	void **ptrs = (void *)&bufs[0];
 	struct ring_queue *r = q;
-	const uint16_t nb_rx = (uint16_t)rte_ring_dequeue_burst(r->rng,
-			ptrs, nb_bufs, NULL);
-	if (r->rng->flags & RING_F_SC_DEQ)
-		r->rx_pkts += nb_rx;
-	else
-		__atomic_fetch_add(&r->rx_pkts, nb_rx, __ATOMIC_RELAXED);
+	uint16_t i, nb_rx;
+
+	nb_rx = (uint16_t)rte_ring_dequeue_burst(r->rng, ptrs, nb_bufs, NULL);
+
+	for (i = 0; i < nb_rx; i++)
+		rte_eth_count_mbuf(&r->stats, bufs[i]);
+
 	return nb_rx;
 }
 
@@ -90,13 +92,20 @@ static uint16_t
 eth_ring_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 {
 	void **ptrs = (void *)&bufs[0];
+	uint32_t *sizes;
 	struct ring_queue *r = q;
-	const uint16_t nb_tx = (uint16_t)rte_ring_enqueue_burst(r->rng,
-			ptrs, nb_bufs, NULL);
-	if (r->rng->flags & RING_F_SP_ENQ)
-		r->tx_pkts += nb_tx;
-	else
-		__atomic_fetch_add(&r->tx_pkts, nb_tx, __ATOMIC_RELAXED);
+	uint16_t i, nb_tx;
+
+	sizes = alloca(sizeof(uint32_t) * nb_bufs);
+
+	for (i = 0; i < nb_bufs; i++)
+		sizes[i] = rte_pktmbuf_pkt_len(bufs[i]);
+
+	nb_tx = (uint16_t)rte_ring_enqueue_burst(r->rng, ptrs, nb_bufs, NULL);
+
+	for (i = 0; i < nb_tx; i++)
+		rte_eth_count_packet(&r->stats, sizes[i]);
+
 	return nb_tx;
 }
 
@@ -193,40 +202,16 @@ eth_dev_info(struct rte_eth_dev *dev,
 static int
 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-	unsigned int i;
-	unsigned long rx_total = 0, tx_total = 0;
-	const struct pmd_internals *internal = dev->data->dev_private;
-
-	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
-			i < dev->data->nb_rx_queues; i++) {
-		stats->q_ipackets[i] = internal->rx_ring_queues[i].rx_pkts;
-		rx_total += stats->q_ipackets[i];
-	}
-
-	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
-			i < dev->data->nb_tx_queues; i++) {
-		stats->q_opackets[i] = internal->tx_ring_queues[i].tx_pkts;
-		tx_total += stats->q_opackets[i];
-	}
-
-	stats->ipackets = rx_total;
-	stats->opackets = tx_total;
-
-	return 0;
+	return rte_eth_counters_stats_get(dev, offsetof(struct ring_queue, stats),
+					  offsetof(struct ring_queue, stats),
+					  stats);
 }
 
 static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
-	unsigned int i;
-	struct pmd_internals *internal = dev->data->dev_private;
-
-	for (i = 0; i < dev->data->nb_rx_queues; i++)
-		internal->rx_ring_queues[i].rx_pkts = 0;
-	for (i = 0; i < dev->data->nb_tx_queues; i++)
-		internal->tx_ring_queues[i].tx_pkts = 0;
-
-	return 0;
+	return rte_eth_counters_reset(dev, offsetof(struct ring_queue, stats),
+				      offsetof(struct ring_queue, stats));
 }
 
 static void
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v3 7/7] net/tap: use generic SW stats
  2024-05-14 15:35   ` [PATCH v3 0/7] Generic SW counters Stephen Hemminger
                       ` (5 preceding siblings ...)
  2024-05-14 15:35     ` [PATCH v3 6/7] net/ring: " Stephen Hemminger
@ 2024-05-14 15:35     ` Stephen Hemminger
  6 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-14 15:35 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

Use new common sw statistics.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/tap/rte_eth_tap.c | 88 ++++++-----------------------------
 drivers/net/tap/rte_eth_tap.h | 15 ++----
 2 files changed, 18 insertions(+), 85 deletions(-)

diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index 69d9da695b..f87979da4f 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -432,7 +432,6 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct rx_queue *rxq = queue;
 	struct pmd_process_private *process_private;
 	uint16_t num_rx;
-	unsigned long num_rx_bytes = 0;
 	uint32_t trigger = tap_trigger;
 
 	if (trigger == rxq->trigger_seen)
@@ -455,7 +454,7 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		/* Packet couldn't fit in the provided mbuf */
 		if (unlikely(rxq->pi.flags & TUN_PKT_STRIP)) {
-			rxq->stats.ierrors++;
+			rte_eth_count_error(&rxq->stats);
 			continue;
 		}
 
@@ -467,7 +466,9 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			struct rte_mbuf *buf = rte_pktmbuf_alloc(rxq->mp);
 
 			if (unlikely(!buf)) {
-				rxq->stats.rx_nombuf++;
+				struct rte_eth_dev *dev = &rte_eth_devices[rxq->in_port];
+				++dev->data->rx_mbuf_alloc_failed;
+
 				/* No new buf has been allocated: do nothing */
 				if (!new_tail || !seg)
 					goto end;
@@ -509,11 +510,9 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		/* account for the receive frame */
 		bufs[num_rx++] = mbuf;
-		num_rx_bytes += mbuf->pkt_len;
+		rte_eth_count_mbuf(&rxq->stats, mbuf);
 	}
 end:
-	rxq->stats.ipackets += num_rx;
-	rxq->stats.ibytes += num_rx_bytes;
 
 	if (trigger && num_rx < nb_pkts)
 		rxq->trigger_seen = trigger;
@@ -523,8 +522,7 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 static inline int
 tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs,
-			struct rte_mbuf **pmbufs,
-			uint16_t *num_packets, unsigned long *num_tx_bytes)
+		struct rte_mbuf **pmbufs)
 {
 	struct pmd_process_private *process_private;
 	int i;
@@ -647,8 +645,7 @@ tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs,
 		if (n <= 0)
 			return -1;
 
-		(*num_packets)++;
-		(*num_tx_bytes) += rte_pktmbuf_pkt_len(mbuf);
+		rte_eth_count_mbuf(&txq->stats, mbuf);
 	}
 	return 0;
 }
@@ -660,8 +657,6 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
 	struct tx_queue *txq = queue;
 	uint16_t num_tx = 0;
-	uint16_t num_packets = 0;
-	unsigned long num_tx_bytes = 0;
 	uint32_t max_size;
 	int i;
 
@@ -693,7 +688,7 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			tso_segsz = mbuf_in->tso_segsz + hdrs_len;
 			if (unlikely(tso_segsz == hdrs_len) ||
 				tso_segsz > *txq->mtu) {
-				txq->stats.errs++;
+				rte_eth_count_error(&txq->stats);
 				break;
 			}
 			gso_ctx->gso_size = tso_segsz;
@@ -728,10 +723,10 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			num_mbufs = 1;
 		}
 
-		ret = tap_write_mbufs(txq, num_mbufs, mbuf,
-				&num_packets, &num_tx_bytes);
+		ret = tap_write_mbufs(txq, num_mbufs, mbuf);
 		if (ret == -1) {
-			txq->stats.errs++;
+			rte_eth_count_error(&txq->stats);
+
 			/* free tso mbufs */
 			if (num_tso_mbufs > 0)
 				rte_pktmbuf_free_bulk(mbuf, num_tso_mbufs);
@@ -749,10 +744,6 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		}
 	}
 
-	txq->stats.opackets += num_packets;
-	txq->stats.errs += nb_pkts - num_tx;
-	txq->stats.obytes += num_tx_bytes;
-
 	return num_tx;
 }
 
@@ -1055,64 +1046,15 @@ tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 static int
 tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats)
 {
-	unsigned int i, imax;
-	unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
-	unsigned long rx_bytes_total = 0, tx_bytes_total = 0;
-	unsigned long rx_nombuf = 0, ierrors = 0;
-	const struct pmd_internals *pmd = dev->data->dev_private;
-
-	/* rx queue statistics */
-	imax = (dev->data->nb_rx_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ?
-		dev->data->nb_rx_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS;
-	for (i = 0; i < imax; i++) {
-		tap_stats->q_ipackets[i] = pmd->rxq[i].stats.ipackets;
-		tap_stats->q_ibytes[i] = pmd->rxq[i].stats.ibytes;
-		rx_total += tap_stats->q_ipackets[i];
-		rx_bytes_total += tap_stats->q_ibytes[i];
-		rx_nombuf += pmd->rxq[i].stats.rx_nombuf;
-		ierrors += pmd->rxq[i].stats.ierrors;
-	}
-
-	/* tx queue statistics */
-	imax = (dev->data->nb_tx_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ?
-		dev->data->nb_tx_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS;
-
-	for (i = 0; i < imax; i++) {
-		tap_stats->q_opackets[i] = pmd->txq[i].stats.opackets;
-		tap_stats->q_obytes[i] = pmd->txq[i].stats.obytes;
-		tx_total += tap_stats->q_opackets[i];
-		tx_err_total += pmd->txq[i].stats.errs;
-		tx_bytes_total += tap_stats->q_obytes[i];
-	}
-
-	tap_stats->ipackets = rx_total;
-	tap_stats->ibytes = rx_bytes_total;
-	tap_stats->ierrors = ierrors;
-	tap_stats->rx_nombuf = rx_nombuf;
-	tap_stats->opackets = tx_total;
-	tap_stats->oerrors = tx_err_total;
-	tap_stats->obytes = tx_bytes_total;
-	return 0;
+	return rte_eth_counters_stats_get(dev, offsetof(struct tx_queue, stats),
+					  offsetof(struct rx_queue, stats), tap_stats);
 }
 
 static int
 tap_stats_reset(struct rte_eth_dev *dev)
 {
-	int i;
-	struct pmd_internals *pmd = dev->data->dev_private;
-
-	for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
-		pmd->rxq[i].stats.ipackets = 0;
-		pmd->rxq[i].stats.ibytes = 0;
-		pmd->rxq[i].stats.ierrors = 0;
-		pmd->rxq[i].stats.rx_nombuf = 0;
-
-		pmd->txq[i].stats.opackets = 0;
-		pmd->txq[i].stats.errs = 0;
-		pmd->txq[i].stats.obytes = 0;
-	}
-
-	return 0;
+	return rte_eth_counters_reset(dev, offsetof(struct tx_queue, stats),
+				      offsetof(struct rx_queue, stats));
 }
 
 static int
diff --git a/drivers/net/tap/rte_eth_tap.h b/drivers/net/tap/rte_eth_tap.h
index 5ac93f93e9..8cba9ea410 100644
--- a/drivers/net/tap/rte_eth_tap.h
+++ b/drivers/net/tap/rte_eth_tap.h
@@ -14,6 +14,7 @@
 #include <linux/if_tun.h>
 
 #include <ethdev_driver.h>
+#include <ethdev_swstats.h>
 #include <rte_ether.h>
 #include <rte_gso.h>
 #include "tap_log.h"
@@ -32,23 +33,13 @@ enum rte_tuntap_type {
 	ETH_TUNTAP_TYPE_MAX,
 };
 
-struct pkt_stats {
-	uint64_t opackets;              /* Number of output packets */
-	uint64_t ipackets;              /* Number of input packets */
-	uint64_t obytes;                /* Number of bytes on output */
-	uint64_t ibytes;                /* Number of bytes on input */
-	uint64_t errs;                  /* Number of TX error packets */
-	uint64_t ierrors;               /* Number of RX error packets */
-	uint64_t rx_nombuf;             /* Nb of RX mbuf alloc failures */
-};
-
 struct rx_queue {
 	struct rte_mempool *mp;         /* Mempool for RX packets */
 	uint32_t trigger_seen;          /* Last seen Rx trigger value */
 	uint16_t in_port;               /* Port ID */
 	uint16_t queue_id;		/* queue ID*/
-	struct pkt_stats stats;         /* Stats for this RX queue */
 	uint16_t nb_rx_desc;            /* max number of mbufs available */
+	struct rte_eth_counters stats;  /* Stats for this RX queue */
 	struct rte_eth_rxmode *rxmode;  /* RX features */
 	struct rte_mbuf *pool;          /* mbufs pool for this queue */
 	struct iovec (*iovecs)[];       /* descriptors for this queue */
@@ -59,7 +50,7 @@ struct tx_queue {
 	int type;                       /* Type field - TUN|TAP */
 	uint16_t *mtu;                  /* Pointer to MTU from dev_data */
 	uint16_t csum:1;                /* Enable checksum offloading */
-	struct pkt_stats stats;         /* Stats for this TX queue */
+	struct rte_eth_counters stats;	/* Stats for this TX queue */
 	struct rte_gso_ctx gso_ctx;     /* GSO context */
 	uint16_t out_port;              /* Port ID */
 	uint16_t queue_id;		/* queue ID*/
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v3 1/7] eal: generic 64 bit counter
  2024-05-14 15:35     ` [PATCH v3 1/7] eal: generic 64 bit counter Stephen Hemminger
@ 2024-05-15  9:30       ` Morten Brørup
  2024-05-15 15:03         ` Stephen Hemminger
  2024-05-26  7:34         ` Mattias Rönnblom
  2024-05-26  6:45       ` Mattias Rönnblom
  1 sibling, 2 replies; 179+ messages in thread
From: Morten Brørup @ 2024-05-15  9:30 UTC (permalink / raw)
  To: Stephen Hemminger, dev, Mattias Rönnblom,
	Mattias Rönnblom, Ferruh Yigit, bruce.richardson

+To: @Mattias, @Ferruh, @Bruce, participants in a related discussion

> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Tuesday, 14 May 2024 17.35
> 
> This header implements 64 bit counters that are NOT atomic
> but are safe against load/store splits on 32 bit platforms.
> 
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> Acked-by: Morten Brørup <mb@smartsharesystems.com>
> ---

With a long term perspective, I consider this patch very useful.
And its 32 bit implementation can be optimized for various architectures/compilers later.


In addition, it would be "nice to have" if reset() and fetch() could be called from another thread than the thread adding to the counter.

As previously discussed [1], I think it can be done without significantly affecting fast path add() performance, by using an "offset" with Release-Consume ordering.

[1]: https://inbox.dpdk.org/dev/98CBD80474FA8B44BF855DF32C47DC35E9F427@smartserver.smartshare.dk/


rte_counter64_add(rte_counter64_t *counter, uint32_t val)
{
	// Write "counter" with memory_order_relaxed, so
	// it eventually becomes visible in other threads.

	rte_counter64_t ctr = *counter + val;
	rte_atomic_store_explicit(counter, ctr, rte_memory_order_relaxed);
}

rte_counter64_get(rte_counter64_t *counter, rte_counter64_t *offset)
{
	// Read "offset" with memory_order_consume, so:
	// - no reads or writes in the current thread dependent on "offset"
	//   can be reordered before this load, and
	// - writes to "counter" (a data-dependent variable)
	//   in other threads that release "offset" are visible in the current thread.

	rte_counter64_t off = rte_atomic_load_explicit(offset, rte_memory_order_consume);
	rte_counter64_t ctr = rte_atomic_load_explicit(counter, rte_memory_order_relaxed);

	return ctr - off;
}

rte_counter64_reset(rte_counter64_t *counter, rte_counter64_t *offset)
{
	// Write "offset" with memory_order_release, so
	// "counter" cannot be visible after it.

	rte_counter64_t ctr = rte_atomic_load_explicit(offset, rte_memory_order_relaxed);
	rte_atomic_store_explicit(offset, ctr, rte_memory_order_release);
}


Support for counters shared by multi threads, e.g. rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed, should be provided too:

rte_counter64_mt_add(rte_counter64_t *counter, uint32_t val)
{
	rte_atomic_fetch_add_explicit(counter, val, rte_memory_order_relaxed);
}


>  lib/eal/include/meson.build   |  1 +
>  lib/eal/include/rte_counter.h | 91 +++++++++++++++++++++++++++++++++++
>  2 files changed, 92 insertions(+)
>  create mode 100644 lib/eal/include/rte_counter.h
> 
> diff --git a/lib/eal/include/meson.build b/lib/eal/include/meson.build
> index e94b056d46..c070dd0079 100644
> --- a/lib/eal/include/meson.build
> +++ b/lib/eal/include/meson.build
> @@ -12,6 +12,7 @@ headers += files(
>          'rte_class.h',
>          'rte_common.h',
>          'rte_compat.h',
> +        'rte_counter.h',
>          'rte_debug.h',
>          'rte_dev.h',
>          'rte_devargs.h',
> diff --git a/lib/eal/include/rte_counter.h b/lib/eal/include/rte_counter.h
> new file mode 100644
> index 0000000000..8068d6d26e
> --- /dev/null
> +++ b/lib/eal/include/rte_counter.h
> @@ -0,0 +1,91 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright (c) Stephen Hemminger <stephen@networkplumber.org>
> + */
> +
> +#ifndef _RTE_COUNTER_H_
> +#define _RTE_COUNTER_H_
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +/**
> + * @file
> + * RTE Counter
> + *
> + * A counter is 64 bit value that is safe from split read/write
> + * on 32 bit platforms. It assumes that only one cpu at a time
> + * will update the counter, and another CPU may want to read it.
> + *
> + * This is a much weaker guarantee than full atomic variables
> + * but is faster since no locked operations are required for update.
> + */
> +
> +#include <stdatomic.h>
> +
> +#ifdef RTE_ARCH_64
> +/*
> + * On a platform that can support native 64 bit type, no special handling.
> + * These are just wrapper around 64 bit value.
> + */
> +typedef uint64_t rte_counter64_t;
> +
> +/**
> + * Add value to counter.
> + */
> +__rte_experimental
> +static inline void
> +rte_counter64_add(rte_counter64_t *counter, uint32_t val)
> +{
> +	*counter += val;
> +}
> +
> +__rte_experimental
> +static inline uint64_t
> +rte_counter64_fetch(const rte_counter64_t *counter)
> +{
> +	return *counter;
> +}
> +
> +__rte_experimental
> +static inline void
> +rte_counter64_reset(rte_counter64_t *counter)
> +{
> +	*counter = 0;
> +}
> +
> +#else
> +/*
> + * On a 32 bit platform need to use atomic to force the compler to not
> + * split 64 bit read/write.
> + */
> +typedef RTE_ATOMIC(uint64_t) rte_counter64_t;
> +
> +__rte_experimental
> +static inline void
> +rte_counter64_add(rte_counter64_t *counter, uint32_t val)
> +{
> +	rte_atomic_fetch_add_explicit(counter, val, rte_memory_order_relaxed);
> +}
> +
> +__rte_experimental
> +static inline uint64_t
> +rte_counter64_fetch(const rte_counter64_t *counter)
> +{
> +	return rte_atomic_load_explicit(counter, rte_memory_order_relaxed);
> +}
> +
> +__rte_experimental
> +static inline void
> +rte_counter64_reset(rte_counter64_t *counter)
> +{
> +	rte_atomic_store_explicit(counter, 0, rte_memory_order_relaxed);
> +}
> +#endif
> +
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* _RTE_COUNTER_H_ */
> --
> 2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v3 1/7] eal: generic 64 bit counter
  2024-05-15  9:30       ` Morten Brørup
@ 2024-05-15 15:03         ` Stephen Hemminger
  2024-05-15 16:18           ` Morten Brørup
  2024-05-26  7:34         ` Mattias Rönnblom
  1 sibling, 1 reply; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-15 15:03 UTC (permalink / raw)
  To: Morten Brørup
  Cc: dev, Mattias Rönnblom, Mattias Rönnblom, Ferruh Yigit,
	bruce.richardson

On Wed, 15 May 2024 11:30:45 +0200
Morten Brørup <mb@smartsharesystems.com> wrote:

> With a long term perspective, I consider this patch very useful.
> And its 32 bit implementation can be optimized for various architectures/compilers later.
> 
> 
> In addition, it would be "nice to have" if reset() and fetch() could be called from another thread than the thread adding to the counter.
> 
> As previously discussed [1], I think it can be done without significantly affecting fast path add() performance, by using an "offset" with Release-Consume ordering.
> 
> [1]: https://inbox.dpdk.org/dev/98CBD80474FA8B44BF855DF32C47DC35E9F427@smartserver.smartshare.dk/
> 


Without a specific driver use case, not sure why this added complexity is needed.
If there is a specific example, can add it later. Any atomic operation ends up
impacting the speculative execution pipeline on modern CPU's. This version
ends up being just a single add instruction on ARM and x86 64 bit.

^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v3 1/7] eal: generic 64 bit counter
  2024-05-15 15:03         ` Stephen Hemminger
@ 2024-05-15 16:18           ` Morten Brørup
  0 siblings, 0 replies; 179+ messages in thread
From: Morten Brørup @ 2024-05-15 16:18 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: dev, Mattias Rönnblom, Mattias Rönnblom, Ferruh Yigit,
	bruce.richardson

> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Wednesday, 15 May 2024 17.03
> 
> On Wed, 15 May 2024 11:30:45 +0200
> Morten Brørup <mb@smartsharesystems.com> wrote:
> 
> > With a long term perspective, I consider this patch very useful.
> > And its 32 bit implementation can be optimized for various
> architectures/compilers later.
> >
> >
> > In addition, it would be "nice to have" if reset() and fetch() could
> be called from another thread than the thread adding to the counter.
> >
> > As previously discussed [1], I think it can be done without
> significantly affecting fast path add() performance, by using an
> "offset" with Release-Consume ordering.
> >
> > [1]:
> https://inbox.dpdk.org/dev/98CBD80474FA8B44BF855DF32C47DC35E9F427@smarts
> erver.smartshare.dk/
> >
> 
> 
> Without a specific driver use case, not sure why this added complexity
> is needed.

Our application reads the stats counters from another thread than the fast path threads. We don't pause the fast path forwarding loops to aggregate a bunch of counters.
I would guess that many other application work that way too. Especially latency sensitive applications.

> If there is a specific example, can add it later. Any atomic operation
> ends up
> impacting the speculative execution pipeline on modern CPU's. This
> version
> ends up being just a single add instruction on ARM and x86 64 bit.

I agree that everything is mostly fine on 64 bit.
I am trying to ensure that we future proof it for multi threaded applications and 32 bit architectures too.


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v4 0/8] Generic 64 bit counters for SW PMD's
  2024-05-10  5:01 ` [RFC 0/3] generic sw counters Stephen Hemminger
                     ` (5 preceding siblings ...)
  2024-05-14 15:35   ` [PATCH v3 0/7] Generic SW counters Stephen Hemminger
@ 2024-05-15 23:40   ` Stephen Hemminger
  2024-05-15 23:40     ` [PATCH v4 1/8] eal: generic 64 bit counter Stephen Hemminger
                       ` (7 more replies)
  2024-05-16 15:40   ` [PATCH v5 0/9] Generic 64 bit counters Stephen Hemminger
                     ` (5 subsequent siblings)
  12 siblings, 8 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-15 23:40 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

Adds common code for collecting basic statistics used by many
SW based PMD's such as af_packet, af_xdp, null, tap and ring.

Solves the problem where counters could tear because of
32 bit load/store on 32 bit platforms.

v4 - cleanup the comments
   - handle null pmd as well

Stephen Hemminger (8):
  eal: generic 64 bit counter
  ethdev: add common counters for statistics
  net/af_packet: use generic SW stats
  net/af_xdp: use generic SW stats
  net/pcap: use generic SW stats
  net/ring: use generic SW stats
  net/tap: use generic SW stats
  net/null: use generic SW stats

 drivers/net/af_packet/rte_eth_af_packet.c |  82 +++-----------
 drivers/net/af_xdp/rte_eth_af_xdp.c       |  98 +++++------------
 drivers/net/null/rte_eth_null.c           |  80 +++-----------
 drivers/net/pcap/pcap_ethdev.c            | 125 +++++-----------------
 drivers/net/ring/rte_eth_ring.c           |  71 +++++-------
 drivers/net/tap/rte_eth_tap.c             |  88 +++------------
 drivers/net/tap/rte_eth_tap.h             |  15 +--
 lib/eal/include/meson.build               |   1 +
 lib/eal/include/rte_counter.h             |  91 ++++++++++++++++
 lib/ethdev/ethdev_swstats.c               | 109 +++++++++++++++++++
 lib/ethdev/ethdev_swstats.h               | 124 +++++++++++++++++++++
 lib/ethdev/meson.build                    |   2 +
 lib/ethdev/version.map                    |   3 +
 13 files changed, 458 insertions(+), 431 deletions(-)
 create mode 100644 lib/eal/include/rte_counter.h
 create mode 100644 lib/ethdev/ethdev_swstats.c
 create mode 100644 lib/ethdev/ethdev_swstats.h

-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v4 1/8] eal: generic 64 bit counter
  2024-05-15 23:40   ` [PATCH v4 0/8] Generic 64 bit counters for SW PMD's Stephen Hemminger
@ 2024-05-15 23:40     ` Stephen Hemminger
  2024-05-15 23:40     ` [PATCH v4 2/8] ethdev: add common counters for statistics Stephen Hemminger
                       ` (6 subsequent siblings)
  7 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-15 23:40 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Morten Brørup

This header implements 64 bit counters that are NOT atomic
but are safe against load/store splits on 32 bit platforms.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
---
 lib/eal/include/meson.build   |  1 +
 lib/eal/include/rte_counter.h | 91 +++++++++++++++++++++++++++++++++++
 2 files changed, 92 insertions(+)
 create mode 100644 lib/eal/include/rte_counter.h

diff --git a/lib/eal/include/meson.build b/lib/eal/include/meson.build
index e94b056d46..c070dd0079 100644
--- a/lib/eal/include/meson.build
+++ b/lib/eal/include/meson.build
@@ -12,6 +12,7 @@ headers += files(
         'rte_class.h',
         'rte_common.h',
         'rte_compat.h',
+        'rte_counter.h',
         'rte_debug.h',
         'rte_dev.h',
         'rte_devargs.h',
diff --git a/lib/eal/include/rte_counter.h b/lib/eal/include/rte_counter.h
new file mode 100644
index 0000000000..82e558bd6f
--- /dev/null
+++ b/lib/eal/include/rte_counter.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+#ifndef _RTE_COUNTER_H_
+#define _RTE_COUNTER_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Counter
+ *
+ * A counter is 64 bit value that is safe from split read/write
+ * on 32 bit platforms. It assumes that only one cpu at a time
+ * will update the counter, and another CPU may want to read it.
+ *
+ * This is a much weaker guarantee than full atomic variables
+ * but is faster since no locked operations are required for update.
+ */
+
+#include <rte_stdatomic.h>
+
+#ifdef RTE_ARCH_64
+/*
+ * On a platform that can support native 64 bit type, no special handling.
+ * These are just wrapper around 64 bit value.
+ */
+typedef uint64_t rte_counter64_t;
+
+/**
+ * Add value to counter.
+ */
+__rte_experimental
+static inline void
+rte_counter64_add(rte_counter64_t *counter, uint32_t val)
+{
+	*counter += val;
+}
+
+__rte_experimental
+static inline uint64_t
+rte_counter64_fetch(const rte_counter64_t *counter)
+{
+	return *counter;
+}
+
+__rte_experimental
+static inline void
+rte_counter64_reset(rte_counter64_t *counter)
+{
+	*counter = 0;
+}
+
+#else
+/*
+ * On a 32 bit platform need to use atomic to force the compler to not
+ * split 64 bit read/write.
+ */
+typedef RTE_ATOMIC(uint64_t) rte_counter64_t;
+
+__rte_experimental
+static inline void
+rte_counter64_add(rte_counter64_t *counter, uint32_t val)
+{
+	rte_atomic_fetch_add_explicit(counter, val, rte_memory_order_relaxed);
+}
+
+__rte_experimental
+static inline uint64_t
+rte_counter64_fetch(const rte_counter64_t *counter)
+{
+	return rte_atomic_load_explicit(counter, rte_memory_order_relaxed);
+}
+
+__rte_experimental
+static inline void
+rte_counter64_reset(rte_counter64_t *counter)
+{
+	rte_atomic_store_explicit(counter, 0, rte_memory_order_relaxed);
+}
+#endif
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_COUNTER_H_ */
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v4 2/8] ethdev: add common counters for statistics
  2024-05-15 23:40   ` [PATCH v4 0/8] Generic 64 bit counters for SW PMD's Stephen Hemminger
  2024-05-15 23:40     ` [PATCH v4 1/8] eal: generic 64 bit counter Stephen Hemminger
@ 2024-05-15 23:40     ` Stephen Hemminger
  2024-05-15 23:40     ` [PATCH v4 3/8] net/af_packet: use generic SW stats Stephen Hemminger
                       ` (5 subsequent siblings)
  7 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-15 23:40 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Thomas Monjalon, Ferruh Yigit, Andrew Rybchenko

Introduce common helper routines for keeping track of per-queue
statistics in SW PMD's. The code in several drivers had
copy/pasted the same code for this, but had common issues
with 64 bit counters on 32 bit platforms.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 lib/ethdev/ethdev_swstats.c | 109 +++++++++++++++++++++++++++++++
 lib/ethdev/ethdev_swstats.h | 124 ++++++++++++++++++++++++++++++++++++
 lib/ethdev/meson.build      |   2 +
 lib/ethdev/version.map      |   3 +
 4 files changed, 238 insertions(+)
 create mode 100644 lib/ethdev/ethdev_swstats.c
 create mode 100644 lib/ethdev/ethdev_swstats.h

diff --git a/lib/ethdev/ethdev_swstats.c b/lib/ethdev/ethdev_swstats.c
new file mode 100644
index 0000000000..7892b2180b
--- /dev/null
+++ b/lib/ethdev/ethdev_swstats.c
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+
+#include <rte_config.h>
+#include <rte_common.h>
+#include <rte_atomic.h>
+
+#include "rte_ethdev.h"
+#include "ethdev_driver.h"
+#include "ethdev_swstats.h"
+
+int
+rte_eth_counters_stats_get(const struct rte_eth_dev *dev,
+			   size_t tx_offset, size_t rx_offset,
+			   struct rte_eth_stats *stats)
+{
+	uint64_t packets, bytes, errors;
+	unsigned int i;
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		const void *txq = dev->data->tx_queues[i];
+		const struct rte_eth_counters *counters;
+
+		if (txq == NULL)
+			continue;
+
+		counters = (const struct rte_eth_counters *)((const char *)txq + tx_offset);
+		packets = rte_counter64_fetch(&counters->packets);
+		bytes = rte_counter64_fetch(&counters->bytes);
+		errors = rte_counter64_fetch(&counters->errors);
+
+		/*
+		 * Prevent compiler from fetching values twice which would
+		 * cause per-queue and global statistics to not match.
+		 */
+		rte_compiler_barrier();
+
+		stats->opackets += packets;
+		stats->obytes += bytes;
+		stats->oerrors += errors;
+
+		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+			stats->q_opackets[i] = packets;
+			stats->q_obytes[i] = bytes;
+		}
+	}
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		const void *rxq = dev->data->rx_queues[i];
+		const struct rte_eth_counters *counters;
+
+		if (rxq == NULL)
+			continue;
+
+		counters = (const struct rte_eth_counters *)((const char *)rxq + rx_offset);
+		packets = rte_counter64_fetch(&counters->packets);
+		bytes = rte_counter64_fetch(&counters->bytes);
+		errors = rte_counter64_fetch(&counters->errors);
+
+		rte_compiler_barrier();
+
+		stats->ipackets += packets;
+		stats->ibytes += bytes;
+		stats->ierrors += errors;
+
+		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+			stats->q_ipackets[i] = packets;
+			stats->q_ibytes[i] = bytes;
+		}
+	}
+
+	stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
+	return 0;
+}
+
+int
+rte_eth_counters_reset(struct rte_eth_dev *dev, size_t tx_offset, size_t rx_offset)
+{
+	struct rte_eth_counters *counters;
+	unsigned int i;
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		void *txq  = dev->data->tx_queues[i];
+
+		if (txq == NULL)
+			continue;
+
+		counters = (struct rte_eth_counters *)((char *)txq + tx_offset);
+		rte_counter64_reset(&counters->packets);
+		rte_counter64_reset(&counters->bytes);
+		rte_counter64_reset(&counters->errors);
+	}
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		void *rxq  = dev->data->rx_queues[i];
+
+		if (rxq == NULL)
+			continue;
+
+		counters = (struct rte_eth_counters *)((char *)rxq + rx_offset);
+		rte_counter64_reset(&counters->packets);
+		rte_counter64_reset(&counters->bytes);
+		rte_counter64_reset(&counters->errors);
+	}
+
+	return 0;
+}
diff --git a/lib/ethdev/ethdev_swstats.h b/lib/ethdev/ethdev_swstats.h
new file mode 100644
index 0000000000..808c540640
--- /dev/null
+++ b/lib/ethdev/ethdev_swstats.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+#ifndef _RTE_ETHDEV_SWSTATS_H_
+#define _RTE_ETHDEV_SWSTATS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ *
+ * Internal statistics counters for software based devices.
+ * Hardware PMD's should use the hardware counters instead.
+ *
+ * This provides a library for PMD's to keep track of packets and bytes.
+ * It is assumed that this will be used per queue and queues are not
+ * shared by lcores.
+ */
+
+#include <rte_counter.h>
+
+/**
+ * A structure to be embedded in the device driver per-queue data.
+ */
+struct rte_eth_counters {
+	rte_counter64_t	packets;	/**< Total number of packets. */
+	rte_counter64_t	bytes;		/**< Total number of bytes. */
+	rte_counter64_t	errors;		/**< Total number of packets with errors. */
+};
+
+/**
+ * @internal
+ * Increment counters for a single packet.
+ *
+ * @param counters
+ *    Pointer to queue structure containing counters.
+ * @param sz
+ *    Size of the packet in bytes.
+ */
+__rte_internal
+static inline void
+rte_eth_count_packet(struct rte_eth_counters *counters, uint32_t sz)
+{
+	rte_counter64_add(&counters->packets, 1);
+	rte_counter64_add(&counters->bytes, sz);
+}
+
+/**
+ * @internal
+ * Increment counters based on mbuf.
+ *
+ * @param counters
+ *    Pointer to queue structure containing counters.
+ * @param mbuf
+ *    Received or transmitted mbuf.
+ */
+__rte_internal
+static inline void
+rte_eth_count_mbuf(struct rte_eth_counters *counters, const struct rte_mbuf *mbuf)
+{
+	rte_eth_count_packet(counters, rte_pktmbuf_pkt_len(mbuf));
+}
+
+/**
+ * @internal
+ * Increment error counter.
+ *
+ * @param counters
+ *    Pointer to queue structure containing counters.
+ */
+__rte_internal
+static inline void
+rte_eth_count_error(struct rte_eth_counters *counters)
+{
+	rte_counter64_add(&counters->errors, 1);
+}
+
+/**
+ * @internal
+ * Retrieve the general statistics for all queues.
+ * @see rte_eth_stats_get.
+ *
+ * @param dev
+ *    Pointer to the Ethernet device structure.
+ * @param tx_offset
+ *    Offset from the tx_queue structure where stats are located.
+ * @param rx_offset
+ *    Offset from the rx_queue structure where stats are located.
+ * @param stats
+ *   A pointer to a structure of type *rte_eth_stats* to be filled
+ * @return
+ *   Zero if successful. Non-zero otherwise.
+ */
+__rte_internal
+int rte_eth_counters_stats_get(const struct rte_eth_dev *dev,
+			       size_t tx_offset, size_t rx_offset,
+			       struct rte_eth_stats *stats);
+
+/**
+ * @internal
+ * Reset the statistics for all queues.
+ * @see rte_eth_stats_reset.
+ *
+ * @param dev
+ *    Pointer to the Ethernet device structure.
+ * @param tx_offset
+ *    Offset from the tx_queue structure where stats are located.
+ * @param rx_offset
+ *    Offset from the rx_queue structure where stats are located.
+ * @return
+ *   Zero if successful. Non-zero otherwise.
+ */
+__rte_internal
+int rte_eth_counters_reset(struct rte_eth_dev *dev,
+			   size_t tx_offset, size_t rx_offset);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ETHDEV_SWSTATS_H_ */
diff --git a/lib/ethdev/meson.build b/lib/ethdev/meson.build
index f1d2586591..7ce29a46d4 100644
--- a/lib/ethdev/meson.build
+++ b/lib/ethdev/meson.build
@@ -3,6 +3,7 @@
 
 sources = files(
         'ethdev_driver.c',
+        'ethdev_swstats.c',
         'ethdev_private.c',
         'ethdev_profile.c',
         'ethdev_trace_points.c',
@@ -42,6 +43,7 @@ driver_sdk_headers += files(
         'ethdev_driver.h',
         'ethdev_pci.h',
         'ethdev_vdev.h',
+        'ethdev_swstats.h',
 )
 
 if is_linux
diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
index 79f6f5293b..fc595be278 100644
--- a/lib/ethdev/version.map
+++ b/lib/ethdev/version.map
@@ -358,4 +358,7 @@ INTERNAL {
 	rte_eth_switch_domain_alloc;
 	rte_eth_switch_domain_free;
 	rte_flow_fp_default_ops;
+
+	rte_eth_counters_reset;
+	rte_eth_counters_stats_get;
 };
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v4 3/8] net/af_packet: use generic SW stats
  2024-05-15 23:40   ` [PATCH v4 0/8] Generic 64 bit counters for SW PMD's Stephen Hemminger
  2024-05-15 23:40     ` [PATCH v4 1/8] eal: generic 64 bit counter Stephen Hemminger
  2024-05-15 23:40     ` [PATCH v4 2/8] ethdev: add common counters for statistics Stephen Hemminger
@ 2024-05-15 23:40     ` Stephen Hemminger
  2024-05-15 23:40     ` [PATCH v4 4/8] net/af_xdp: " Stephen Hemminger
                       ` (4 subsequent siblings)
  7 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-15 23:40 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, John W. Linville

Use the new generic SW stats.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/af_packet/rte_eth_af_packet.c | 82 ++++-------------------
 1 file changed, 14 insertions(+), 68 deletions(-)

diff --git a/drivers/net/af_packet/rte_eth_af_packet.c b/drivers/net/af_packet/rte_eth_af_packet.c
index 397a32db58..89b737e7dc 100644
--- a/drivers/net/af_packet/rte_eth_af_packet.c
+++ b/drivers/net/af_packet/rte_eth_af_packet.c
@@ -10,6 +10,7 @@
 #include <rte_mbuf.h>
 #include <ethdev_driver.h>
 #include <ethdev_vdev.h>
+#include <ethdev_swstats.h>
 #include <rte_malloc.h>
 #include <rte_kvargs.h>
 #include <bus_vdev_driver.h>
@@ -29,6 +30,7 @@
 #include <unistd.h>
 #include <poll.h>
 
+
 #define ETH_AF_PACKET_IFACE_ARG		"iface"
 #define ETH_AF_PACKET_NUM_Q_ARG		"qpairs"
 #define ETH_AF_PACKET_BLOCKSIZE_ARG	"blocksz"
@@ -51,8 +53,7 @@ struct pkt_rx_queue {
 	uint16_t in_port;
 	uint8_t vlan_strip;
 
-	volatile unsigned long rx_pkts;
-	volatile unsigned long rx_bytes;
+	struct rte_eth_counters stats;
 };
 
 struct pkt_tx_queue {
@@ -64,11 +65,10 @@ struct pkt_tx_queue {
 	unsigned int framecount;
 	unsigned int framenum;
 
-	volatile unsigned long tx_pkts;
-	volatile unsigned long err_pkts;
-	volatile unsigned long tx_bytes;
+	struct rte_eth_counters stats;
 };
 
+
 struct pmd_internals {
 	unsigned nb_queues;
 
@@ -118,8 +118,6 @@ eth_af_packet_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct rte_mbuf *mbuf;
 	uint8_t *pbuf;
 	struct pkt_rx_queue *pkt_q = queue;
-	uint16_t num_rx = 0;
-	unsigned long num_rx_bytes = 0;
 	unsigned int framecount, framenum;
 
 	if (unlikely(nb_pkts == 0))
@@ -164,13 +162,11 @@ eth_af_packet_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		/* account for the receive frame */
 		bufs[i] = mbuf;
-		num_rx++;
-		num_rx_bytes += mbuf->pkt_len;
+		rte_eth_count_mbuf(&pkt_q->stats, mbuf);
 	}
 	pkt_q->framenum = framenum;
-	pkt_q->rx_pkts += num_rx;
-	pkt_q->rx_bytes += num_rx_bytes;
-	return num_rx;
+
+	return i;
 }
 
 /*
@@ -205,8 +201,6 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	unsigned int framecount, framenum;
 	struct pollfd pfd;
 	struct pkt_tx_queue *pkt_q = queue;
-	uint16_t num_tx = 0;
-	unsigned long num_tx_bytes = 0;
 	int i;
 
 	if (unlikely(nb_pkts == 0))
@@ -285,8 +279,7 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			framenum = 0;
 		ppd = (struct tpacket2_hdr *) pkt_q->rd[framenum].iov_base;
 
-		num_tx++;
-		num_tx_bytes += mbuf->pkt_len;
+		rte_eth_count_mbuf(&pkt_q->stats, mbuf);
 		rte_pktmbuf_free(mbuf);
 	}
 
@@ -298,15 +291,9 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		 * packets will be considered successful even though only some
 		 * are sent.
 		 */
-
-		num_tx = 0;
-		num_tx_bytes = 0;
 	}
 
 	pkt_q->framenum = framenum;
-	pkt_q->tx_pkts += num_tx;
-	pkt_q->err_pkts += i - num_tx;
-	pkt_q->tx_bytes += num_tx_bytes;
 	return i;
 }
 
@@ -386,58 +373,17 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 }
 
 static int
-eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
+eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-	unsigned i, imax;
-	unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
-	unsigned long rx_bytes_total = 0, tx_bytes_total = 0;
-	const struct pmd_internals *internal = dev->data->dev_private;
-
-	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
-	        internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
-	for (i = 0; i < imax; i++) {
-		igb_stats->q_ipackets[i] = internal->rx_queue[i].rx_pkts;
-		igb_stats->q_ibytes[i] = internal->rx_queue[i].rx_bytes;
-		rx_total += igb_stats->q_ipackets[i];
-		rx_bytes_total += igb_stats->q_ibytes[i];
-	}
-
-	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
-	        internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
-	for (i = 0; i < imax; i++) {
-		igb_stats->q_opackets[i] = internal->tx_queue[i].tx_pkts;
-		igb_stats->q_obytes[i] = internal->tx_queue[i].tx_bytes;
-		tx_total += igb_stats->q_opackets[i];
-		tx_err_total += internal->tx_queue[i].err_pkts;
-		tx_bytes_total += igb_stats->q_obytes[i];
-	}
-
-	igb_stats->ipackets = rx_total;
-	igb_stats->ibytes = rx_bytes_total;
-	igb_stats->opackets = tx_total;
-	igb_stats->oerrors = tx_err_total;
-	igb_stats->obytes = tx_bytes_total;
-	return 0;
+	return rte_eth_counters_stats_get(dev, offsetof(struct pkt_tx_queue, stats),
+					  offsetof(struct pkt_rx_queue, stats), stats);
 }
 
 static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
-	unsigned i;
-	struct pmd_internals *internal = dev->data->dev_private;
-
-	for (i = 0; i < internal->nb_queues; i++) {
-		internal->rx_queue[i].rx_pkts = 0;
-		internal->rx_queue[i].rx_bytes = 0;
-	}
-
-	for (i = 0; i < internal->nb_queues; i++) {
-		internal->tx_queue[i].tx_pkts = 0;
-		internal->tx_queue[i].err_pkts = 0;
-		internal->tx_queue[i].tx_bytes = 0;
-	}
-
-	return 0;
+	return rte_eth_counters_reset(dev, offsetof(struct pkt_tx_queue, stats),
+				      offsetof(struct pkt_rx_queue, stats));
 }
 
 static int
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v4 4/8] net/af_xdp: use generic SW stats
  2024-05-15 23:40   ` [PATCH v4 0/8] Generic 64 bit counters for SW PMD's Stephen Hemminger
                       ` (2 preceding siblings ...)
  2024-05-15 23:40     ` [PATCH v4 3/8] net/af_packet: use generic SW stats Stephen Hemminger
@ 2024-05-15 23:40     ` Stephen Hemminger
  2024-05-15 23:40     ` [PATCH v4 5/8] net/pcap: " Stephen Hemminger
                       ` (3 subsequent siblings)
  7 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-15 23:40 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Ciara Loftus

Use common code for all SW stats.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/af_xdp/rte_eth_af_xdp.c | 98 ++++++++---------------------
 1 file changed, 25 insertions(+), 73 deletions(-)

diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 268a130c49..65fc2f478f 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -20,6 +20,7 @@
 #include <rte_ethdev.h>
 #include <ethdev_driver.h>
 #include <ethdev_vdev.h>
+#include <ethdev_swstats.h>
 #include <rte_kvargs.h>
 #include <bus_vdev_driver.h>
 #include <rte_string_fns.h>
@@ -120,19 +121,13 @@ struct xsk_umem_info {
 	uint32_t max_xsks;
 };
 
-struct rx_stats {
-	uint64_t rx_pkts;
-	uint64_t rx_bytes;
-	uint64_t rx_dropped;
-};
-
 struct pkt_rx_queue {
 	struct xsk_ring_cons rx;
 	struct xsk_umem_info *umem;
 	struct xsk_socket *xsk;
 	struct rte_mempool *mb_pool;
 
-	struct rx_stats stats;
+	struct rte_eth_counters stats;
 
 	struct xsk_ring_prod fq;
 	struct xsk_ring_cons cq;
@@ -143,17 +138,11 @@ struct pkt_rx_queue {
 	int busy_budget;
 };
 
-struct tx_stats {
-	uint64_t tx_pkts;
-	uint64_t tx_bytes;
-	uint64_t tx_dropped;
-};
-
 struct pkt_tx_queue {
 	struct xsk_ring_prod tx;
 	struct xsk_umem_info *umem;
 
-	struct tx_stats stats;
+	struct rte_eth_counters stats;
 
 	struct pkt_rx_queue *pair;
 	int xsk_queue_idx;
@@ -308,7 +297,6 @@ af_xdp_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct xsk_ring_prod *fq = &rxq->fq;
 	struct xsk_umem_info *umem = rxq->umem;
 	uint32_t idx_rx = 0;
-	unsigned long rx_bytes = 0;
 	int i;
 	struct rte_mbuf *fq_bufs[ETH_AF_XDP_RX_BATCH_SIZE];
 
@@ -363,16 +351,13 @@ af_xdp_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		rte_pktmbuf_pkt_len(bufs[i]) = len;
 		rte_pktmbuf_data_len(bufs[i]) = len;
-		rx_bytes += len;
+
+		rte_eth_count_mbuf(&rxq->stats, bufs[i]);
 	}
 
 	xsk_ring_cons__release(rx, nb_pkts);
 	(void)reserve_fill_queue(umem, nb_pkts, fq_bufs, fq);
 
-	/* statistics */
-	rxq->stats.rx_pkts += nb_pkts;
-	rxq->stats.rx_bytes += rx_bytes;
-
 	return nb_pkts;
 }
 #else
@@ -384,7 +369,6 @@ af_xdp_rx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct xsk_umem_info *umem = rxq->umem;
 	struct xsk_ring_prod *fq = &rxq->fq;
 	uint32_t idx_rx = 0;
-	unsigned long rx_bytes = 0;
 	int i;
 	uint32_t free_thresh = fq->size >> 1;
 	struct rte_mbuf *mbufs[ETH_AF_XDP_RX_BATCH_SIZE];
@@ -424,16 +408,13 @@ af_xdp_rx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		rte_ring_enqueue(umem->buf_ring, (void *)addr);
 		rte_pktmbuf_pkt_len(mbufs[i]) = len;
 		rte_pktmbuf_data_len(mbufs[i]) = len;
-		rx_bytes += len;
+		rte_eth_count_mbuf(&rxq->stats, mbufs[i]);
+
 		bufs[i] = mbufs[i];
 	}
 
 	xsk_ring_cons__release(rx, nb_pkts);
 
-	/* statistics */
-	rxq->stats.rx_pkts += nb_pkts;
-	rxq->stats.rx_bytes += rx_bytes;
-
 	return nb_pkts;
 }
 #endif
@@ -527,9 +508,8 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct pkt_tx_queue *txq = queue;
 	struct xsk_umem_info *umem = txq->umem;
 	struct rte_mbuf *mbuf;
-	unsigned long tx_bytes = 0;
 	int i;
-	uint32_t idx_tx;
+	uint32_t idx_tx, pkt_len;
 	uint16_t count = 0;
 	struct xdp_desc *desc;
 	uint64_t addr, offset;
@@ -541,6 +521,7 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 	for (i = 0; i < nb_pkts; i++) {
 		mbuf = bufs[i];
+		pkt_len = rte_pktmbuf_pkt_len(mbuf);
 
 		if (mbuf->pool == umem->mb_pool) {
 			if (!xsk_ring_prod__reserve(&txq->tx, 1, &idx_tx)) {
@@ -589,17 +570,13 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			count++;
 		}
 
-		tx_bytes += mbuf->pkt_len;
+		rte_eth_count_packet(&txq->stats, pkt_len);
 	}
 
 out:
 	xsk_ring_prod__submit(&txq->tx, count);
 	kick_tx(txq, cq);
 
-	txq->stats.tx_pkts += count;
-	txq->stats.tx_bytes += tx_bytes;
-	txq->stats.tx_dropped += nb_pkts - count;
-
 	return count;
 }
 #else
@@ -610,7 +587,6 @@ af_xdp_tx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct xsk_umem_info *umem = txq->umem;
 	struct rte_mbuf *mbuf;
 	void *addrs[ETH_AF_XDP_TX_BATCH_SIZE];
-	unsigned long tx_bytes = 0;
 	int i;
 	uint32_t idx_tx;
 	struct xsk_ring_cons *cq = &txq->pair->cq;
@@ -640,7 +616,8 @@ af_xdp_tx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		pkt = xsk_umem__get_data(umem->mz->addr,
 					 desc->addr);
 		rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *), desc->len);
-		tx_bytes += mbuf->pkt_len;
+		rte_eth_qsw_update(&txq->stats, mbuf);
+
 		rte_pktmbuf_free(mbuf);
 	}
 
@@ -648,9 +625,6 @@ af_xdp_tx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 	kick_tx(txq, cq);
 
-	txq->stats.tx_pkts += nb_pkts;
-	txq->stats.tx_bytes += tx_bytes;
-
 	return nb_pkts;
 }
 
@@ -847,39 +821,26 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 static int
 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-	struct pmd_internals *internals = dev->data->dev_private;
 	struct pmd_process_private *process_private = dev->process_private;
-	struct xdp_statistics xdp_stats;
-	struct pkt_rx_queue *rxq;
-	struct pkt_tx_queue *txq;
-	socklen_t optlen;
-	int i, ret, fd;
+	unsigned int i;
 
-	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		optlen = sizeof(struct xdp_statistics);
-		rxq = &internals->rx_queues[i];
-		txq = rxq->pair;
-		stats->q_ipackets[i] = rxq->stats.rx_pkts;
-		stats->q_ibytes[i] = rxq->stats.rx_bytes;
+	rte_eth_counters_stats_get(dev, offsetof(struct pkt_tx_queue, stats),
+				   offsetof(struct pkt_rx_queue, stats), stats);
 
-		stats->q_opackets[i] = txq->stats.tx_pkts;
-		stats->q_obytes[i] = txq->stats.tx_bytes;
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		struct xdp_statistics xdp_stats;
+		socklen_t optlen = sizeof(xdp_stats);
+		int fd;
 
-		stats->ipackets += stats->q_ipackets[i];
-		stats->ibytes += stats->q_ibytes[i];
-		stats->imissed += rxq->stats.rx_dropped;
-		stats->oerrors += txq->stats.tx_dropped;
 		fd = process_private->rxq_xsk_fds[i];
-		ret = fd >= 0 ? getsockopt(fd, SOL_XDP, XDP_STATISTICS,
-					   &xdp_stats, &optlen) : -1;
-		if (ret != 0) {
+		if (fd < 0)
+			continue;
+		if (getsockopt(fd, SOL_XDP, XDP_STATISTICS,
+			       &xdp_stats, &optlen)  < 0) {
 			AF_XDP_LOG(ERR, "getsockopt() failed for XDP_STATISTICS.\n");
 			return -1;
 		}
 		stats->imissed += xdp_stats.rx_dropped;
-
-		stats->opackets += stats->q_opackets[i];
-		stats->obytes += stats->q_obytes[i];
 	}
 
 	return 0;
@@ -888,17 +849,8 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
-	struct pmd_internals *internals = dev->data->dev_private;
-	int i;
-
-	for (i = 0; i < internals->queue_cnt; i++) {
-		memset(&internals->rx_queues[i].stats, 0,
-					sizeof(struct rx_stats));
-		memset(&internals->tx_queues[i].stats, 0,
-					sizeof(struct tx_stats));
-	}
-
-	return 0;
+	return rte_eth_counters_reset(dev, offsetof(struct pkt_tx_queue, stats),
+				      offsetof(struct pkt_rx_queue, stats));
 }
 
 #ifdef RTE_NET_AF_XDP_LIBBPF_XDP_ATTACH
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v4 5/8] net/pcap: use generic SW stats
  2024-05-15 23:40   ` [PATCH v4 0/8] Generic 64 bit counters for SW PMD's Stephen Hemminger
                       ` (3 preceding siblings ...)
  2024-05-15 23:40     ` [PATCH v4 4/8] net/af_xdp: " Stephen Hemminger
@ 2024-05-15 23:40     ` Stephen Hemminger
  2024-05-15 23:40     ` [PATCH v4 6/8] net/ring: " Stephen Hemminger
                       ` (2 subsequent siblings)
  7 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-15 23:40 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

Use common statistics for SW drivers.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/pcap/pcap_ethdev.c | 125 +++++++--------------------------
 1 file changed, 26 insertions(+), 99 deletions(-)

diff --git a/drivers/net/pcap/pcap_ethdev.c b/drivers/net/pcap/pcap_ethdev.c
index bfec085045..b1a983f871 100644
--- a/drivers/net/pcap/pcap_ethdev.c
+++ b/drivers/net/pcap/pcap_ethdev.c
@@ -11,6 +11,7 @@
 
 #include <rte_cycles.h>
 #include <ethdev_driver.h>
+#include <ethdev_swstats.h>
 #include <ethdev_vdev.h>
 #include <rte_kvargs.h>
 #include <rte_malloc.h>
@@ -48,13 +49,6 @@ static uint8_t iface_idx;
 static uint64_t timestamp_rx_dynflag;
 static int timestamp_dynfield_offset = -1;
 
-struct queue_stat {
-	volatile unsigned long pkts;
-	volatile unsigned long bytes;
-	volatile unsigned long err_pkts;
-	volatile unsigned long rx_nombuf;
-};
-
 struct queue_missed_stat {
 	/* last value retrieved from pcap */
 	unsigned int pcap;
@@ -68,7 +62,7 @@ struct pcap_rx_queue {
 	uint16_t port_id;
 	uint16_t queue_id;
 	struct rte_mempool *mb_pool;
-	struct queue_stat rx_stat;
+	struct rte_eth_counters rx_stat;
 	struct queue_missed_stat missed_stat;
 	char name[PATH_MAX];
 	char type[ETH_PCAP_ARG_MAXLEN];
@@ -80,7 +74,7 @@ struct pcap_rx_queue {
 struct pcap_tx_queue {
 	uint16_t port_id;
 	uint16_t queue_id;
-	struct queue_stat tx_stat;
+	struct rte_eth_counters tx_stat;
 	char name[PATH_MAX];
 	char type[ETH_PCAP_ARG_MAXLEN];
 };
@@ -238,7 +232,6 @@ eth_pcap_rx_infinite(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
 	int i;
 	struct pcap_rx_queue *pcap_q = queue;
-	uint32_t rx_bytes = 0;
 
 	if (unlikely(nb_pkts == 0))
 		return 0;
@@ -252,39 +245,35 @@ eth_pcap_rx_infinite(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		if (err)
 			return i;
 
+		rte_eth_count_mbuf(&pcap_q->rx_stat, pcap_buf);
+
 		rte_memcpy(rte_pktmbuf_mtod(bufs[i], void *),
 				rte_pktmbuf_mtod(pcap_buf, void *),
 				pcap_buf->data_len);
 		bufs[i]->data_len = pcap_buf->data_len;
 		bufs[i]->pkt_len = pcap_buf->pkt_len;
 		bufs[i]->port = pcap_q->port_id;
-		rx_bytes += pcap_buf->data_len;
+
 
 		/* Enqueue packet back on ring to allow infinite rx. */
 		rte_ring_enqueue(pcap_q->pkts, pcap_buf);
 	}
 
-	pcap_q->rx_stat.pkts += i;
-	pcap_q->rx_stat.bytes += rx_bytes;
-
 	return i;
 }
 
 static uint16_t
 eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
+	struct pcap_rx_queue *pcap_q = queue;
+	struct rte_eth_dev *dev = &rte_eth_devices[pcap_q->port_id];
+	struct pmd_process_private *pp = dev->process_private;
+	pcap_t *pcap = pp->rx_pcap[pcap_q->queue_id];
 	unsigned int i;
 	struct pcap_pkthdr header;
-	struct pmd_process_private *pp;
 	const u_char *packet;
 	struct rte_mbuf *mbuf;
-	struct pcap_rx_queue *pcap_q = queue;
 	uint16_t num_rx = 0;
-	uint32_t rx_bytes = 0;
-	pcap_t *pcap;
-
-	pp = rte_eth_devices[pcap_q->port_id].process_private;
-	pcap = pp->rx_pcap[pcap_q->queue_id];
 
 	if (unlikely(pcap == NULL || nb_pkts == 0))
 		return 0;
@@ -300,7 +289,7 @@ eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		mbuf = rte_pktmbuf_alloc(pcap_q->mb_pool);
 		if (unlikely(mbuf == NULL)) {
-			pcap_q->rx_stat.rx_nombuf++;
+			++dev->data->rx_mbuf_alloc_failed;
 			break;
 		}
 
@@ -315,7 +304,7 @@ eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 						       mbuf,
 						       packet,
 						       header.caplen) == -1)) {
-				pcap_q->rx_stat.err_pkts++;
+				rte_eth_count_error(&pcap_q->rx_stat);
 				rte_pktmbuf_free(mbuf);
 				break;
 			}
@@ -329,11 +318,10 @@ eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		mbuf->ol_flags |= timestamp_rx_dynflag;
 		mbuf->port = pcap_q->port_id;
 		bufs[num_rx] = mbuf;
+
+		rte_eth_count_mbuf(&pcap_q->rx_stat, mbuf);
 		num_rx++;
-		rx_bytes += header.caplen;
 	}
-	pcap_q->rx_stat.pkts += num_rx;
-	pcap_q->rx_stat.bytes += rx_bytes;
 
 	return num_rx;
 }
@@ -379,8 +367,6 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct rte_mbuf *mbuf;
 	struct pmd_process_private *pp;
 	struct pcap_tx_queue *dumper_q = queue;
-	uint16_t num_tx = 0;
-	uint32_t tx_bytes = 0;
 	struct pcap_pkthdr header;
 	pcap_dumper_t *dumper;
 	unsigned char temp_data[RTE_ETH_PCAP_SNAPLEN];
@@ -412,8 +398,7 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		pcap_dump((u_char *)dumper, &header,
 			rte_pktmbuf_read(mbuf, 0, caplen, temp_data));
 
-		num_tx++;
-		tx_bytes += caplen;
+		rte_eth_count_mbuf(&dumper_q->tx_stat, mbuf);
 		rte_pktmbuf_free(mbuf);
 	}
 
@@ -423,9 +408,6 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	 * we flush the pcap dumper within each burst.
 	 */
 	pcap_dump_flush(dumper);
-	dumper_q->tx_stat.pkts += num_tx;
-	dumper_q->tx_stat.bytes += tx_bytes;
-	dumper_q->tx_stat.err_pkts += nb_pkts - num_tx;
 
 	return nb_pkts;
 }
@@ -437,20 +419,16 @@ static uint16_t
 eth_tx_drop(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
 	unsigned int i;
-	uint32_t tx_bytes = 0;
 	struct pcap_tx_queue *tx_queue = queue;
 
 	if (unlikely(nb_pkts == 0))
 		return 0;
 
 	for (i = 0; i < nb_pkts; i++) {
-		tx_bytes += bufs[i]->pkt_len;
+		rte_eth_count_mbuf(&tx_queue->tx_stat, bufs[i]);
 		rte_pktmbuf_free(bufs[i]);
 	}
 
-	tx_queue->tx_stat.pkts += nb_pkts;
-	tx_queue->tx_stat.bytes += tx_bytes;
-
 	return i;
 }
 
@@ -465,8 +443,6 @@ eth_pcap_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct rte_mbuf *mbuf;
 	struct pmd_process_private *pp;
 	struct pcap_tx_queue *tx_queue = queue;
-	uint16_t num_tx = 0;
-	uint32_t tx_bytes = 0;
 	pcap_t *pcap;
 	unsigned char temp_data[RTE_ETH_PCAP_SNAPLEN];
 	size_t len;
@@ -497,15 +473,11 @@ eth_pcap_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			rte_pktmbuf_read(mbuf, 0, len, temp_data), len);
 		if (unlikely(ret != 0))
 			break;
-		num_tx++;
-		tx_bytes += len;
+
+		rte_eth_count_mbuf(&tx_queue->tx_stat, mbuf);
 		rte_pktmbuf_free(mbuf);
 	}
 
-	tx_queue->tx_stat.pkts += num_tx;
-	tx_queue->tx_stat.bytes += tx_bytes;
-	tx_queue->tx_stat.err_pkts += i - num_tx;
-
 	return i;
 }
 
@@ -746,41 +718,12 @@ static int
 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
 	unsigned int i;
-	unsigned long rx_packets_total = 0, rx_bytes_total = 0;
-	unsigned long rx_missed_total = 0;
-	unsigned long rx_nombuf_total = 0, rx_err_total = 0;
-	unsigned long tx_packets_total = 0, tx_bytes_total = 0;
-	unsigned long tx_packets_err_total = 0;
-	const struct pmd_internals *internal = dev->data->dev_private;
-
-	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
-			i < dev->data->nb_rx_queues; i++) {
-		stats->q_ipackets[i] = internal->rx_queue[i].rx_stat.pkts;
-		stats->q_ibytes[i] = internal->rx_queue[i].rx_stat.bytes;
-		rx_nombuf_total += internal->rx_queue[i].rx_stat.rx_nombuf;
-		rx_err_total += internal->rx_queue[i].rx_stat.err_pkts;
-		rx_packets_total += stats->q_ipackets[i];
-		rx_bytes_total += stats->q_ibytes[i];
-		rx_missed_total += queue_missed_stat_get(dev, i);
-	}
 
-	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
-			i < dev->data->nb_tx_queues; i++) {
-		stats->q_opackets[i] = internal->tx_queue[i].tx_stat.pkts;
-		stats->q_obytes[i] = internal->tx_queue[i].tx_stat.bytes;
-		tx_packets_total += stats->q_opackets[i];
-		tx_bytes_total += stats->q_obytes[i];
-		tx_packets_err_total += internal->tx_queue[i].tx_stat.err_pkts;
-	}
+	rte_eth_counters_stats_get(dev, offsetof(struct pcap_tx_queue, tx_stat),
+				   offsetof(struct pcap_rx_queue, rx_stat), stats);
 
-	stats->ipackets = rx_packets_total;
-	stats->ibytes = rx_bytes_total;
-	stats->imissed = rx_missed_total;
-	stats->ierrors = rx_err_total;
-	stats->rx_nombuf = rx_nombuf_total;
-	stats->opackets = tx_packets_total;
-	stats->obytes = tx_bytes_total;
-	stats->oerrors = tx_packets_err_total;
+	for (i = 0; i < dev->data->nb_rx_queues; i++)
+		stats->imissed += queue_missed_stat_get(dev, i);
 
 	return 0;
 }
@@ -789,21 +732,12 @@ static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
 	unsigned int i;
-	struct pmd_internals *internal = dev->data->dev_private;
 
-	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		internal->rx_queue[i].rx_stat.pkts = 0;
-		internal->rx_queue[i].rx_stat.bytes = 0;
-		internal->rx_queue[i].rx_stat.err_pkts = 0;
-		internal->rx_queue[i].rx_stat.rx_nombuf = 0;
-		queue_missed_stat_reset(dev, i);
-	}
+	rte_eth_counters_reset(dev, offsetof(struct pcap_tx_queue, tx_stat),
+			       offsetof(struct pcap_rx_queue, rx_stat));
 
-	for (i = 0; i < dev->data->nb_tx_queues; i++) {
-		internal->tx_queue[i].tx_stat.pkts = 0;
-		internal->tx_queue[i].tx_stat.bytes = 0;
-		internal->tx_queue[i].tx_stat.err_pkts = 0;
-	}
+	for (i = 0; i < dev->data->nb_rx_queues; i++)
+		queue_missed_stat_reset(dev, i);
 
 	return 0;
 }
@@ -929,13 +863,6 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 				pcap_pkt_count);
 			return -EINVAL;
 		}
-
-		/*
-		 * Reset the stats for this queue since eth_pcap_rx calls above
-		 * didn't result in the application receiving packets.
-		 */
-		pcap_q->rx_stat.pkts = 0;
-		pcap_q->rx_stat.bytes = 0;
 	}
 
 	return 0;
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v4 6/8] net/ring: use generic SW stats
  2024-05-15 23:40   ` [PATCH v4 0/8] Generic 64 bit counters for SW PMD's Stephen Hemminger
                       ` (4 preceding siblings ...)
  2024-05-15 23:40     ` [PATCH v4 5/8] net/pcap: " Stephen Hemminger
@ 2024-05-15 23:40     ` Stephen Hemminger
  2024-05-15 23:40     ` [PATCH v4 7/8] net/tap: " Stephen Hemminger
  2024-05-15 23:41     ` [PATCH v4 8/8] net/null: " Stephen Hemminger
  7 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-15 23:40 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Bruce Richardson

Use generic per-queue infrastructure.
This also fixes bug where ring code was not accounting
for bytes.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/ring/rte_eth_ring.c | 71 +++++++++++++--------------------
 1 file changed, 28 insertions(+), 43 deletions(-)

diff --git a/drivers/net/ring/rte_eth_ring.c b/drivers/net/ring/rte_eth_ring.c
index 48953dd7a0..85f14dd679 100644
--- a/drivers/net/ring/rte_eth_ring.c
+++ b/drivers/net/ring/rte_eth_ring.c
@@ -7,6 +7,7 @@
 #include "rte_eth_ring.h"
 #include <rte_mbuf.h>
 #include <ethdev_driver.h>
+#include <ethdev_swstats.h>
 #include <rte_malloc.h>
 #include <rte_memcpy.h>
 #include <rte_os_shim.h>
@@ -44,8 +45,8 @@ enum dev_action {
 
 struct ring_queue {
 	struct rte_ring *rng;
-	uint64_t rx_pkts;
-	uint64_t tx_pkts;
+
+	struct rte_eth_counters stats;
 };
 
 struct pmd_internals {
@@ -77,12 +78,13 @@ eth_ring_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 {
 	void **ptrs = (void *)&bufs[0];
 	struct ring_queue *r = q;
-	const uint16_t nb_rx = (uint16_t)rte_ring_dequeue_burst(r->rng,
-			ptrs, nb_bufs, NULL);
-	if (r->rng->flags & RING_F_SC_DEQ)
-		r->rx_pkts += nb_rx;
-	else
-		__atomic_fetch_add(&r->rx_pkts, nb_rx, __ATOMIC_RELAXED);
+	uint16_t i, nb_rx;
+
+	nb_rx = (uint16_t)rte_ring_dequeue_burst(r->rng, ptrs, nb_bufs, NULL);
+
+	for (i = 0; i < nb_rx; i++)
+		rte_eth_count_mbuf(&r->stats, bufs[i]);
+
 	return nb_rx;
 }
 
@@ -90,13 +92,20 @@ static uint16_t
 eth_ring_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 {
 	void **ptrs = (void *)&bufs[0];
+	uint32_t *sizes;
 	struct ring_queue *r = q;
-	const uint16_t nb_tx = (uint16_t)rte_ring_enqueue_burst(r->rng,
-			ptrs, nb_bufs, NULL);
-	if (r->rng->flags & RING_F_SP_ENQ)
-		r->tx_pkts += nb_tx;
-	else
-		__atomic_fetch_add(&r->tx_pkts, nb_tx, __ATOMIC_RELAXED);
+	uint16_t i, nb_tx;
+
+	sizes = alloca(sizeof(uint32_t) * nb_bufs);
+
+	for (i = 0; i < nb_bufs; i++)
+		sizes[i] = rte_pktmbuf_pkt_len(bufs[i]);
+
+	nb_tx = (uint16_t)rte_ring_enqueue_burst(r->rng, ptrs, nb_bufs, NULL);
+
+	for (i = 0; i < nb_tx; i++)
+		rte_eth_count_packet(&r->stats, sizes[i]);
+
 	return nb_tx;
 }
 
@@ -193,40 +202,16 @@ eth_dev_info(struct rte_eth_dev *dev,
 static int
 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-	unsigned int i;
-	unsigned long rx_total = 0, tx_total = 0;
-	const struct pmd_internals *internal = dev->data->dev_private;
-
-	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
-			i < dev->data->nb_rx_queues; i++) {
-		stats->q_ipackets[i] = internal->rx_ring_queues[i].rx_pkts;
-		rx_total += stats->q_ipackets[i];
-	}
-
-	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
-			i < dev->data->nb_tx_queues; i++) {
-		stats->q_opackets[i] = internal->tx_ring_queues[i].tx_pkts;
-		tx_total += stats->q_opackets[i];
-	}
-
-	stats->ipackets = rx_total;
-	stats->opackets = tx_total;
-
-	return 0;
+	return rte_eth_counters_stats_get(dev, offsetof(struct ring_queue, stats),
+					  offsetof(struct ring_queue, stats),
+					  stats);
 }
 
 static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
-	unsigned int i;
-	struct pmd_internals *internal = dev->data->dev_private;
-
-	for (i = 0; i < dev->data->nb_rx_queues; i++)
-		internal->rx_ring_queues[i].rx_pkts = 0;
-	for (i = 0; i < dev->data->nb_tx_queues; i++)
-		internal->tx_ring_queues[i].tx_pkts = 0;
-
-	return 0;
+	return rte_eth_counters_reset(dev, offsetof(struct ring_queue, stats),
+				      offsetof(struct ring_queue, stats));
 }
 
 static void
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v4 7/8] net/tap: use generic SW stats
  2024-05-15 23:40   ` [PATCH v4 0/8] Generic 64 bit counters for SW PMD's Stephen Hemminger
                       ` (5 preceding siblings ...)
  2024-05-15 23:40     ` [PATCH v4 6/8] net/ring: " Stephen Hemminger
@ 2024-05-15 23:40     ` Stephen Hemminger
  2024-05-15 23:41     ` [PATCH v4 8/8] net/null: " Stephen Hemminger
  7 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-15 23:40 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

Use new common sw statistics.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/tap/rte_eth_tap.c | 88 ++++++-----------------------------
 drivers/net/tap/rte_eth_tap.h | 15 ++----
 2 files changed, 18 insertions(+), 85 deletions(-)

diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index 69d9da695b..f87979da4f 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -432,7 +432,6 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct rx_queue *rxq = queue;
 	struct pmd_process_private *process_private;
 	uint16_t num_rx;
-	unsigned long num_rx_bytes = 0;
 	uint32_t trigger = tap_trigger;
 
 	if (trigger == rxq->trigger_seen)
@@ -455,7 +454,7 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		/* Packet couldn't fit in the provided mbuf */
 		if (unlikely(rxq->pi.flags & TUN_PKT_STRIP)) {
-			rxq->stats.ierrors++;
+			rte_eth_count_error(&rxq->stats);
 			continue;
 		}
 
@@ -467,7 +466,9 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			struct rte_mbuf *buf = rte_pktmbuf_alloc(rxq->mp);
 
 			if (unlikely(!buf)) {
-				rxq->stats.rx_nombuf++;
+				struct rte_eth_dev *dev = &rte_eth_devices[rxq->in_port];
+				++dev->data->rx_mbuf_alloc_failed;
+
 				/* No new buf has been allocated: do nothing */
 				if (!new_tail || !seg)
 					goto end;
@@ -509,11 +510,9 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		/* account for the receive frame */
 		bufs[num_rx++] = mbuf;
-		num_rx_bytes += mbuf->pkt_len;
+		rte_eth_count_mbuf(&rxq->stats, mbuf);
 	}
 end:
-	rxq->stats.ipackets += num_rx;
-	rxq->stats.ibytes += num_rx_bytes;
 
 	if (trigger && num_rx < nb_pkts)
 		rxq->trigger_seen = trigger;
@@ -523,8 +522,7 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 static inline int
 tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs,
-			struct rte_mbuf **pmbufs,
-			uint16_t *num_packets, unsigned long *num_tx_bytes)
+		struct rte_mbuf **pmbufs)
 {
 	struct pmd_process_private *process_private;
 	int i;
@@ -647,8 +645,7 @@ tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs,
 		if (n <= 0)
 			return -1;
 
-		(*num_packets)++;
-		(*num_tx_bytes) += rte_pktmbuf_pkt_len(mbuf);
+		rte_eth_count_mbuf(&txq->stats, mbuf);
 	}
 	return 0;
 }
@@ -660,8 +657,6 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
 	struct tx_queue *txq = queue;
 	uint16_t num_tx = 0;
-	uint16_t num_packets = 0;
-	unsigned long num_tx_bytes = 0;
 	uint32_t max_size;
 	int i;
 
@@ -693,7 +688,7 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			tso_segsz = mbuf_in->tso_segsz + hdrs_len;
 			if (unlikely(tso_segsz == hdrs_len) ||
 				tso_segsz > *txq->mtu) {
-				txq->stats.errs++;
+				rte_eth_count_error(&txq->stats);
 				break;
 			}
 			gso_ctx->gso_size = tso_segsz;
@@ -728,10 +723,10 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			num_mbufs = 1;
 		}
 
-		ret = tap_write_mbufs(txq, num_mbufs, mbuf,
-				&num_packets, &num_tx_bytes);
+		ret = tap_write_mbufs(txq, num_mbufs, mbuf);
 		if (ret == -1) {
-			txq->stats.errs++;
+			rte_eth_count_error(&txq->stats);
+
 			/* free tso mbufs */
 			if (num_tso_mbufs > 0)
 				rte_pktmbuf_free_bulk(mbuf, num_tso_mbufs);
@@ -749,10 +744,6 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		}
 	}
 
-	txq->stats.opackets += num_packets;
-	txq->stats.errs += nb_pkts - num_tx;
-	txq->stats.obytes += num_tx_bytes;
-
 	return num_tx;
 }
 
@@ -1055,64 +1046,15 @@ tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 static int
 tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats)
 {
-	unsigned int i, imax;
-	unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
-	unsigned long rx_bytes_total = 0, tx_bytes_total = 0;
-	unsigned long rx_nombuf = 0, ierrors = 0;
-	const struct pmd_internals *pmd = dev->data->dev_private;
-
-	/* rx queue statistics */
-	imax = (dev->data->nb_rx_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ?
-		dev->data->nb_rx_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS;
-	for (i = 0; i < imax; i++) {
-		tap_stats->q_ipackets[i] = pmd->rxq[i].stats.ipackets;
-		tap_stats->q_ibytes[i] = pmd->rxq[i].stats.ibytes;
-		rx_total += tap_stats->q_ipackets[i];
-		rx_bytes_total += tap_stats->q_ibytes[i];
-		rx_nombuf += pmd->rxq[i].stats.rx_nombuf;
-		ierrors += pmd->rxq[i].stats.ierrors;
-	}
-
-	/* tx queue statistics */
-	imax = (dev->data->nb_tx_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ?
-		dev->data->nb_tx_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS;
-
-	for (i = 0; i < imax; i++) {
-		tap_stats->q_opackets[i] = pmd->txq[i].stats.opackets;
-		tap_stats->q_obytes[i] = pmd->txq[i].stats.obytes;
-		tx_total += tap_stats->q_opackets[i];
-		tx_err_total += pmd->txq[i].stats.errs;
-		tx_bytes_total += tap_stats->q_obytes[i];
-	}
-
-	tap_stats->ipackets = rx_total;
-	tap_stats->ibytes = rx_bytes_total;
-	tap_stats->ierrors = ierrors;
-	tap_stats->rx_nombuf = rx_nombuf;
-	tap_stats->opackets = tx_total;
-	tap_stats->oerrors = tx_err_total;
-	tap_stats->obytes = tx_bytes_total;
-	return 0;
+	return rte_eth_counters_stats_get(dev, offsetof(struct tx_queue, stats),
+					  offsetof(struct rx_queue, stats), tap_stats);
 }
 
 static int
 tap_stats_reset(struct rte_eth_dev *dev)
 {
-	int i;
-	struct pmd_internals *pmd = dev->data->dev_private;
-
-	for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
-		pmd->rxq[i].stats.ipackets = 0;
-		pmd->rxq[i].stats.ibytes = 0;
-		pmd->rxq[i].stats.ierrors = 0;
-		pmd->rxq[i].stats.rx_nombuf = 0;
-
-		pmd->txq[i].stats.opackets = 0;
-		pmd->txq[i].stats.errs = 0;
-		pmd->txq[i].stats.obytes = 0;
-	}
-
-	return 0;
+	return rte_eth_counters_reset(dev, offsetof(struct tx_queue, stats),
+				      offsetof(struct rx_queue, stats));
 }
 
 static int
diff --git a/drivers/net/tap/rte_eth_tap.h b/drivers/net/tap/rte_eth_tap.h
index 5ac93f93e9..8cba9ea410 100644
--- a/drivers/net/tap/rte_eth_tap.h
+++ b/drivers/net/tap/rte_eth_tap.h
@@ -14,6 +14,7 @@
 #include <linux/if_tun.h>
 
 #include <ethdev_driver.h>
+#include <ethdev_swstats.h>
 #include <rte_ether.h>
 #include <rte_gso.h>
 #include "tap_log.h"
@@ -32,23 +33,13 @@ enum rte_tuntap_type {
 	ETH_TUNTAP_TYPE_MAX,
 };
 
-struct pkt_stats {
-	uint64_t opackets;              /* Number of output packets */
-	uint64_t ipackets;              /* Number of input packets */
-	uint64_t obytes;                /* Number of bytes on output */
-	uint64_t ibytes;                /* Number of bytes on input */
-	uint64_t errs;                  /* Number of TX error packets */
-	uint64_t ierrors;               /* Number of RX error packets */
-	uint64_t rx_nombuf;             /* Nb of RX mbuf alloc failures */
-};
-
 struct rx_queue {
 	struct rte_mempool *mp;         /* Mempool for RX packets */
 	uint32_t trigger_seen;          /* Last seen Rx trigger value */
 	uint16_t in_port;               /* Port ID */
 	uint16_t queue_id;		/* queue ID*/
-	struct pkt_stats stats;         /* Stats for this RX queue */
 	uint16_t nb_rx_desc;            /* max number of mbufs available */
+	struct rte_eth_counters stats;  /* Stats for this RX queue */
 	struct rte_eth_rxmode *rxmode;  /* RX features */
 	struct rte_mbuf *pool;          /* mbufs pool for this queue */
 	struct iovec (*iovecs)[];       /* descriptors for this queue */
@@ -59,7 +50,7 @@ struct tx_queue {
 	int type;                       /* Type field - TUN|TAP */
 	uint16_t *mtu;                  /* Pointer to MTU from dev_data */
 	uint16_t csum:1;                /* Enable checksum offloading */
-	struct pkt_stats stats;         /* Stats for this TX queue */
+	struct rte_eth_counters stats;	/* Stats for this TX queue */
 	struct rte_gso_ctx gso_ctx;     /* GSO context */
 	uint16_t out_port;              /* Port ID */
 	uint16_t queue_id;		/* queue ID*/
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v4 8/8] net/null: use generic SW stats
  2024-05-15 23:40   ` [PATCH v4 0/8] Generic 64 bit counters for SW PMD's Stephen Hemminger
                       ` (6 preceding siblings ...)
  2024-05-15 23:40     ` [PATCH v4 7/8] net/tap: " Stephen Hemminger
@ 2024-05-15 23:41     ` Stephen Hemminger
  7 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-15 23:41 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Tetsuya Mukawa

Use the new common code for statistics.
This also fixes the bug that this driver was not accounting
for bytes.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/null/rte_eth_null.c | 80 +++++++--------------------------
 1 file changed, 17 insertions(+), 63 deletions(-)

diff --git a/drivers/net/null/rte_eth_null.c b/drivers/net/null/rte_eth_null.c
index 7c46004f1e..7786982732 100644
--- a/drivers/net/null/rte_eth_null.c
+++ b/drivers/net/null/rte_eth_null.c
@@ -8,6 +8,7 @@
 #include <rte_mbuf.h>
 #include <ethdev_driver.h>
 #include <ethdev_vdev.h>
+#include <ethdev_swstats.h>
 #include <rte_malloc.h>
 #include <rte_memcpy.h>
 #include <bus_vdev_driver.h>
@@ -37,8 +38,8 @@ struct null_queue {
 	struct rte_mempool *mb_pool;
 	struct rte_mbuf *dummy_packet;
 
-	uint64_t rx_pkts;
-	uint64_t tx_pkts;
+	struct rte_eth_counters tx_stats;
+	struct rte_eth_counters rx_stats;
 };
 
 struct pmd_options {
@@ -99,11 +100,9 @@ eth_null_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 		bufs[i]->data_len = (uint16_t)packet_size;
 		bufs[i]->pkt_len = packet_size;
 		bufs[i]->port = h->internals->port_id;
+		rte_eth_count_mbuf(&h->rx_stats, bufs[i]);
 	}
 
-	/* NOTE: review for potential ordering optimization */
-	__atomic_fetch_add(&h->rx_pkts, i, __ATOMIC_SEQ_CST);
-
 	return i;
 }
 
@@ -127,11 +126,9 @@ eth_null_copy_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 		bufs[i]->data_len = (uint16_t)packet_size;
 		bufs[i]->pkt_len = packet_size;
 		bufs[i]->port = h->internals->port_id;
+		rte_eth_count_mbuf(&h->rx_stats, bufs[i]);
 	}
 
-	/* NOTE: review for potential ordering optimization */
-	__atomic_fetch_add(&h->rx_pkts, i, __ATOMIC_SEQ_CST);
-
 	return i;
 }
 
@@ -151,11 +148,10 @@ eth_null_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 	if ((q == NULL) || (bufs == NULL))
 		return 0;
 
-	for (i = 0; i < nb_bufs; i++)
+	for (i = 0; i < nb_bufs; i++) {
+		rte_eth_count_mbuf(&h->tx_stats, bufs[i]);
 		rte_pktmbuf_free(bufs[i]);
-
-	/* NOTE: review for potential ordering optimization */
-	__atomic_fetch_add(&h->tx_pkts, i, __ATOMIC_SEQ_CST);
+	}
 
 	return i;
 }
@@ -174,12 +170,10 @@ eth_null_copy_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 	for (i = 0; i < nb_bufs; i++) {
 		rte_memcpy(h->dummy_packet, rte_pktmbuf_mtod(bufs[i], void *),
 					packet_size);
+		rte_eth_count_mbuf(&h->tx_stats, bufs[i]);
 		rte_pktmbuf_free(bufs[i]);
 	}
 
-	/* NOTE: review for potential ordering optimization */
-	__atomic_fetch_add(&h->tx_pkts, i, __ATOMIC_SEQ_CST);
-
 	return i;
 }
 
@@ -322,60 +316,20 @@ eth_dev_info(struct rte_eth_dev *dev,
 }
 
 static int
-eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
+eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-	unsigned int i, num_stats;
-	unsigned long rx_total = 0, tx_total = 0;
-	const struct pmd_internals *internal;
-
-	if ((dev == NULL) || (igb_stats == NULL))
-		return -EINVAL;
-
-	internal = dev->data->dev_private;
-	num_stats = RTE_MIN((unsigned int)RTE_ETHDEV_QUEUE_STAT_CNTRS,
-			RTE_MIN(dev->data->nb_rx_queues,
-				RTE_DIM(internal->rx_null_queues)));
-	for (i = 0; i < num_stats; i++) {
-		/* NOTE: review for atomic access */
-		igb_stats->q_ipackets[i] =
-			internal->rx_null_queues[i].rx_pkts;
-		rx_total += igb_stats->q_ipackets[i];
-	}
-
-	num_stats = RTE_MIN((unsigned int)RTE_ETHDEV_QUEUE_STAT_CNTRS,
-			RTE_MIN(dev->data->nb_tx_queues,
-				RTE_DIM(internal->tx_null_queues)));
-	for (i = 0; i < num_stats; i++) {
-		/* NOTE: review for atomic access */
-		igb_stats->q_opackets[i] =
-			internal->tx_null_queues[i].tx_pkts;
-		tx_total += igb_stats->q_opackets[i];
-	}
-
-	igb_stats->ipackets = rx_total;
-	igb_stats->opackets = tx_total;
-
-	return 0;
+	return rte_eth_counters_stats_get(dev,
+					  offsetof(struct null_queue, tx_stats),
+					  offsetof(struct null_queue, rx_stats),
+					  stats);
 }
 
 static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
-	unsigned int i;
-	struct pmd_internals *internal;
-
-	if (dev == NULL)
-		return -EINVAL;
-
-	internal = dev->data->dev_private;
-	for (i = 0; i < RTE_DIM(internal->rx_null_queues); i++)
-		/* NOTE: review for atomic access */
-		internal->rx_null_queues[i].rx_pkts = 0;
-	for (i = 0; i < RTE_DIM(internal->tx_null_queues); i++)
-		/* NOTE: review for atomic access */
-		internal->tx_null_queues[i].tx_pkts = 0;
-
-	return 0;
+	return rte_eth_counters_reset(dev,
+				      offsetof(struct null_queue, tx_stats),
+				      offsetof(struct null_queue, rx_stats));
 }
 
 static void
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v5 0/9] Generic 64 bit counters
  2024-05-10  5:01 ` [RFC 0/3] generic sw counters Stephen Hemminger
                     ` (6 preceding siblings ...)
  2024-05-15 23:40   ` [PATCH v4 0/8] Generic 64 bit counters for SW PMD's Stephen Hemminger
@ 2024-05-16 15:40   ` Stephen Hemminger
  2024-05-16 15:40     ` [PATCH v5 1/9] eal: generic 64 bit counter Stephen Hemminger
                       ` (8 more replies)
  2024-05-17  0:12   ` [PATCH v6 0/9] Generic 64 bit counters for SW drivers Stephen Hemminger
                     ` (4 subsequent siblings)
  12 siblings, 9 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-16 15:40 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

Adds common code for collecting basic statistics used by many
SW based PMD's such as af_packet, af_xdp, null, tap and ring.

Solves the problem where counters could tear because of
32 bit load/store on 32 bit platforms.

v5 - use consume/release memory order for update on 32 bit case
   - initialize mbuf in ring test which was causing byte counts
     to be messed up.

Stephen Hemminger (9):
  eal: generic 64 bit counter
  ethdev: add common counters for statistics
  net/af_packet: use generic SW stats
  net/af_xdp: use generic SW stats
  net/pcap: use generic SW stats
  test/pmd_ring: initialize mbufs
  net/ring: use generic SW stats
  net/tap: use generic SW stats
  net/null: use generic SW stats

 app/test/test_pmd_ring.c                  |  22 ++--
 drivers/net/af_packet/rte_eth_af_packet.c |  82 +++-----------
 drivers/net/af_xdp/rte_eth_af_xdp.c       |  98 +++++------------
 drivers/net/null/rte_eth_null.c           |  80 +++-----------
 drivers/net/pcap/pcap_ethdev.c            | 125 +++++-----------------
 drivers/net/ring/rte_eth_ring.c           |  71 +++++-------
 drivers/net/tap/rte_eth_tap.c             |  88 +++------------
 drivers/net/tap/rte_eth_tap.h             |  15 +--
 lib/eal/include/meson.build               |   1 +
 lib/eal/include/rte_counter.h             |  98 +++++++++++++++++
 lib/ethdev/ethdev_swstats.c               | 109 +++++++++++++++++++
 lib/ethdev/ethdev_swstats.h               | 124 +++++++++++++++++++++
 lib/ethdev/meson.build                    |   2 +
 lib/ethdev/version.map                    |   3 +
 14 files changed, 480 insertions(+), 438 deletions(-)
 create mode 100644 lib/eal/include/rte_counter.h
 create mode 100644 lib/ethdev/ethdev_swstats.c
 create mode 100644 lib/ethdev/ethdev_swstats.h

-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v5 1/9] eal: generic 64 bit counter
  2024-05-16 15:40   ` [PATCH v5 0/9] Generic 64 bit counters Stephen Hemminger
@ 2024-05-16 15:40     ` Stephen Hemminger
  2024-05-16 18:22       ` Wathsala Wathawana Vithanage
  2024-05-16 15:40     ` [PATCH v5 2/9] ethdev: add common counters for statistics Stephen Hemminger
                       ` (7 subsequent siblings)
  8 siblings, 1 reply; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-16 15:40 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Morten Brørup

This header implements 64 bit counters that are NOT atomic
but are safe against load/store splits on 32 bit platforms.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
---
 lib/eal/include/meson.build   |  1 +
 lib/eal/include/rte_counter.h | 98 +++++++++++++++++++++++++++++++++++
 2 files changed, 99 insertions(+)
 create mode 100644 lib/eal/include/rte_counter.h

diff --git a/lib/eal/include/meson.build b/lib/eal/include/meson.build
index e94b056d46..c070dd0079 100644
--- a/lib/eal/include/meson.build
+++ b/lib/eal/include/meson.build
@@ -12,6 +12,7 @@ headers += files(
         'rte_class.h',
         'rte_common.h',
         'rte_compat.h',
+        'rte_counter.h',
         'rte_debug.h',
         'rte_dev.h',
         'rte_devargs.h',
diff --git a/lib/eal/include/rte_counter.h b/lib/eal/include/rte_counter.h
new file mode 100644
index 0000000000..d623195d63
--- /dev/null
+++ b/lib/eal/include/rte_counter.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+#ifndef _RTE_COUNTER_H_
+#define _RTE_COUNTER_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Counter
+ *
+ * A counter is 64 bit value that is safe from split read/write
+ * on 32 bit platforms. It assumes that only one cpu at a time
+ * will update the counter, and another CPU may want to read it.
+ *
+ * This is a much weaker guarantee than full atomic variables
+ * but is faster since no locked operations are required for update.
+ */
+
+#ifdef RTE_ARCH_64
+/*
+ * On a platform that can support native 64 bit type, no special handling.
+ * These are just wrapper around 64 bit value.
+ */
+typedef uint64_t rte_counter64_t;
+
+/**
+ * Add value to counter.
+ */
+__rte_experimental
+static inline void
+rte_counter64_add(rte_counter64_t *counter, uint32_t val)
+{
+	*counter += val;
+}
+
+__rte_experimental
+static inline uint64_t
+rte_counter64_fetch(const rte_counter64_t *counter)
+{
+	return *counter;
+}
+
+__rte_experimental
+static inline void
+rte_counter64_set(rte_counter64_t *counter, uint64_t val)
+{
+	*counter = val;
+}
+
+#else
+
+#include <rte_stdatomic.h>
+
+/*
+ * On a 32 bit platform need to use atomic to force the compler to not
+ * split 64 bit read/write.
+ */
+typedef RTE_ATOMIC(uint64_t) rte_counter64_t;
+
+__rte_experimental
+static inline void
+rte_counter64_add(rte_counter64_t *counter, uint32_t val)
+{
+	rte_atomic_fetch_add_explicit(counter, val, rte_memory_order_relaxed);
+}
+
+__rte_experimental
+static inline uint64_t
+rte_counter64_fetch(const rte_counter64_t *counter)
+{
+	return rte_atomic_load_explicit(counter, rte_memory_order_consume);
+}
+
+__rte_experimental
+static inline void
+rte_counter64_set(rte_counter64_t *counter, uint64_t val)
+{
+	rte_atomic_store_explicit(counter, val, rte_memory_order_release);
+}
+#endif
+
+__rte_experimental
+static inline void
+rte_counter64_reset(rte_counter64_t *counter)
+{
+	rte_counter64_set(counter, 0);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_COUNTER_H_ */
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v5 2/9] ethdev: add common counters for statistics
  2024-05-16 15:40   ` [PATCH v5 0/9] Generic 64 bit counters Stephen Hemminger
  2024-05-16 15:40     ` [PATCH v5 1/9] eal: generic 64 bit counter Stephen Hemminger
@ 2024-05-16 15:40     ` Stephen Hemminger
  2024-05-16 18:30       ` Wathsala Wathawana Vithanage
  2024-05-16 15:40     ` [PATCH v5 3/9] net/af_packet: use generic SW stats Stephen Hemminger
                       ` (6 subsequent siblings)
  8 siblings, 1 reply; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-16 15:40 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Thomas Monjalon, Ferruh Yigit, Andrew Rybchenko

Introduce common helper routines for keeping track of per-queue
statistics in SW PMD's. The code in several drivers had
copy/pasted the same code for this, but had common issues
with 64 bit counters on 32 bit platforms.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 lib/ethdev/ethdev_swstats.c | 109 +++++++++++++++++++++++++++++++
 lib/ethdev/ethdev_swstats.h | 124 ++++++++++++++++++++++++++++++++++++
 lib/ethdev/meson.build      |   2 +
 lib/ethdev/version.map      |   3 +
 4 files changed, 238 insertions(+)
 create mode 100644 lib/ethdev/ethdev_swstats.c
 create mode 100644 lib/ethdev/ethdev_swstats.h

diff --git a/lib/ethdev/ethdev_swstats.c b/lib/ethdev/ethdev_swstats.c
new file mode 100644
index 0000000000..7892b2180b
--- /dev/null
+++ b/lib/ethdev/ethdev_swstats.c
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+
+#include <rte_config.h>
+#include <rte_common.h>
+#include <rte_atomic.h>
+
+#include "rte_ethdev.h"
+#include "ethdev_driver.h"
+#include "ethdev_swstats.h"
+
+int
+rte_eth_counters_stats_get(const struct rte_eth_dev *dev,
+			   size_t tx_offset, size_t rx_offset,
+			   struct rte_eth_stats *stats)
+{
+	uint64_t packets, bytes, errors;
+	unsigned int i;
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		const void *txq = dev->data->tx_queues[i];
+		const struct rte_eth_counters *counters;
+
+		if (txq == NULL)
+			continue;
+
+		counters = (const struct rte_eth_counters *)((const char *)txq + tx_offset);
+		packets = rte_counter64_fetch(&counters->packets);
+		bytes = rte_counter64_fetch(&counters->bytes);
+		errors = rte_counter64_fetch(&counters->errors);
+
+		/*
+		 * Prevent compiler from fetching values twice which would
+		 * cause per-queue and global statistics to not match.
+		 */
+		rte_compiler_barrier();
+
+		stats->opackets += packets;
+		stats->obytes += bytes;
+		stats->oerrors += errors;
+
+		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+			stats->q_opackets[i] = packets;
+			stats->q_obytes[i] = bytes;
+		}
+	}
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		const void *rxq = dev->data->rx_queues[i];
+		const struct rte_eth_counters *counters;
+
+		if (rxq == NULL)
+			continue;
+
+		counters = (const struct rte_eth_counters *)((const char *)rxq + rx_offset);
+		packets = rte_counter64_fetch(&counters->packets);
+		bytes = rte_counter64_fetch(&counters->bytes);
+		errors = rte_counter64_fetch(&counters->errors);
+
+		rte_compiler_barrier();
+
+		stats->ipackets += packets;
+		stats->ibytes += bytes;
+		stats->ierrors += errors;
+
+		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+			stats->q_ipackets[i] = packets;
+			stats->q_ibytes[i] = bytes;
+		}
+	}
+
+	stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
+	return 0;
+}
+
+int
+rte_eth_counters_reset(struct rte_eth_dev *dev, size_t tx_offset, size_t rx_offset)
+{
+	struct rte_eth_counters *counters;
+	unsigned int i;
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		void *txq  = dev->data->tx_queues[i];
+
+		if (txq == NULL)
+			continue;
+
+		counters = (struct rte_eth_counters *)((char *)txq + tx_offset);
+		rte_counter64_reset(&counters->packets);
+		rte_counter64_reset(&counters->bytes);
+		rte_counter64_reset(&counters->errors);
+	}
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		void *rxq  = dev->data->rx_queues[i];
+
+		if (rxq == NULL)
+			continue;
+
+		counters = (struct rte_eth_counters *)((char *)rxq + rx_offset);
+		rte_counter64_reset(&counters->packets);
+		rte_counter64_reset(&counters->bytes);
+		rte_counter64_reset(&counters->errors);
+	}
+
+	return 0;
+}
diff --git a/lib/ethdev/ethdev_swstats.h b/lib/ethdev/ethdev_swstats.h
new file mode 100644
index 0000000000..808c540640
--- /dev/null
+++ b/lib/ethdev/ethdev_swstats.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+#ifndef _RTE_ETHDEV_SWSTATS_H_
+#define _RTE_ETHDEV_SWSTATS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ *
+ * Internal statistics counters for software based devices.
+ * Hardware PMD's should use the hardware counters instead.
+ *
+ * This provides a library for PMD's to keep track of packets and bytes.
+ * It is assumed that this will be used per queue and queues are not
+ * shared by lcores.
+ */
+
+#include <rte_counter.h>
+
+/**
+ * A structure to be embedded in the device driver per-queue data.
+ */
+struct rte_eth_counters {
+	rte_counter64_t	packets;	/**< Total number of packets. */
+	rte_counter64_t	bytes;		/**< Total number of bytes. */
+	rte_counter64_t	errors;		/**< Total number of packets with errors. */
+};
+
+/**
+ * @internal
+ * Increment counters for a single packet.
+ *
+ * @param counters
+ *    Pointer to queue structure containing counters.
+ * @param sz
+ *    Size of the packet in bytes.
+ */
+__rte_internal
+static inline void
+rte_eth_count_packet(struct rte_eth_counters *counters, uint32_t sz)
+{
+	rte_counter64_add(&counters->packets, 1);
+	rte_counter64_add(&counters->bytes, sz);
+}
+
+/**
+ * @internal
+ * Increment counters based on mbuf.
+ *
+ * @param counters
+ *    Pointer to queue structure containing counters.
+ * @param mbuf
+ *    Received or transmitted mbuf.
+ */
+__rte_internal
+static inline void
+rte_eth_count_mbuf(struct rte_eth_counters *counters, const struct rte_mbuf *mbuf)
+{
+	rte_eth_count_packet(counters, rte_pktmbuf_pkt_len(mbuf));
+}
+
+/**
+ * @internal
+ * Increment error counter.
+ *
+ * @param counters
+ *    Pointer to queue structure containing counters.
+ */
+__rte_internal
+static inline void
+rte_eth_count_error(struct rte_eth_counters *counters)
+{
+	rte_counter64_add(&counters->errors, 1);
+}
+
+/**
+ * @internal
+ * Retrieve the general statistics for all queues.
+ * @see rte_eth_stats_get.
+ *
+ * @param dev
+ *    Pointer to the Ethernet device structure.
+ * @param tx_offset
+ *    Offset from the tx_queue structure where stats are located.
+ * @param rx_offset
+ *    Offset from the rx_queue structure where stats are located.
+ * @param stats
+ *   A pointer to a structure of type *rte_eth_stats* to be filled
+ * @return
+ *   Zero if successful. Non-zero otherwise.
+ */
+__rte_internal
+int rte_eth_counters_stats_get(const struct rte_eth_dev *dev,
+			       size_t tx_offset, size_t rx_offset,
+			       struct rte_eth_stats *stats);
+
+/**
+ * @internal
+ * Reset the statistics for all queues.
+ * @see rte_eth_stats_reset.
+ *
+ * @param dev
+ *    Pointer to the Ethernet device structure.
+ * @param tx_offset
+ *    Offset from the tx_queue structure where stats are located.
+ * @param rx_offset
+ *    Offset from the rx_queue structure where stats are located.
+ * @return
+ *   Zero if successful. Non-zero otherwise.
+ */
+__rte_internal
+int rte_eth_counters_reset(struct rte_eth_dev *dev,
+			   size_t tx_offset, size_t rx_offset);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ETHDEV_SWSTATS_H_ */
diff --git a/lib/ethdev/meson.build b/lib/ethdev/meson.build
index f1d2586591..7ce29a46d4 100644
--- a/lib/ethdev/meson.build
+++ b/lib/ethdev/meson.build
@@ -3,6 +3,7 @@
 
 sources = files(
         'ethdev_driver.c',
+        'ethdev_swstats.c',
         'ethdev_private.c',
         'ethdev_profile.c',
         'ethdev_trace_points.c',
@@ -42,6 +43,7 @@ driver_sdk_headers += files(
         'ethdev_driver.h',
         'ethdev_pci.h',
         'ethdev_vdev.h',
+        'ethdev_swstats.h',
 )
 
 if is_linux
diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
index 79f6f5293b..fc595be278 100644
--- a/lib/ethdev/version.map
+++ b/lib/ethdev/version.map
@@ -358,4 +358,7 @@ INTERNAL {
 	rte_eth_switch_domain_alloc;
 	rte_eth_switch_domain_free;
 	rte_flow_fp_default_ops;
+
+	rte_eth_counters_reset;
+	rte_eth_counters_stats_get;
 };
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v5 3/9] net/af_packet: use generic SW stats
  2024-05-16 15:40   ` [PATCH v5 0/9] Generic 64 bit counters Stephen Hemminger
  2024-05-16 15:40     ` [PATCH v5 1/9] eal: generic 64 bit counter Stephen Hemminger
  2024-05-16 15:40     ` [PATCH v5 2/9] ethdev: add common counters for statistics Stephen Hemminger
@ 2024-05-16 15:40     ` Stephen Hemminger
  2024-05-16 15:40     ` [PATCH v5 4/9] net/af_xdp: " Stephen Hemminger
                       ` (5 subsequent siblings)
  8 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-16 15:40 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, John W. Linville

Use the new generic SW stats.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/af_packet/rte_eth_af_packet.c | 82 ++++-------------------
 1 file changed, 14 insertions(+), 68 deletions(-)

diff --git a/drivers/net/af_packet/rte_eth_af_packet.c b/drivers/net/af_packet/rte_eth_af_packet.c
index 397a32db58..89b737e7dc 100644
--- a/drivers/net/af_packet/rte_eth_af_packet.c
+++ b/drivers/net/af_packet/rte_eth_af_packet.c
@@ -10,6 +10,7 @@
 #include <rte_mbuf.h>
 #include <ethdev_driver.h>
 #include <ethdev_vdev.h>
+#include <ethdev_swstats.h>
 #include <rte_malloc.h>
 #include <rte_kvargs.h>
 #include <bus_vdev_driver.h>
@@ -29,6 +30,7 @@
 #include <unistd.h>
 #include <poll.h>
 
+
 #define ETH_AF_PACKET_IFACE_ARG		"iface"
 #define ETH_AF_PACKET_NUM_Q_ARG		"qpairs"
 #define ETH_AF_PACKET_BLOCKSIZE_ARG	"blocksz"
@@ -51,8 +53,7 @@ struct pkt_rx_queue {
 	uint16_t in_port;
 	uint8_t vlan_strip;
 
-	volatile unsigned long rx_pkts;
-	volatile unsigned long rx_bytes;
+	struct rte_eth_counters stats;
 };
 
 struct pkt_tx_queue {
@@ -64,11 +65,10 @@ struct pkt_tx_queue {
 	unsigned int framecount;
 	unsigned int framenum;
 
-	volatile unsigned long tx_pkts;
-	volatile unsigned long err_pkts;
-	volatile unsigned long tx_bytes;
+	struct rte_eth_counters stats;
 };
 
+
 struct pmd_internals {
 	unsigned nb_queues;
 
@@ -118,8 +118,6 @@ eth_af_packet_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct rte_mbuf *mbuf;
 	uint8_t *pbuf;
 	struct pkt_rx_queue *pkt_q = queue;
-	uint16_t num_rx = 0;
-	unsigned long num_rx_bytes = 0;
 	unsigned int framecount, framenum;
 
 	if (unlikely(nb_pkts == 0))
@@ -164,13 +162,11 @@ eth_af_packet_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		/* account for the receive frame */
 		bufs[i] = mbuf;
-		num_rx++;
-		num_rx_bytes += mbuf->pkt_len;
+		rte_eth_count_mbuf(&pkt_q->stats, mbuf);
 	}
 	pkt_q->framenum = framenum;
-	pkt_q->rx_pkts += num_rx;
-	pkt_q->rx_bytes += num_rx_bytes;
-	return num_rx;
+
+	return i;
 }
 
 /*
@@ -205,8 +201,6 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	unsigned int framecount, framenum;
 	struct pollfd pfd;
 	struct pkt_tx_queue *pkt_q = queue;
-	uint16_t num_tx = 0;
-	unsigned long num_tx_bytes = 0;
 	int i;
 
 	if (unlikely(nb_pkts == 0))
@@ -285,8 +279,7 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			framenum = 0;
 		ppd = (struct tpacket2_hdr *) pkt_q->rd[framenum].iov_base;
 
-		num_tx++;
-		num_tx_bytes += mbuf->pkt_len;
+		rte_eth_count_mbuf(&pkt_q->stats, mbuf);
 		rte_pktmbuf_free(mbuf);
 	}
 
@@ -298,15 +291,9 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		 * packets will be considered successful even though only some
 		 * are sent.
 		 */
-
-		num_tx = 0;
-		num_tx_bytes = 0;
 	}
 
 	pkt_q->framenum = framenum;
-	pkt_q->tx_pkts += num_tx;
-	pkt_q->err_pkts += i - num_tx;
-	pkt_q->tx_bytes += num_tx_bytes;
 	return i;
 }
 
@@ -386,58 +373,17 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 }
 
 static int
-eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
+eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-	unsigned i, imax;
-	unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
-	unsigned long rx_bytes_total = 0, tx_bytes_total = 0;
-	const struct pmd_internals *internal = dev->data->dev_private;
-
-	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
-	        internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
-	for (i = 0; i < imax; i++) {
-		igb_stats->q_ipackets[i] = internal->rx_queue[i].rx_pkts;
-		igb_stats->q_ibytes[i] = internal->rx_queue[i].rx_bytes;
-		rx_total += igb_stats->q_ipackets[i];
-		rx_bytes_total += igb_stats->q_ibytes[i];
-	}
-
-	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
-	        internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
-	for (i = 0; i < imax; i++) {
-		igb_stats->q_opackets[i] = internal->tx_queue[i].tx_pkts;
-		igb_stats->q_obytes[i] = internal->tx_queue[i].tx_bytes;
-		tx_total += igb_stats->q_opackets[i];
-		tx_err_total += internal->tx_queue[i].err_pkts;
-		tx_bytes_total += igb_stats->q_obytes[i];
-	}
-
-	igb_stats->ipackets = rx_total;
-	igb_stats->ibytes = rx_bytes_total;
-	igb_stats->opackets = tx_total;
-	igb_stats->oerrors = tx_err_total;
-	igb_stats->obytes = tx_bytes_total;
-	return 0;
+	return rte_eth_counters_stats_get(dev, offsetof(struct pkt_tx_queue, stats),
+					  offsetof(struct pkt_rx_queue, stats), stats);
 }
 
 static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
-	unsigned i;
-	struct pmd_internals *internal = dev->data->dev_private;
-
-	for (i = 0; i < internal->nb_queues; i++) {
-		internal->rx_queue[i].rx_pkts = 0;
-		internal->rx_queue[i].rx_bytes = 0;
-	}
-
-	for (i = 0; i < internal->nb_queues; i++) {
-		internal->tx_queue[i].tx_pkts = 0;
-		internal->tx_queue[i].err_pkts = 0;
-		internal->tx_queue[i].tx_bytes = 0;
-	}
-
-	return 0;
+	return rte_eth_counters_reset(dev, offsetof(struct pkt_tx_queue, stats),
+				      offsetof(struct pkt_rx_queue, stats));
 }
 
 static int
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v5 4/9] net/af_xdp: use generic SW stats
  2024-05-16 15:40   ` [PATCH v5 0/9] Generic 64 bit counters Stephen Hemminger
                       ` (2 preceding siblings ...)
  2024-05-16 15:40     ` [PATCH v5 3/9] net/af_packet: use generic SW stats Stephen Hemminger
@ 2024-05-16 15:40     ` Stephen Hemminger
  2024-05-16 15:40     ` [PATCH v5 5/9] net/pcap: " Stephen Hemminger
                       ` (4 subsequent siblings)
  8 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-16 15:40 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Ciara Loftus

Use common code for all SW stats.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/af_xdp/rte_eth_af_xdp.c | 98 ++++++++---------------------
 1 file changed, 25 insertions(+), 73 deletions(-)

diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 268a130c49..65fc2f478f 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -20,6 +20,7 @@
 #include <rte_ethdev.h>
 #include <ethdev_driver.h>
 #include <ethdev_vdev.h>
+#include <ethdev_swstats.h>
 #include <rte_kvargs.h>
 #include <bus_vdev_driver.h>
 #include <rte_string_fns.h>
@@ -120,19 +121,13 @@ struct xsk_umem_info {
 	uint32_t max_xsks;
 };
 
-struct rx_stats {
-	uint64_t rx_pkts;
-	uint64_t rx_bytes;
-	uint64_t rx_dropped;
-};
-
 struct pkt_rx_queue {
 	struct xsk_ring_cons rx;
 	struct xsk_umem_info *umem;
 	struct xsk_socket *xsk;
 	struct rte_mempool *mb_pool;
 
-	struct rx_stats stats;
+	struct rte_eth_counters stats;
 
 	struct xsk_ring_prod fq;
 	struct xsk_ring_cons cq;
@@ -143,17 +138,11 @@ struct pkt_rx_queue {
 	int busy_budget;
 };
 
-struct tx_stats {
-	uint64_t tx_pkts;
-	uint64_t tx_bytes;
-	uint64_t tx_dropped;
-};
-
 struct pkt_tx_queue {
 	struct xsk_ring_prod tx;
 	struct xsk_umem_info *umem;
 
-	struct tx_stats stats;
+	struct rte_eth_counters stats;
 
 	struct pkt_rx_queue *pair;
 	int xsk_queue_idx;
@@ -308,7 +297,6 @@ af_xdp_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct xsk_ring_prod *fq = &rxq->fq;
 	struct xsk_umem_info *umem = rxq->umem;
 	uint32_t idx_rx = 0;
-	unsigned long rx_bytes = 0;
 	int i;
 	struct rte_mbuf *fq_bufs[ETH_AF_XDP_RX_BATCH_SIZE];
 
@@ -363,16 +351,13 @@ af_xdp_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		rte_pktmbuf_pkt_len(bufs[i]) = len;
 		rte_pktmbuf_data_len(bufs[i]) = len;
-		rx_bytes += len;
+
+		rte_eth_count_mbuf(&rxq->stats, bufs[i]);
 	}
 
 	xsk_ring_cons__release(rx, nb_pkts);
 	(void)reserve_fill_queue(umem, nb_pkts, fq_bufs, fq);
 
-	/* statistics */
-	rxq->stats.rx_pkts += nb_pkts;
-	rxq->stats.rx_bytes += rx_bytes;
-
 	return nb_pkts;
 }
 #else
@@ -384,7 +369,6 @@ af_xdp_rx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct xsk_umem_info *umem = rxq->umem;
 	struct xsk_ring_prod *fq = &rxq->fq;
 	uint32_t idx_rx = 0;
-	unsigned long rx_bytes = 0;
 	int i;
 	uint32_t free_thresh = fq->size >> 1;
 	struct rte_mbuf *mbufs[ETH_AF_XDP_RX_BATCH_SIZE];
@@ -424,16 +408,13 @@ af_xdp_rx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		rte_ring_enqueue(umem->buf_ring, (void *)addr);
 		rte_pktmbuf_pkt_len(mbufs[i]) = len;
 		rte_pktmbuf_data_len(mbufs[i]) = len;
-		rx_bytes += len;
+		rte_eth_count_mbuf(&rxq->stats, mbufs[i]);
+
 		bufs[i] = mbufs[i];
 	}
 
 	xsk_ring_cons__release(rx, nb_pkts);
 
-	/* statistics */
-	rxq->stats.rx_pkts += nb_pkts;
-	rxq->stats.rx_bytes += rx_bytes;
-
 	return nb_pkts;
 }
 #endif
@@ -527,9 +508,8 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct pkt_tx_queue *txq = queue;
 	struct xsk_umem_info *umem = txq->umem;
 	struct rte_mbuf *mbuf;
-	unsigned long tx_bytes = 0;
 	int i;
-	uint32_t idx_tx;
+	uint32_t idx_tx, pkt_len;
 	uint16_t count = 0;
 	struct xdp_desc *desc;
 	uint64_t addr, offset;
@@ -541,6 +521,7 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 	for (i = 0; i < nb_pkts; i++) {
 		mbuf = bufs[i];
+		pkt_len = rte_pktmbuf_pkt_len(mbuf);
 
 		if (mbuf->pool == umem->mb_pool) {
 			if (!xsk_ring_prod__reserve(&txq->tx, 1, &idx_tx)) {
@@ -589,17 +570,13 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			count++;
 		}
 
-		tx_bytes += mbuf->pkt_len;
+		rte_eth_count_packet(&txq->stats, pkt_len);
 	}
 
 out:
 	xsk_ring_prod__submit(&txq->tx, count);
 	kick_tx(txq, cq);
 
-	txq->stats.tx_pkts += count;
-	txq->stats.tx_bytes += tx_bytes;
-	txq->stats.tx_dropped += nb_pkts - count;
-
 	return count;
 }
 #else
@@ -610,7 +587,6 @@ af_xdp_tx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct xsk_umem_info *umem = txq->umem;
 	struct rte_mbuf *mbuf;
 	void *addrs[ETH_AF_XDP_TX_BATCH_SIZE];
-	unsigned long tx_bytes = 0;
 	int i;
 	uint32_t idx_tx;
 	struct xsk_ring_cons *cq = &txq->pair->cq;
@@ -640,7 +616,8 @@ af_xdp_tx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		pkt = xsk_umem__get_data(umem->mz->addr,
 					 desc->addr);
 		rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *), desc->len);
-		tx_bytes += mbuf->pkt_len;
+		rte_eth_qsw_update(&txq->stats, mbuf);
+
 		rte_pktmbuf_free(mbuf);
 	}
 
@@ -648,9 +625,6 @@ af_xdp_tx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 	kick_tx(txq, cq);
 
-	txq->stats.tx_pkts += nb_pkts;
-	txq->stats.tx_bytes += tx_bytes;
-
 	return nb_pkts;
 }
 
@@ -847,39 +821,26 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 static int
 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-	struct pmd_internals *internals = dev->data->dev_private;
 	struct pmd_process_private *process_private = dev->process_private;
-	struct xdp_statistics xdp_stats;
-	struct pkt_rx_queue *rxq;
-	struct pkt_tx_queue *txq;
-	socklen_t optlen;
-	int i, ret, fd;
+	unsigned int i;
 
-	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		optlen = sizeof(struct xdp_statistics);
-		rxq = &internals->rx_queues[i];
-		txq = rxq->pair;
-		stats->q_ipackets[i] = rxq->stats.rx_pkts;
-		stats->q_ibytes[i] = rxq->stats.rx_bytes;
+	rte_eth_counters_stats_get(dev, offsetof(struct pkt_tx_queue, stats),
+				   offsetof(struct pkt_rx_queue, stats), stats);
 
-		stats->q_opackets[i] = txq->stats.tx_pkts;
-		stats->q_obytes[i] = txq->stats.tx_bytes;
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		struct xdp_statistics xdp_stats;
+		socklen_t optlen = sizeof(xdp_stats);
+		int fd;
 
-		stats->ipackets += stats->q_ipackets[i];
-		stats->ibytes += stats->q_ibytes[i];
-		stats->imissed += rxq->stats.rx_dropped;
-		stats->oerrors += txq->stats.tx_dropped;
 		fd = process_private->rxq_xsk_fds[i];
-		ret = fd >= 0 ? getsockopt(fd, SOL_XDP, XDP_STATISTICS,
-					   &xdp_stats, &optlen) : -1;
-		if (ret != 0) {
+		if (fd < 0)
+			continue;
+		if (getsockopt(fd, SOL_XDP, XDP_STATISTICS,
+			       &xdp_stats, &optlen)  < 0) {
 			AF_XDP_LOG(ERR, "getsockopt() failed for XDP_STATISTICS.\n");
 			return -1;
 		}
 		stats->imissed += xdp_stats.rx_dropped;
-
-		stats->opackets += stats->q_opackets[i];
-		stats->obytes += stats->q_obytes[i];
 	}
 
 	return 0;
@@ -888,17 +849,8 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
-	struct pmd_internals *internals = dev->data->dev_private;
-	int i;
-
-	for (i = 0; i < internals->queue_cnt; i++) {
-		memset(&internals->rx_queues[i].stats, 0,
-					sizeof(struct rx_stats));
-		memset(&internals->tx_queues[i].stats, 0,
-					sizeof(struct tx_stats));
-	}
-
-	return 0;
+	return rte_eth_counters_reset(dev, offsetof(struct pkt_tx_queue, stats),
+				      offsetof(struct pkt_rx_queue, stats));
 }
 
 #ifdef RTE_NET_AF_XDP_LIBBPF_XDP_ATTACH
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v5 5/9] net/pcap: use generic SW stats
  2024-05-16 15:40   ` [PATCH v5 0/9] Generic 64 bit counters Stephen Hemminger
                       ` (3 preceding siblings ...)
  2024-05-16 15:40     ` [PATCH v5 4/9] net/af_xdp: " Stephen Hemminger
@ 2024-05-16 15:40     ` Stephen Hemminger
  2024-05-16 15:40     ` [PATCH v5 6/9] test/pmd_ring: initialize mbufs Stephen Hemminger
                       ` (3 subsequent siblings)
  8 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-16 15:40 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

Use common statistics for SW drivers.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/pcap/pcap_ethdev.c | 125 +++++++--------------------------
 1 file changed, 26 insertions(+), 99 deletions(-)

diff --git a/drivers/net/pcap/pcap_ethdev.c b/drivers/net/pcap/pcap_ethdev.c
index bfec085045..b1a983f871 100644
--- a/drivers/net/pcap/pcap_ethdev.c
+++ b/drivers/net/pcap/pcap_ethdev.c
@@ -11,6 +11,7 @@
 
 #include <rte_cycles.h>
 #include <ethdev_driver.h>
+#include <ethdev_swstats.h>
 #include <ethdev_vdev.h>
 #include <rte_kvargs.h>
 #include <rte_malloc.h>
@@ -48,13 +49,6 @@ static uint8_t iface_idx;
 static uint64_t timestamp_rx_dynflag;
 static int timestamp_dynfield_offset = -1;
 
-struct queue_stat {
-	volatile unsigned long pkts;
-	volatile unsigned long bytes;
-	volatile unsigned long err_pkts;
-	volatile unsigned long rx_nombuf;
-};
-
 struct queue_missed_stat {
 	/* last value retrieved from pcap */
 	unsigned int pcap;
@@ -68,7 +62,7 @@ struct pcap_rx_queue {
 	uint16_t port_id;
 	uint16_t queue_id;
 	struct rte_mempool *mb_pool;
-	struct queue_stat rx_stat;
+	struct rte_eth_counters rx_stat;
 	struct queue_missed_stat missed_stat;
 	char name[PATH_MAX];
 	char type[ETH_PCAP_ARG_MAXLEN];
@@ -80,7 +74,7 @@ struct pcap_rx_queue {
 struct pcap_tx_queue {
 	uint16_t port_id;
 	uint16_t queue_id;
-	struct queue_stat tx_stat;
+	struct rte_eth_counters tx_stat;
 	char name[PATH_MAX];
 	char type[ETH_PCAP_ARG_MAXLEN];
 };
@@ -238,7 +232,6 @@ eth_pcap_rx_infinite(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
 	int i;
 	struct pcap_rx_queue *pcap_q = queue;
-	uint32_t rx_bytes = 0;
 
 	if (unlikely(nb_pkts == 0))
 		return 0;
@@ -252,39 +245,35 @@ eth_pcap_rx_infinite(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		if (err)
 			return i;
 
+		rte_eth_count_mbuf(&pcap_q->rx_stat, pcap_buf);
+
 		rte_memcpy(rte_pktmbuf_mtod(bufs[i], void *),
 				rte_pktmbuf_mtod(pcap_buf, void *),
 				pcap_buf->data_len);
 		bufs[i]->data_len = pcap_buf->data_len;
 		bufs[i]->pkt_len = pcap_buf->pkt_len;
 		bufs[i]->port = pcap_q->port_id;
-		rx_bytes += pcap_buf->data_len;
+
 
 		/* Enqueue packet back on ring to allow infinite rx. */
 		rte_ring_enqueue(pcap_q->pkts, pcap_buf);
 	}
 
-	pcap_q->rx_stat.pkts += i;
-	pcap_q->rx_stat.bytes += rx_bytes;
-
 	return i;
 }
 
 static uint16_t
 eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
+	struct pcap_rx_queue *pcap_q = queue;
+	struct rte_eth_dev *dev = &rte_eth_devices[pcap_q->port_id];
+	struct pmd_process_private *pp = dev->process_private;
+	pcap_t *pcap = pp->rx_pcap[pcap_q->queue_id];
 	unsigned int i;
 	struct pcap_pkthdr header;
-	struct pmd_process_private *pp;
 	const u_char *packet;
 	struct rte_mbuf *mbuf;
-	struct pcap_rx_queue *pcap_q = queue;
 	uint16_t num_rx = 0;
-	uint32_t rx_bytes = 0;
-	pcap_t *pcap;
-
-	pp = rte_eth_devices[pcap_q->port_id].process_private;
-	pcap = pp->rx_pcap[pcap_q->queue_id];
 
 	if (unlikely(pcap == NULL || nb_pkts == 0))
 		return 0;
@@ -300,7 +289,7 @@ eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		mbuf = rte_pktmbuf_alloc(pcap_q->mb_pool);
 		if (unlikely(mbuf == NULL)) {
-			pcap_q->rx_stat.rx_nombuf++;
+			++dev->data->rx_mbuf_alloc_failed;
 			break;
 		}
 
@@ -315,7 +304,7 @@ eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 						       mbuf,
 						       packet,
 						       header.caplen) == -1)) {
-				pcap_q->rx_stat.err_pkts++;
+				rte_eth_count_error(&pcap_q->rx_stat);
 				rte_pktmbuf_free(mbuf);
 				break;
 			}
@@ -329,11 +318,10 @@ eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		mbuf->ol_flags |= timestamp_rx_dynflag;
 		mbuf->port = pcap_q->port_id;
 		bufs[num_rx] = mbuf;
+
+		rte_eth_count_mbuf(&pcap_q->rx_stat, mbuf);
 		num_rx++;
-		rx_bytes += header.caplen;
 	}
-	pcap_q->rx_stat.pkts += num_rx;
-	pcap_q->rx_stat.bytes += rx_bytes;
 
 	return num_rx;
 }
@@ -379,8 +367,6 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct rte_mbuf *mbuf;
 	struct pmd_process_private *pp;
 	struct pcap_tx_queue *dumper_q = queue;
-	uint16_t num_tx = 0;
-	uint32_t tx_bytes = 0;
 	struct pcap_pkthdr header;
 	pcap_dumper_t *dumper;
 	unsigned char temp_data[RTE_ETH_PCAP_SNAPLEN];
@@ -412,8 +398,7 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		pcap_dump((u_char *)dumper, &header,
 			rte_pktmbuf_read(mbuf, 0, caplen, temp_data));
 
-		num_tx++;
-		tx_bytes += caplen;
+		rte_eth_count_mbuf(&dumper_q->tx_stat, mbuf);
 		rte_pktmbuf_free(mbuf);
 	}
 
@@ -423,9 +408,6 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	 * we flush the pcap dumper within each burst.
 	 */
 	pcap_dump_flush(dumper);
-	dumper_q->tx_stat.pkts += num_tx;
-	dumper_q->tx_stat.bytes += tx_bytes;
-	dumper_q->tx_stat.err_pkts += nb_pkts - num_tx;
 
 	return nb_pkts;
 }
@@ -437,20 +419,16 @@ static uint16_t
 eth_tx_drop(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
 	unsigned int i;
-	uint32_t tx_bytes = 0;
 	struct pcap_tx_queue *tx_queue = queue;
 
 	if (unlikely(nb_pkts == 0))
 		return 0;
 
 	for (i = 0; i < nb_pkts; i++) {
-		tx_bytes += bufs[i]->pkt_len;
+		rte_eth_count_mbuf(&tx_queue->tx_stat, bufs[i]);
 		rte_pktmbuf_free(bufs[i]);
 	}
 
-	tx_queue->tx_stat.pkts += nb_pkts;
-	tx_queue->tx_stat.bytes += tx_bytes;
-
 	return i;
 }
 
@@ -465,8 +443,6 @@ eth_pcap_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct rte_mbuf *mbuf;
 	struct pmd_process_private *pp;
 	struct pcap_tx_queue *tx_queue = queue;
-	uint16_t num_tx = 0;
-	uint32_t tx_bytes = 0;
 	pcap_t *pcap;
 	unsigned char temp_data[RTE_ETH_PCAP_SNAPLEN];
 	size_t len;
@@ -497,15 +473,11 @@ eth_pcap_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			rte_pktmbuf_read(mbuf, 0, len, temp_data), len);
 		if (unlikely(ret != 0))
 			break;
-		num_tx++;
-		tx_bytes += len;
+
+		rte_eth_count_mbuf(&tx_queue->tx_stat, mbuf);
 		rte_pktmbuf_free(mbuf);
 	}
 
-	tx_queue->tx_stat.pkts += num_tx;
-	tx_queue->tx_stat.bytes += tx_bytes;
-	tx_queue->tx_stat.err_pkts += i - num_tx;
-
 	return i;
 }
 
@@ -746,41 +718,12 @@ static int
 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
 	unsigned int i;
-	unsigned long rx_packets_total = 0, rx_bytes_total = 0;
-	unsigned long rx_missed_total = 0;
-	unsigned long rx_nombuf_total = 0, rx_err_total = 0;
-	unsigned long tx_packets_total = 0, tx_bytes_total = 0;
-	unsigned long tx_packets_err_total = 0;
-	const struct pmd_internals *internal = dev->data->dev_private;
-
-	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
-			i < dev->data->nb_rx_queues; i++) {
-		stats->q_ipackets[i] = internal->rx_queue[i].rx_stat.pkts;
-		stats->q_ibytes[i] = internal->rx_queue[i].rx_stat.bytes;
-		rx_nombuf_total += internal->rx_queue[i].rx_stat.rx_nombuf;
-		rx_err_total += internal->rx_queue[i].rx_stat.err_pkts;
-		rx_packets_total += stats->q_ipackets[i];
-		rx_bytes_total += stats->q_ibytes[i];
-		rx_missed_total += queue_missed_stat_get(dev, i);
-	}
 
-	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
-			i < dev->data->nb_tx_queues; i++) {
-		stats->q_opackets[i] = internal->tx_queue[i].tx_stat.pkts;
-		stats->q_obytes[i] = internal->tx_queue[i].tx_stat.bytes;
-		tx_packets_total += stats->q_opackets[i];
-		tx_bytes_total += stats->q_obytes[i];
-		tx_packets_err_total += internal->tx_queue[i].tx_stat.err_pkts;
-	}
+	rte_eth_counters_stats_get(dev, offsetof(struct pcap_tx_queue, tx_stat),
+				   offsetof(struct pcap_rx_queue, rx_stat), stats);
 
-	stats->ipackets = rx_packets_total;
-	stats->ibytes = rx_bytes_total;
-	stats->imissed = rx_missed_total;
-	stats->ierrors = rx_err_total;
-	stats->rx_nombuf = rx_nombuf_total;
-	stats->opackets = tx_packets_total;
-	stats->obytes = tx_bytes_total;
-	stats->oerrors = tx_packets_err_total;
+	for (i = 0; i < dev->data->nb_rx_queues; i++)
+		stats->imissed += queue_missed_stat_get(dev, i);
 
 	return 0;
 }
@@ -789,21 +732,12 @@ static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
 	unsigned int i;
-	struct pmd_internals *internal = dev->data->dev_private;
 
-	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		internal->rx_queue[i].rx_stat.pkts = 0;
-		internal->rx_queue[i].rx_stat.bytes = 0;
-		internal->rx_queue[i].rx_stat.err_pkts = 0;
-		internal->rx_queue[i].rx_stat.rx_nombuf = 0;
-		queue_missed_stat_reset(dev, i);
-	}
+	rte_eth_counters_reset(dev, offsetof(struct pcap_tx_queue, tx_stat),
+			       offsetof(struct pcap_rx_queue, rx_stat));
 
-	for (i = 0; i < dev->data->nb_tx_queues; i++) {
-		internal->tx_queue[i].tx_stat.pkts = 0;
-		internal->tx_queue[i].tx_stat.bytes = 0;
-		internal->tx_queue[i].tx_stat.err_pkts = 0;
-	}
+	for (i = 0; i < dev->data->nb_rx_queues; i++)
+		queue_missed_stat_reset(dev, i);
 
 	return 0;
 }
@@ -929,13 +863,6 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 				pcap_pkt_count);
 			return -EINVAL;
 		}
-
-		/*
-		 * Reset the stats for this queue since eth_pcap_rx calls above
-		 * didn't result in the application receiving packets.
-		 */
-		pcap_q->rx_stat.pkts = 0;
-		pcap_q->rx_stat.bytes = 0;
 	}
 
 	return 0;
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v5 6/9] test/pmd_ring: initialize mbufs
  2024-05-16 15:40   ` [PATCH v5 0/9] Generic 64 bit counters Stephen Hemminger
                       ` (4 preceding siblings ...)
  2024-05-16 15:40     ` [PATCH v5 5/9] net/pcap: " Stephen Hemminger
@ 2024-05-16 15:40     ` Stephen Hemminger
  2024-05-16 15:40     ` [PATCH v5 7/9] net/ring: use generic SW stats Stephen Hemminger
                       ` (2 subsequent siblings)
  8 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-16 15:40 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Bruce Richardson

Do not pass uninitialized mbuf into the ring PMD.
The mbuf should be initialized first so that length is zero.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 app/test/test_pmd_ring.c | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/app/test/test_pmd_ring.c b/app/test/test_pmd_ring.c
index e83b9dd6b8..9cd5ae46db 100644
--- a/app/test/test_pmd_ring.c
+++ b/app/test/test_pmd_ring.c
@@ -68,14 +68,16 @@ test_ethdev_configure_port(int port)
 static int
 test_send_basic_packets(void)
 {
-	struct rte_mbuf  bufs[RING_SIZE];
-	struct rte_mbuf *pbufs[RING_SIZE];
+	struct rte_mbuf bufs[RING_SIZE];
+	struct rte_mbuf *pbufs[RING_SIZE] = { };
 	int i;
 
 	printf("Testing send and receive RING_SIZE/2 packets (tx_porta -> rx_portb)\n");
 
-	for (i = 0; i < RING_SIZE/2; i++)
+	for (i = 0; i < RING_SIZE / 2; i++) {
+		rte_pktmbuf_reset(&bufs[i]);
 		pbufs[i] = &bufs[i];
+	}
 
 	if (rte_eth_tx_burst(tx_porta, 0, pbufs, RING_SIZE/2) < RING_SIZE/2) {
 		printf("Failed to transmit packet burst port %d\n", tx_porta);
@@ -99,14 +101,16 @@ test_send_basic_packets(void)
 static int
 test_send_basic_packets_port(int port)
 {
-	struct rte_mbuf  bufs[RING_SIZE];
-	struct rte_mbuf *pbufs[RING_SIZE];
+	struct rte_mbuf bufs[RING_SIZE];
+	struct rte_mbuf *pbufs[RING_SIZE] = { };
 	int i;
 
 	printf("Testing send and receive RING_SIZE/2 packets (cmdl_port0 -> cmdl_port0)\n");
 
-	for (i = 0; i < RING_SIZE/2; i++)
+	for (i = 0; i < RING_SIZE / 2; i++) {
+		rte_pktmbuf_reset(&bufs[i]);
 		pbufs[i] = &bufs[i];
+	}
 
 	if (rte_eth_tx_burst(port, 0, pbufs, RING_SIZE/2) < RING_SIZE/2) {
 		printf("Failed to transmit packet burst port %d\n", port);
@@ -134,10 +138,11 @@ test_get_stats(int port)
 	struct rte_eth_stats stats;
 	struct rte_mbuf buf, *pbuf = &buf;
 
+	rte_pktmbuf_reset(&buf);
+
 	printf("Testing ring PMD stats_get port %d\n", port);
 
 	/* check stats of RXTX port, should all be zero */
-
 	rte_eth_stats_get(port, &stats);
 	if (stats.ipackets != 0 || stats.opackets != 0 ||
 			stats.ibytes != 0 || stats.obytes != 0 ||
@@ -173,6 +178,8 @@ test_stats_reset(int port)
 	struct rte_eth_stats stats;
 	struct rte_mbuf buf, *pbuf = &buf;
 
+	rte_pktmbuf_reset(&buf);
+
 	printf("Testing ring PMD stats_reset port %d\n", port);
 
 	rte_eth_stats_reset(port);
@@ -228,6 +235,7 @@ test_pmd_ring_pair_create_attach(void)
 	int ret;
 
 	memset(&null_conf, 0, sizeof(struct rte_eth_conf));
+	rte_pktmbuf_reset(&buf);
 
 	if ((rte_eth_dev_configure(rxtx_portd, 1, 1, &null_conf) < 0)
 			|| (rte_eth_dev_configure(rxtx_porte, 1, 1,
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v5 7/9] net/ring: use generic SW stats
  2024-05-16 15:40   ` [PATCH v5 0/9] Generic 64 bit counters Stephen Hemminger
                       ` (5 preceding siblings ...)
  2024-05-16 15:40     ` [PATCH v5 6/9] test/pmd_ring: initialize mbufs Stephen Hemminger
@ 2024-05-16 15:40     ` Stephen Hemminger
  2024-05-16 15:40     ` [PATCH v5 8/9] net/tap: " Stephen Hemminger
  2024-05-16 15:40     ` [PATCH v5 9/9] net/null: " Stephen Hemminger
  8 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-16 15:40 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Bruce Richardson

Use generic per-queue infrastructure.
This also fixes bug where ring code was not accounting
for bytes.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/ring/rte_eth_ring.c | 71 +++++++++++++--------------------
 1 file changed, 28 insertions(+), 43 deletions(-)

diff --git a/drivers/net/ring/rte_eth_ring.c b/drivers/net/ring/rte_eth_ring.c
index 48953dd7a0..85f14dd679 100644
--- a/drivers/net/ring/rte_eth_ring.c
+++ b/drivers/net/ring/rte_eth_ring.c
@@ -7,6 +7,7 @@
 #include "rte_eth_ring.h"
 #include <rte_mbuf.h>
 #include <ethdev_driver.h>
+#include <ethdev_swstats.h>
 #include <rte_malloc.h>
 #include <rte_memcpy.h>
 #include <rte_os_shim.h>
@@ -44,8 +45,8 @@ enum dev_action {
 
 struct ring_queue {
 	struct rte_ring *rng;
-	uint64_t rx_pkts;
-	uint64_t tx_pkts;
+
+	struct rte_eth_counters stats;
 };
 
 struct pmd_internals {
@@ -77,12 +78,13 @@ eth_ring_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 {
 	void **ptrs = (void *)&bufs[0];
 	struct ring_queue *r = q;
-	const uint16_t nb_rx = (uint16_t)rte_ring_dequeue_burst(r->rng,
-			ptrs, nb_bufs, NULL);
-	if (r->rng->flags & RING_F_SC_DEQ)
-		r->rx_pkts += nb_rx;
-	else
-		__atomic_fetch_add(&r->rx_pkts, nb_rx, __ATOMIC_RELAXED);
+	uint16_t i, nb_rx;
+
+	nb_rx = (uint16_t)rte_ring_dequeue_burst(r->rng, ptrs, nb_bufs, NULL);
+
+	for (i = 0; i < nb_rx; i++)
+		rte_eth_count_mbuf(&r->stats, bufs[i]);
+
 	return nb_rx;
 }
 
@@ -90,13 +92,20 @@ static uint16_t
 eth_ring_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 {
 	void **ptrs = (void *)&bufs[0];
+	uint32_t *sizes;
 	struct ring_queue *r = q;
-	const uint16_t nb_tx = (uint16_t)rte_ring_enqueue_burst(r->rng,
-			ptrs, nb_bufs, NULL);
-	if (r->rng->flags & RING_F_SP_ENQ)
-		r->tx_pkts += nb_tx;
-	else
-		__atomic_fetch_add(&r->tx_pkts, nb_tx, __ATOMIC_RELAXED);
+	uint16_t i, nb_tx;
+
+	sizes = alloca(sizeof(uint32_t) * nb_bufs);
+
+	for (i = 0; i < nb_bufs; i++)
+		sizes[i] = rte_pktmbuf_pkt_len(bufs[i]);
+
+	nb_tx = (uint16_t)rte_ring_enqueue_burst(r->rng, ptrs, nb_bufs, NULL);
+
+	for (i = 0; i < nb_tx; i++)
+		rte_eth_count_packet(&r->stats, sizes[i]);
+
 	return nb_tx;
 }
 
@@ -193,40 +202,16 @@ eth_dev_info(struct rte_eth_dev *dev,
 static int
 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-	unsigned int i;
-	unsigned long rx_total = 0, tx_total = 0;
-	const struct pmd_internals *internal = dev->data->dev_private;
-
-	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
-			i < dev->data->nb_rx_queues; i++) {
-		stats->q_ipackets[i] = internal->rx_ring_queues[i].rx_pkts;
-		rx_total += stats->q_ipackets[i];
-	}
-
-	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
-			i < dev->data->nb_tx_queues; i++) {
-		stats->q_opackets[i] = internal->tx_ring_queues[i].tx_pkts;
-		tx_total += stats->q_opackets[i];
-	}
-
-	stats->ipackets = rx_total;
-	stats->opackets = tx_total;
-
-	return 0;
+	return rte_eth_counters_stats_get(dev, offsetof(struct ring_queue, stats),
+					  offsetof(struct ring_queue, stats),
+					  stats);
 }
 
 static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
-	unsigned int i;
-	struct pmd_internals *internal = dev->data->dev_private;
-
-	for (i = 0; i < dev->data->nb_rx_queues; i++)
-		internal->rx_ring_queues[i].rx_pkts = 0;
-	for (i = 0; i < dev->data->nb_tx_queues; i++)
-		internal->tx_ring_queues[i].tx_pkts = 0;
-
-	return 0;
+	return rte_eth_counters_reset(dev, offsetof(struct ring_queue, stats),
+				      offsetof(struct ring_queue, stats));
 }
 
 static void
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v5 8/9] net/tap: use generic SW stats
  2024-05-16 15:40   ` [PATCH v5 0/9] Generic 64 bit counters Stephen Hemminger
                       ` (6 preceding siblings ...)
  2024-05-16 15:40     ` [PATCH v5 7/9] net/ring: use generic SW stats Stephen Hemminger
@ 2024-05-16 15:40     ` Stephen Hemminger
  2024-05-16 15:40     ` [PATCH v5 9/9] net/null: " Stephen Hemminger
  8 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-16 15:40 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

Use new common sw statistics.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/tap/rte_eth_tap.c | 88 ++++++-----------------------------
 drivers/net/tap/rte_eth_tap.h | 15 ++----
 2 files changed, 18 insertions(+), 85 deletions(-)

diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index 69d9da695b..f87979da4f 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -432,7 +432,6 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct rx_queue *rxq = queue;
 	struct pmd_process_private *process_private;
 	uint16_t num_rx;
-	unsigned long num_rx_bytes = 0;
 	uint32_t trigger = tap_trigger;
 
 	if (trigger == rxq->trigger_seen)
@@ -455,7 +454,7 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		/* Packet couldn't fit in the provided mbuf */
 		if (unlikely(rxq->pi.flags & TUN_PKT_STRIP)) {
-			rxq->stats.ierrors++;
+			rte_eth_count_error(&rxq->stats);
 			continue;
 		}
 
@@ -467,7 +466,9 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			struct rte_mbuf *buf = rte_pktmbuf_alloc(rxq->mp);
 
 			if (unlikely(!buf)) {
-				rxq->stats.rx_nombuf++;
+				struct rte_eth_dev *dev = &rte_eth_devices[rxq->in_port];
+				++dev->data->rx_mbuf_alloc_failed;
+
 				/* No new buf has been allocated: do nothing */
 				if (!new_tail || !seg)
 					goto end;
@@ -509,11 +510,9 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		/* account for the receive frame */
 		bufs[num_rx++] = mbuf;
-		num_rx_bytes += mbuf->pkt_len;
+		rte_eth_count_mbuf(&rxq->stats, mbuf);
 	}
 end:
-	rxq->stats.ipackets += num_rx;
-	rxq->stats.ibytes += num_rx_bytes;
 
 	if (trigger && num_rx < nb_pkts)
 		rxq->trigger_seen = trigger;
@@ -523,8 +522,7 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 static inline int
 tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs,
-			struct rte_mbuf **pmbufs,
-			uint16_t *num_packets, unsigned long *num_tx_bytes)
+		struct rte_mbuf **pmbufs)
 {
 	struct pmd_process_private *process_private;
 	int i;
@@ -647,8 +645,7 @@ tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs,
 		if (n <= 0)
 			return -1;
 
-		(*num_packets)++;
-		(*num_tx_bytes) += rte_pktmbuf_pkt_len(mbuf);
+		rte_eth_count_mbuf(&txq->stats, mbuf);
 	}
 	return 0;
 }
@@ -660,8 +657,6 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
 	struct tx_queue *txq = queue;
 	uint16_t num_tx = 0;
-	uint16_t num_packets = 0;
-	unsigned long num_tx_bytes = 0;
 	uint32_t max_size;
 	int i;
 
@@ -693,7 +688,7 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			tso_segsz = mbuf_in->tso_segsz + hdrs_len;
 			if (unlikely(tso_segsz == hdrs_len) ||
 				tso_segsz > *txq->mtu) {
-				txq->stats.errs++;
+				rte_eth_count_error(&txq->stats);
 				break;
 			}
 			gso_ctx->gso_size = tso_segsz;
@@ -728,10 +723,10 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			num_mbufs = 1;
 		}
 
-		ret = tap_write_mbufs(txq, num_mbufs, mbuf,
-				&num_packets, &num_tx_bytes);
+		ret = tap_write_mbufs(txq, num_mbufs, mbuf);
 		if (ret == -1) {
-			txq->stats.errs++;
+			rte_eth_count_error(&txq->stats);
+
 			/* free tso mbufs */
 			if (num_tso_mbufs > 0)
 				rte_pktmbuf_free_bulk(mbuf, num_tso_mbufs);
@@ -749,10 +744,6 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		}
 	}
 
-	txq->stats.opackets += num_packets;
-	txq->stats.errs += nb_pkts - num_tx;
-	txq->stats.obytes += num_tx_bytes;
-
 	return num_tx;
 }
 
@@ -1055,64 +1046,15 @@ tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 static int
 tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats)
 {
-	unsigned int i, imax;
-	unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
-	unsigned long rx_bytes_total = 0, tx_bytes_total = 0;
-	unsigned long rx_nombuf = 0, ierrors = 0;
-	const struct pmd_internals *pmd = dev->data->dev_private;
-
-	/* rx queue statistics */
-	imax = (dev->data->nb_rx_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ?
-		dev->data->nb_rx_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS;
-	for (i = 0; i < imax; i++) {
-		tap_stats->q_ipackets[i] = pmd->rxq[i].stats.ipackets;
-		tap_stats->q_ibytes[i] = pmd->rxq[i].stats.ibytes;
-		rx_total += tap_stats->q_ipackets[i];
-		rx_bytes_total += tap_stats->q_ibytes[i];
-		rx_nombuf += pmd->rxq[i].stats.rx_nombuf;
-		ierrors += pmd->rxq[i].stats.ierrors;
-	}
-
-	/* tx queue statistics */
-	imax = (dev->data->nb_tx_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ?
-		dev->data->nb_tx_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS;
-
-	for (i = 0; i < imax; i++) {
-		tap_stats->q_opackets[i] = pmd->txq[i].stats.opackets;
-		tap_stats->q_obytes[i] = pmd->txq[i].stats.obytes;
-		tx_total += tap_stats->q_opackets[i];
-		tx_err_total += pmd->txq[i].stats.errs;
-		tx_bytes_total += tap_stats->q_obytes[i];
-	}
-
-	tap_stats->ipackets = rx_total;
-	tap_stats->ibytes = rx_bytes_total;
-	tap_stats->ierrors = ierrors;
-	tap_stats->rx_nombuf = rx_nombuf;
-	tap_stats->opackets = tx_total;
-	tap_stats->oerrors = tx_err_total;
-	tap_stats->obytes = tx_bytes_total;
-	return 0;
+	return rte_eth_counters_stats_get(dev, offsetof(struct tx_queue, stats),
+					  offsetof(struct rx_queue, stats), tap_stats);
 }
 
 static int
 tap_stats_reset(struct rte_eth_dev *dev)
 {
-	int i;
-	struct pmd_internals *pmd = dev->data->dev_private;
-
-	for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
-		pmd->rxq[i].stats.ipackets = 0;
-		pmd->rxq[i].stats.ibytes = 0;
-		pmd->rxq[i].stats.ierrors = 0;
-		pmd->rxq[i].stats.rx_nombuf = 0;
-
-		pmd->txq[i].stats.opackets = 0;
-		pmd->txq[i].stats.errs = 0;
-		pmd->txq[i].stats.obytes = 0;
-	}
-
-	return 0;
+	return rte_eth_counters_reset(dev, offsetof(struct tx_queue, stats),
+				      offsetof(struct rx_queue, stats));
 }
 
 static int
diff --git a/drivers/net/tap/rte_eth_tap.h b/drivers/net/tap/rte_eth_tap.h
index 5ac93f93e9..8cba9ea410 100644
--- a/drivers/net/tap/rte_eth_tap.h
+++ b/drivers/net/tap/rte_eth_tap.h
@@ -14,6 +14,7 @@
 #include <linux/if_tun.h>
 
 #include <ethdev_driver.h>
+#include <ethdev_swstats.h>
 #include <rte_ether.h>
 #include <rte_gso.h>
 #include "tap_log.h"
@@ -32,23 +33,13 @@ enum rte_tuntap_type {
 	ETH_TUNTAP_TYPE_MAX,
 };
 
-struct pkt_stats {
-	uint64_t opackets;              /* Number of output packets */
-	uint64_t ipackets;              /* Number of input packets */
-	uint64_t obytes;                /* Number of bytes on output */
-	uint64_t ibytes;                /* Number of bytes on input */
-	uint64_t errs;                  /* Number of TX error packets */
-	uint64_t ierrors;               /* Number of RX error packets */
-	uint64_t rx_nombuf;             /* Nb of RX mbuf alloc failures */
-};
-
 struct rx_queue {
 	struct rte_mempool *mp;         /* Mempool for RX packets */
 	uint32_t trigger_seen;          /* Last seen Rx trigger value */
 	uint16_t in_port;               /* Port ID */
 	uint16_t queue_id;		/* queue ID*/
-	struct pkt_stats stats;         /* Stats for this RX queue */
 	uint16_t nb_rx_desc;            /* max number of mbufs available */
+	struct rte_eth_counters stats;  /* Stats for this RX queue */
 	struct rte_eth_rxmode *rxmode;  /* RX features */
 	struct rte_mbuf *pool;          /* mbufs pool for this queue */
 	struct iovec (*iovecs)[];       /* descriptors for this queue */
@@ -59,7 +50,7 @@ struct tx_queue {
 	int type;                       /* Type field - TUN|TAP */
 	uint16_t *mtu;                  /* Pointer to MTU from dev_data */
 	uint16_t csum:1;                /* Enable checksum offloading */
-	struct pkt_stats stats;         /* Stats for this TX queue */
+	struct rte_eth_counters stats;	/* Stats for this TX queue */
 	struct rte_gso_ctx gso_ctx;     /* GSO context */
 	uint16_t out_port;              /* Port ID */
 	uint16_t queue_id;		/* queue ID*/
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v5 9/9] net/null: use generic SW stats
  2024-05-16 15:40   ` [PATCH v5 0/9] Generic 64 bit counters Stephen Hemminger
                       ` (7 preceding siblings ...)
  2024-05-16 15:40     ` [PATCH v5 8/9] net/tap: " Stephen Hemminger
@ 2024-05-16 15:40     ` Stephen Hemminger
  8 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-16 15:40 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Tetsuya Mukawa

Use the new common code for statistics.
This also fixes the bug that this driver was not accounting
for bytes.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/null/rte_eth_null.c | 80 +++++++--------------------------
 1 file changed, 17 insertions(+), 63 deletions(-)

diff --git a/drivers/net/null/rte_eth_null.c b/drivers/net/null/rte_eth_null.c
index 7c46004f1e..7786982732 100644
--- a/drivers/net/null/rte_eth_null.c
+++ b/drivers/net/null/rte_eth_null.c
@@ -8,6 +8,7 @@
 #include <rte_mbuf.h>
 #include <ethdev_driver.h>
 #include <ethdev_vdev.h>
+#include <ethdev_swstats.h>
 #include <rte_malloc.h>
 #include <rte_memcpy.h>
 #include <bus_vdev_driver.h>
@@ -37,8 +38,8 @@ struct null_queue {
 	struct rte_mempool *mb_pool;
 	struct rte_mbuf *dummy_packet;
 
-	uint64_t rx_pkts;
-	uint64_t tx_pkts;
+	struct rte_eth_counters tx_stats;
+	struct rte_eth_counters rx_stats;
 };
 
 struct pmd_options {
@@ -99,11 +100,9 @@ eth_null_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 		bufs[i]->data_len = (uint16_t)packet_size;
 		bufs[i]->pkt_len = packet_size;
 		bufs[i]->port = h->internals->port_id;
+		rte_eth_count_mbuf(&h->rx_stats, bufs[i]);
 	}
 
-	/* NOTE: review for potential ordering optimization */
-	__atomic_fetch_add(&h->rx_pkts, i, __ATOMIC_SEQ_CST);
-
 	return i;
 }
 
@@ -127,11 +126,9 @@ eth_null_copy_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 		bufs[i]->data_len = (uint16_t)packet_size;
 		bufs[i]->pkt_len = packet_size;
 		bufs[i]->port = h->internals->port_id;
+		rte_eth_count_mbuf(&h->rx_stats, bufs[i]);
 	}
 
-	/* NOTE: review for potential ordering optimization */
-	__atomic_fetch_add(&h->rx_pkts, i, __ATOMIC_SEQ_CST);
-
 	return i;
 }
 
@@ -151,11 +148,10 @@ eth_null_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 	if ((q == NULL) || (bufs == NULL))
 		return 0;
 
-	for (i = 0; i < nb_bufs; i++)
+	for (i = 0; i < nb_bufs; i++) {
+		rte_eth_count_mbuf(&h->tx_stats, bufs[i]);
 		rte_pktmbuf_free(bufs[i]);
-
-	/* NOTE: review for potential ordering optimization */
-	__atomic_fetch_add(&h->tx_pkts, i, __ATOMIC_SEQ_CST);
+	}
 
 	return i;
 }
@@ -174,12 +170,10 @@ eth_null_copy_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 	for (i = 0; i < nb_bufs; i++) {
 		rte_memcpy(h->dummy_packet, rte_pktmbuf_mtod(bufs[i], void *),
 					packet_size);
+		rte_eth_count_mbuf(&h->tx_stats, bufs[i]);
 		rte_pktmbuf_free(bufs[i]);
 	}
 
-	/* NOTE: review for potential ordering optimization */
-	__atomic_fetch_add(&h->tx_pkts, i, __ATOMIC_SEQ_CST);
-
 	return i;
 }
 
@@ -322,60 +316,20 @@ eth_dev_info(struct rte_eth_dev *dev,
 }
 
 static int
-eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
+eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-	unsigned int i, num_stats;
-	unsigned long rx_total = 0, tx_total = 0;
-	const struct pmd_internals *internal;
-
-	if ((dev == NULL) || (igb_stats == NULL))
-		return -EINVAL;
-
-	internal = dev->data->dev_private;
-	num_stats = RTE_MIN((unsigned int)RTE_ETHDEV_QUEUE_STAT_CNTRS,
-			RTE_MIN(dev->data->nb_rx_queues,
-				RTE_DIM(internal->rx_null_queues)));
-	for (i = 0; i < num_stats; i++) {
-		/* NOTE: review for atomic access */
-		igb_stats->q_ipackets[i] =
-			internal->rx_null_queues[i].rx_pkts;
-		rx_total += igb_stats->q_ipackets[i];
-	}
-
-	num_stats = RTE_MIN((unsigned int)RTE_ETHDEV_QUEUE_STAT_CNTRS,
-			RTE_MIN(dev->data->nb_tx_queues,
-				RTE_DIM(internal->tx_null_queues)));
-	for (i = 0; i < num_stats; i++) {
-		/* NOTE: review for atomic access */
-		igb_stats->q_opackets[i] =
-			internal->tx_null_queues[i].tx_pkts;
-		tx_total += igb_stats->q_opackets[i];
-	}
-
-	igb_stats->ipackets = rx_total;
-	igb_stats->opackets = tx_total;
-
-	return 0;
+	return rte_eth_counters_stats_get(dev,
+					  offsetof(struct null_queue, tx_stats),
+					  offsetof(struct null_queue, rx_stats),
+					  stats);
 }
 
 static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
-	unsigned int i;
-	struct pmd_internals *internal;
-
-	if (dev == NULL)
-		return -EINVAL;
-
-	internal = dev->data->dev_private;
-	for (i = 0; i < RTE_DIM(internal->rx_null_queues); i++)
-		/* NOTE: review for atomic access */
-		internal->rx_null_queues[i].rx_pkts = 0;
-	for (i = 0; i < RTE_DIM(internal->tx_null_queues); i++)
-		/* NOTE: review for atomic access */
-		internal->tx_null_queues[i].tx_pkts = 0;
-
-	return 0;
+	return rte_eth_counters_reset(dev,
+				      offsetof(struct null_queue, tx_stats),
+				      offsetof(struct null_queue, rx_stats));
 }
 
 static void
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v5 1/9] eal: generic 64 bit counter
  2024-05-16 15:40     ` [PATCH v5 1/9] eal: generic 64 bit counter Stephen Hemminger
@ 2024-05-16 18:22       ` Wathsala Wathawana Vithanage
  2024-05-16 21:42         ` Stephen Hemminger
  0 siblings, 1 reply; 179+ messages in thread
From: Wathsala Wathawana Vithanage @ 2024-05-16 18:22 UTC (permalink / raw)
  To: Stephen Hemminger, dev; +Cc: Morten Brørup, nd

Hi Stephen,

> +__rte_experimental
> +static inline uint64_t
> +rte_counter64_fetch(const rte_counter64_t *counter) {
> +	return *counter;
> +}

What if the address pointed by counter is not aligned and the
architecture doesn't support atomic (untorn) loads on non-aligned loads? 

--wathsala



^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v5 2/9] ethdev: add common counters for statistics
  2024-05-16 15:40     ` [PATCH v5 2/9] ethdev: add common counters for statistics Stephen Hemminger
@ 2024-05-16 18:30       ` Wathsala Wathawana Vithanage
  2024-05-17  0:19         ` Stephen Hemminger
  0 siblings, 1 reply; 179+ messages in thread
From: Wathsala Wathawana Vithanage @ 2024-05-16 18:30 UTC (permalink / raw)
  To: Stephen Hemminger, dev; +Cc: thomas, Ferruh Yigit, Andrew Rybchenko, nd

> +		packets = rte_counter64_fetch(&counters->packets);
> +		bytes = rte_counter64_fetch(&counters->bytes);
> +		errors = rte_counter64_fetch(&counters->errors);
> +
> +		rte_compiler_barrier();
> +
> +		stats->ipackets += packets;
> +		stats->ibytes += bytes;
> +		stats->ierrors += errors;
> +

there seems to be a dependency chain in the above loads and subsequent stores.
If that's the case what's the purpose of the compiler barrier?

--wathsala

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v5 1/9] eal: generic 64 bit counter
  2024-05-16 18:22       ` Wathsala Wathawana Vithanage
@ 2024-05-16 21:42         ` Stephen Hemminger
  2024-05-17  2:39           ` Honnappa Nagarahalli
  0 siblings, 1 reply; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-16 21:42 UTC (permalink / raw)
  To: Wathsala Wathawana Vithanage; +Cc: dev, Morten Brørup, nd

On Thu, 16 May 2024 18:22:23 +0000
Wathsala Wathawana Vithanage <wathsala.vithanage@arm.com> wrote:

> Hi Stephen,
> 
> > +__rte_experimental
> > +static inline uint64_t
> > +rte_counter64_fetch(const rte_counter64_t *counter) {
> > +	return *counter;
> > +}  
> 
> What if the address pointed by counter is not aligned and the
> architecture doesn't support atomic (untorn) loads on non-aligned loads? 
> 
> --wathsala

Then the driver is using it incorrectly. For the use case of a set of counters
(even if embedded in another struct), the compiler takes care of this.

Remember this is an internal API, not something that needs to handle user
abuse.

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v6 0/9] Generic 64 bit counters for SW drivers
  2024-05-10  5:01 ` [RFC 0/3] generic sw counters Stephen Hemminger
                     ` (7 preceding siblings ...)
  2024-05-16 15:40   ` [PATCH v5 0/9] Generic 64 bit counters Stephen Hemminger
@ 2024-05-17  0:12   ` Stephen Hemminger
  2024-05-17  0:12     ` [PATCH v6 1/9] eal: generic 64 bit counter Stephen Hemminger
                       ` (8 more replies)
  2024-05-17 17:35   ` [PATCH v7 0/9] Use weak atomic operations for SW PMD counters Stephen Hemminger
                     ` (3 subsequent siblings)
  12 siblings, 9 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-17  0:12 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger


Adds common code for collecting basic statistics used by many
SW based PMD's such as af_packet, af_xdp, null, tap and ring.

Solves the problem where counters could tear because of
32 bit load/store on 32 bit platforms.

v6 - address initialization of mbuf in test/pmd_ring

Stephen Hemminger (9):
  eal: generic 64 bit counter
  ethdev: add common counters for statistics
  net/af_packet: use generic SW stats
  net/af_xdp: use generic SW stats
  net/pcap: use generic SW stats
  test/pmd_ring: initialize mbufs
  net/ring: use generic SW stats
  net/tap: use generic SW stats
  net/null: use generic SW stats

 app/test/test_pmd_ring.c                  |  26 ++++-
 drivers/net/af_packet/rte_eth_af_packet.c |  82 +++-----------
 drivers/net/af_xdp/rte_eth_af_xdp.c       |  98 +++++------------
 drivers/net/null/rte_eth_null.c           |  80 +++-----------
 drivers/net/pcap/pcap_ethdev.c            | 125 +++++-----------------
 drivers/net/ring/rte_eth_ring.c           |  71 +++++-------
 drivers/net/tap/rte_eth_tap.c             |  88 +++------------
 drivers/net/tap/rte_eth_tap.h             |  15 +--
 lib/eal/include/meson.build               |   1 +
 lib/eal/include/rte_counter.h             |  98 +++++++++++++++++
 lib/ethdev/ethdev_swstats.c               | 109 +++++++++++++++++++
 lib/ethdev/ethdev_swstats.h               | 124 +++++++++++++++++++++
 lib/ethdev/meson.build                    |   2 +
 lib/ethdev/version.map                    |   3 +
 14 files changed, 486 insertions(+), 436 deletions(-)
 create mode 100644 lib/eal/include/rte_counter.h
 create mode 100644 lib/ethdev/ethdev_swstats.c
 create mode 100644 lib/ethdev/ethdev_swstats.h

-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v6 1/9] eal: generic 64 bit counter
  2024-05-17  0:12   ` [PATCH v6 0/9] Generic 64 bit counters for SW drivers Stephen Hemminger
@ 2024-05-17  0:12     ` Stephen Hemminger
  2024-05-17  2:45       ` Honnappa Nagarahalli
  2024-05-17  0:12     ` [PATCH v6 2/9] ethdev: add common counters for statistics Stephen Hemminger
                       ` (7 subsequent siblings)
  8 siblings, 1 reply; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-17  0:12 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Morten Brørup

This header implements 64 bit counters that are NOT atomic
but are safe against load/store splits on 32 bit platforms.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
---
 lib/eal/include/meson.build   |  1 +
 lib/eal/include/rte_counter.h | 98 +++++++++++++++++++++++++++++++++++
 2 files changed, 99 insertions(+)
 create mode 100644 lib/eal/include/rte_counter.h

diff --git a/lib/eal/include/meson.build b/lib/eal/include/meson.build
index e94b056d46..c070dd0079 100644
--- a/lib/eal/include/meson.build
+++ b/lib/eal/include/meson.build
@@ -12,6 +12,7 @@ headers += files(
         'rte_class.h',
         'rte_common.h',
         'rte_compat.h',
+        'rte_counter.h',
         'rte_debug.h',
         'rte_dev.h',
         'rte_devargs.h',
diff --git a/lib/eal/include/rte_counter.h b/lib/eal/include/rte_counter.h
new file mode 100644
index 0000000000..d623195d63
--- /dev/null
+++ b/lib/eal/include/rte_counter.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+#ifndef _RTE_COUNTER_H_
+#define _RTE_COUNTER_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Counter
+ *
+ * A counter is 64 bit value that is safe from split read/write
+ * on 32 bit platforms. It assumes that only one cpu at a time
+ * will update the counter, and another CPU may want to read it.
+ *
+ * This is a much weaker guarantee than full atomic variables
+ * but is faster since no locked operations are required for update.
+ */
+
+#ifdef RTE_ARCH_64
+/*
+ * On a platform that can support native 64 bit type, no special handling.
+ * These are just wrapper around 64 bit value.
+ */
+typedef uint64_t rte_counter64_t;
+
+/**
+ * Add value to counter.
+ */
+__rte_experimental
+static inline void
+rte_counter64_add(rte_counter64_t *counter, uint32_t val)
+{
+	*counter += val;
+}
+
+__rte_experimental
+static inline uint64_t
+rte_counter64_fetch(const rte_counter64_t *counter)
+{
+	return *counter;
+}
+
+__rte_experimental
+static inline void
+rte_counter64_set(rte_counter64_t *counter, uint64_t val)
+{
+	*counter = val;
+}
+
+#else
+
+#include <rte_stdatomic.h>
+
+/*
+ * On a 32 bit platform need to use atomic to force the compler to not
+ * split 64 bit read/write.
+ */
+typedef RTE_ATOMIC(uint64_t) rte_counter64_t;
+
+__rte_experimental
+static inline void
+rte_counter64_add(rte_counter64_t *counter, uint32_t val)
+{
+	rte_atomic_fetch_add_explicit(counter, val, rte_memory_order_relaxed);
+}
+
+__rte_experimental
+static inline uint64_t
+rte_counter64_fetch(const rte_counter64_t *counter)
+{
+	return rte_atomic_load_explicit(counter, rte_memory_order_consume);
+}
+
+__rte_experimental
+static inline void
+rte_counter64_set(rte_counter64_t *counter, uint64_t val)
+{
+	rte_atomic_store_explicit(counter, val, rte_memory_order_release);
+}
+#endif
+
+__rte_experimental
+static inline void
+rte_counter64_reset(rte_counter64_t *counter)
+{
+	rte_counter64_set(counter, 0);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_COUNTER_H_ */
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v6 2/9] ethdev: add common counters for statistics
  2024-05-17  0:12   ` [PATCH v6 0/9] Generic 64 bit counters for SW drivers Stephen Hemminger
  2024-05-17  0:12     ` [PATCH v6 1/9] eal: generic 64 bit counter Stephen Hemminger
@ 2024-05-17  0:12     ` Stephen Hemminger
  2024-05-17  0:12     ` [PATCH v6 3/9] net/af_packet: use generic SW stats Stephen Hemminger
                       ` (6 subsequent siblings)
  8 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-17  0:12 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Thomas Monjalon, Ferruh Yigit, Andrew Rybchenko

Introduce common helper routines for keeping track of per-queue
statistics in SW PMD's. The code in several drivers had
copy/pasted the same code for this, but had common issues
with 64 bit counters on 32 bit platforms.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 lib/ethdev/ethdev_swstats.c | 109 +++++++++++++++++++++++++++++++
 lib/ethdev/ethdev_swstats.h | 124 ++++++++++++++++++++++++++++++++++++
 lib/ethdev/meson.build      |   2 +
 lib/ethdev/version.map      |   3 +
 4 files changed, 238 insertions(+)
 create mode 100644 lib/ethdev/ethdev_swstats.c
 create mode 100644 lib/ethdev/ethdev_swstats.h

diff --git a/lib/ethdev/ethdev_swstats.c b/lib/ethdev/ethdev_swstats.c
new file mode 100644
index 0000000000..7892b2180b
--- /dev/null
+++ b/lib/ethdev/ethdev_swstats.c
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+
+#include <rte_config.h>
+#include <rte_common.h>
+#include <rte_atomic.h>
+
+#include "rte_ethdev.h"
+#include "ethdev_driver.h"
+#include "ethdev_swstats.h"
+
+int
+rte_eth_counters_stats_get(const struct rte_eth_dev *dev,
+			   size_t tx_offset, size_t rx_offset,
+			   struct rte_eth_stats *stats)
+{
+	uint64_t packets, bytes, errors;
+	unsigned int i;
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		const void *txq = dev->data->tx_queues[i];
+		const struct rte_eth_counters *counters;
+
+		if (txq == NULL)
+			continue;
+
+		counters = (const struct rte_eth_counters *)((const char *)txq + tx_offset);
+		packets = rte_counter64_fetch(&counters->packets);
+		bytes = rte_counter64_fetch(&counters->bytes);
+		errors = rte_counter64_fetch(&counters->errors);
+
+		/*
+		 * Prevent compiler from fetching values twice which would
+		 * cause per-queue and global statistics to not match.
+		 */
+		rte_compiler_barrier();
+
+		stats->opackets += packets;
+		stats->obytes += bytes;
+		stats->oerrors += errors;
+
+		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+			stats->q_opackets[i] = packets;
+			stats->q_obytes[i] = bytes;
+		}
+	}
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		const void *rxq = dev->data->rx_queues[i];
+		const struct rte_eth_counters *counters;
+
+		if (rxq == NULL)
+			continue;
+
+		counters = (const struct rte_eth_counters *)((const char *)rxq + rx_offset);
+		packets = rte_counter64_fetch(&counters->packets);
+		bytes = rte_counter64_fetch(&counters->bytes);
+		errors = rte_counter64_fetch(&counters->errors);
+
+		rte_compiler_barrier();
+
+		stats->ipackets += packets;
+		stats->ibytes += bytes;
+		stats->ierrors += errors;
+
+		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+			stats->q_ipackets[i] = packets;
+			stats->q_ibytes[i] = bytes;
+		}
+	}
+
+	stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
+	return 0;
+}
+
+int
+rte_eth_counters_reset(struct rte_eth_dev *dev, size_t tx_offset, size_t rx_offset)
+{
+	struct rte_eth_counters *counters;
+	unsigned int i;
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		void *txq  = dev->data->tx_queues[i];
+
+		if (txq == NULL)
+			continue;
+
+		counters = (struct rte_eth_counters *)((char *)txq + tx_offset);
+		rte_counter64_reset(&counters->packets);
+		rte_counter64_reset(&counters->bytes);
+		rte_counter64_reset(&counters->errors);
+	}
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		void *rxq  = dev->data->rx_queues[i];
+
+		if (rxq == NULL)
+			continue;
+
+		counters = (struct rte_eth_counters *)((char *)rxq + rx_offset);
+		rte_counter64_reset(&counters->packets);
+		rte_counter64_reset(&counters->bytes);
+		rte_counter64_reset(&counters->errors);
+	}
+
+	return 0;
+}
diff --git a/lib/ethdev/ethdev_swstats.h b/lib/ethdev/ethdev_swstats.h
new file mode 100644
index 0000000000..808c540640
--- /dev/null
+++ b/lib/ethdev/ethdev_swstats.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+#ifndef _RTE_ETHDEV_SWSTATS_H_
+#define _RTE_ETHDEV_SWSTATS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ *
+ * Internal statistics counters for software based devices.
+ * Hardware PMD's should use the hardware counters instead.
+ *
+ * This provides a library for PMD's to keep track of packets and bytes.
+ * It is assumed that this will be used per queue and queues are not
+ * shared by lcores.
+ */
+
+#include <rte_counter.h>
+
+/**
+ * A structure to be embedded in the device driver per-queue data.
+ */
+struct rte_eth_counters {
+	rte_counter64_t	packets;	/**< Total number of packets. */
+	rte_counter64_t	bytes;		/**< Total number of bytes. */
+	rte_counter64_t	errors;		/**< Total number of packets with errors. */
+};
+
+/**
+ * @internal
+ * Increment counters for a single packet.
+ *
+ * @param counters
+ *    Pointer to queue structure containing counters.
+ * @param sz
+ *    Size of the packet in bytes.
+ */
+__rte_internal
+static inline void
+rte_eth_count_packet(struct rte_eth_counters *counters, uint32_t sz)
+{
+	rte_counter64_add(&counters->packets, 1);
+	rte_counter64_add(&counters->bytes, sz);
+}
+
+/**
+ * @internal
+ * Increment counters based on mbuf.
+ *
+ * @param counters
+ *    Pointer to queue structure containing counters.
+ * @param mbuf
+ *    Received or transmitted mbuf.
+ */
+__rte_internal
+static inline void
+rte_eth_count_mbuf(struct rte_eth_counters *counters, const struct rte_mbuf *mbuf)
+{
+	rte_eth_count_packet(counters, rte_pktmbuf_pkt_len(mbuf));
+}
+
+/**
+ * @internal
+ * Increment error counter.
+ *
+ * @param counters
+ *    Pointer to queue structure containing counters.
+ */
+__rte_internal
+static inline void
+rte_eth_count_error(struct rte_eth_counters *counters)
+{
+	rte_counter64_add(&counters->errors, 1);
+}
+
+/**
+ * @internal
+ * Retrieve the general statistics for all queues.
+ * @see rte_eth_stats_get.
+ *
+ * @param dev
+ *    Pointer to the Ethernet device structure.
+ * @param tx_offset
+ *    Offset from the tx_queue structure where stats are located.
+ * @param rx_offset
+ *    Offset from the rx_queue structure where stats are located.
+ * @param stats
+ *   A pointer to a structure of type *rte_eth_stats* to be filled
+ * @return
+ *   Zero if successful. Non-zero otherwise.
+ */
+__rte_internal
+int rte_eth_counters_stats_get(const struct rte_eth_dev *dev,
+			       size_t tx_offset, size_t rx_offset,
+			       struct rte_eth_stats *stats);
+
+/**
+ * @internal
+ * Reset the statistics for all queues.
+ * @see rte_eth_stats_reset.
+ *
+ * @param dev
+ *    Pointer to the Ethernet device structure.
+ * @param tx_offset
+ *    Offset from the tx_queue structure where stats are located.
+ * @param rx_offset
+ *    Offset from the rx_queue structure where stats are located.
+ * @return
+ *   Zero if successful. Non-zero otherwise.
+ */
+__rte_internal
+int rte_eth_counters_reset(struct rte_eth_dev *dev,
+			   size_t tx_offset, size_t rx_offset);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ETHDEV_SWSTATS_H_ */
diff --git a/lib/ethdev/meson.build b/lib/ethdev/meson.build
index f1d2586591..7ce29a46d4 100644
--- a/lib/ethdev/meson.build
+++ b/lib/ethdev/meson.build
@@ -3,6 +3,7 @@
 
 sources = files(
         'ethdev_driver.c',
+        'ethdev_swstats.c',
         'ethdev_private.c',
         'ethdev_profile.c',
         'ethdev_trace_points.c',
@@ -42,6 +43,7 @@ driver_sdk_headers += files(
         'ethdev_driver.h',
         'ethdev_pci.h',
         'ethdev_vdev.h',
+        'ethdev_swstats.h',
 )
 
 if is_linux
diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
index 79f6f5293b..fc595be278 100644
--- a/lib/ethdev/version.map
+++ b/lib/ethdev/version.map
@@ -358,4 +358,7 @@ INTERNAL {
 	rte_eth_switch_domain_alloc;
 	rte_eth_switch_domain_free;
 	rte_flow_fp_default_ops;
+
+	rte_eth_counters_reset;
+	rte_eth_counters_stats_get;
 };
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v6 3/9] net/af_packet: use generic SW stats
  2024-05-17  0:12   ` [PATCH v6 0/9] Generic 64 bit counters for SW drivers Stephen Hemminger
  2024-05-17  0:12     ` [PATCH v6 1/9] eal: generic 64 bit counter Stephen Hemminger
  2024-05-17  0:12     ` [PATCH v6 2/9] ethdev: add common counters for statistics Stephen Hemminger
@ 2024-05-17  0:12     ` Stephen Hemminger
  2024-05-17  0:12     ` [PATCH v6 4/9] net/af_xdp: " Stephen Hemminger
                       ` (5 subsequent siblings)
  8 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-17  0:12 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, John W. Linville

Use the new generic SW stats.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/af_packet/rte_eth_af_packet.c | 82 ++++-------------------
 1 file changed, 14 insertions(+), 68 deletions(-)

diff --git a/drivers/net/af_packet/rte_eth_af_packet.c b/drivers/net/af_packet/rte_eth_af_packet.c
index 397a32db58..89b737e7dc 100644
--- a/drivers/net/af_packet/rte_eth_af_packet.c
+++ b/drivers/net/af_packet/rte_eth_af_packet.c
@@ -10,6 +10,7 @@
 #include <rte_mbuf.h>
 #include <ethdev_driver.h>
 #include <ethdev_vdev.h>
+#include <ethdev_swstats.h>
 #include <rte_malloc.h>
 #include <rte_kvargs.h>
 #include <bus_vdev_driver.h>
@@ -29,6 +30,7 @@
 #include <unistd.h>
 #include <poll.h>
 
+
 #define ETH_AF_PACKET_IFACE_ARG		"iface"
 #define ETH_AF_PACKET_NUM_Q_ARG		"qpairs"
 #define ETH_AF_PACKET_BLOCKSIZE_ARG	"blocksz"
@@ -51,8 +53,7 @@ struct pkt_rx_queue {
 	uint16_t in_port;
 	uint8_t vlan_strip;
 
-	volatile unsigned long rx_pkts;
-	volatile unsigned long rx_bytes;
+	struct rte_eth_counters stats;
 };
 
 struct pkt_tx_queue {
@@ -64,11 +65,10 @@ struct pkt_tx_queue {
 	unsigned int framecount;
 	unsigned int framenum;
 
-	volatile unsigned long tx_pkts;
-	volatile unsigned long err_pkts;
-	volatile unsigned long tx_bytes;
+	struct rte_eth_counters stats;
 };
 
+
 struct pmd_internals {
 	unsigned nb_queues;
 
@@ -118,8 +118,6 @@ eth_af_packet_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct rte_mbuf *mbuf;
 	uint8_t *pbuf;
 	struct pkt_rx_queue *pkt_q = queue;
-	uint16_t num_rx = 0;
-	unsigned long num_rx_bytes = 0;
 	unsigned int framecount, framenum;
 
 	if (unlikely(nb_pkts == 0))
@@ -164,13 +162,11 @@ eth_af_packet_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		/* account for the receive frame */
 		bufs[i] = mbuf;
-		num_rx++;
-		num_rx_bytes += mbuf->pkt_len;
+		rte_eth_count_mbuf(&pkt_q->stats, mbuf);
 	}
 	pkt_q->framenum = framenum;
-	pkt_q->rx_pkts += num_rx;
-	pkt_q->rx_bytes += num_rx_bytes;
-	return num_rx;
+
+	return i;
 }
 
 /*
@@ -205,8 +201,6 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	unsigned int framecount, framenum;
 	struct pollfd pfd;
 	struct pkt_tx_queue *pkt_q = queue;
-	uint16_t num_tx = 0;
-	unsigned long num_tx_bytes = 0;
 	int i;
 
 	if (unlikely(nb_pkts == 0))
@@ -285,8 +279,7 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			framenum = 0;
 		ppd = (struct tpacket2_hdr *) pkt_q->rd[framenum].iov_base;
 
-		num_tx++;
-		num_tx_bytes += mbuf->pkt_len;
+		rte_eth_count_mbuf(&pkt_q->stats, mbuf);
 		rte_pktmbuf_free(mbuf);
 	}
 
@@ -298,15 +291,9 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		 * packets will be considered successful even though only some
 		 * are sent.
 		 */
-
-		num_tx = 0;
-		num_tx_bytes = 0;
 	}
 
 	pkt_q->framenum = framenum;
-	pkt_q->tx_pkts += num_tx;
-	pkt_q->err_pkts += i - num_tx;
-	pkt_q->tx_bytes += num_tx_bytes;
 	return i;
 }
 
@@ -386,58 +373,17 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 }
 
 static int
-eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
+eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-	unsigned i, imax;
-	unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
-	unsigned long rx_bytes_total = 0, tx_bytes_total = 0;
-	const struct pmd_internals *internal = dev->data->dev_private;
-
-	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
-	        internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
-	for (i = 0; i < imax; i++) {
-		igb_stats->q_ipackets[i] = internal->rx_queue[i].rx_pkts;
-		igb_stats->q_ibytes[i] = internal->rx_queue[i].rx_bytes;
-		rx_total += igb_stats->q_ipackets[i];
-		rx_bytes_total += igb_stats->q_ibytes[i];
-	}
-
-	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
-	        internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
-	for (i = 0; i < imax; i++) {
-		igb_stats->q_opackets[i] = internal->tx_queue[i].tx_pkts;
-		igb_stats->q_obytes[i] = internal->tx_queue[i].tx_bytes;
-		tx_total += igb_stats->q_opackets[i];
-		tx_err_total += internal->tx_queue[i].err_pkts;
-		tx_bytes_total += igb_stats->q_obytes[i];
-	}
-
-	igb_stats->ipackets = rx_total;
-	igb_stats->ibytes = rx_bytes_total;
-	igb_stats->opackets = tx_total;
-	igb_stats->oerrors = tx_err_total;
-	igb_stats->obytes = tx_bytes_total;
-	return 0;
+	return rte_eth_counters_stats_get(dev, offsetof(struct pkt_tx_queue, stats),
+					  offsetof(struct pkt_rx_queue, stats), stats);
 }
 
 static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
-	unsigned i;
-	struct pmd_internals *internal = dev->data->dev_private;
-
-	for (i = 0; i < internal->nb_queues; i++) {
-		internal->rx_queue[i].rx_pkts = 0;
-		internal->rx_queue[i].rx_bytes = 0;
-	}
-
-	for (i = 0; i < internal->nb_queues; i++) {
-		internal->tx_queue[i].tx_pkts = 0;
-		internal->tx_queue[i].err_pkts = 0;
-		internal->tx_queue[i].tx_bytes = 0;
-	}
-
-	return 0;
+	return rte_eth_counters_reset(dev, offsetof(struct pkt_tx_queue, stats),
+				      offsetof(struct pkt_rx_queue, stats));
 }
 
 static int
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v6 4/9] net/af_xdp: use generic SW stats
  2024-05-17  0:12   ` [PATCH v6 0/9] Generic 64 bit counters for SW drivers Stephen Hemminger
                       ` (2 preceding siblings ...)
  2024-05-17  0:12     ` [PATCH v6 3/9] net/af_packet: use generic SW stats Stephen Hemminger
@ 2024-05-17  0:12     ` Stephen Hemminger
  2024-05-17 13:34       ` Loftus, Ciara
  2024-05-17  0:12     ` [PATCH v6 5/9] net/pcap: " Stephen Hemminger
                       ` (4 subsequent siblings)
  8 siblings, 1 reply; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-17  0:12 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Ciara Loftus

Use common code for all SW stats.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/af_xdp/rte_eth_af_xdp.c | 98 ++++++++---------------------
 1 file changed, 25 insertions(+), 73 deletions(-)

diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 268a130c49..65fc2f478f 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -20,6 +20,7 @@
 #include <rte_ethdev.h>
 #include <ethdev_driver.h>
 #include <ethdev_vdev.h>
+#include <ethdev_swstats.h>
 #include <rte_kvargs.h>
 #include <bus_vdev_driver.h>
 #include <rte_string_fns.h>
@@ -120,19 +121,13 @@ struct xsk_umem_info {
 	uint32_t max_xsks;
 };
 
-struct rx_stats {
-	uint64_t rx_pkts;
-	uint64_t rx_bytes;
-	uint64_t rx_dropped;
-};
-
 struct pkt_rx_queue {
 	struct xsk_ring_cons rx;
 	struct xsk_umem_info *umem;
 	struct xsk_socket *xsk;
 	struct rte_mempool *mb_pool;
 
-	struct rx_stats stats;
+	struct rte_eth_counters stats;
 
 	struct xsk_ring_prod fq;
 	struct xsk_ring_cons cq;
@@ -143,17 +138,11 @@ struct pkt_rx_queue {
 	int busy_budget;
 };
 
-struct tx_stats {
-	uint64_t tx_pkts;
-	uint64_t tx_bytes;
-	uint64_t tx_dropped;
-};
-
 struct pkt_tx_queue {
 	struct xsk_ring_prod tx;
 	struct xsk_umem_info *umem;
 
-	struct tx_stats stats;
+	struct rte_eth_counters stats;
 
 	struct pkt_rx_queue *pair;
 	int xsk_queue_idx;
@@ -308,7 +297,6 @@ af_xdp_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct xsk_ring_prod *fq = &rxq->fq;
 	struct xsk_umem_info *umem = rxq->umem;
 	uint32_t idx_rx = 0;
-	unsigned long rx_bytes = 0;
 	int i;
 	struct rte_mbuf *fq_bufs[ETH_AF_XDP_RX_BATCH_SIZE];
 
@@ -363,16 +351,13 @@ af_xdp_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		rte_pktmbuf_pkt_len(bufs[i]) = len;
 		rte_pktmbuf_data_len(bufs[i]) = len;
-		rx_bytes += len;
+
+		rte_eth_count_mbuf(&rxq->stats, bufs[i]);
 	}
 
 	xsk_ring_cons__release(rx, nb_pkts);
 	(void)reserve_fill_queue(umem, nb_pkts, fq_bufs, fq);
 
-	/* statistics */
-	rxq->stats.rx_pkts += nb_pkts;
-	rxq->stats.rx_bytes += rx_bytes;
-
 	return nb_pkts;
 }
 #else
@@ -384,7 +369,6 @@ af_xdp_rx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct xsk_umem_info *umem = rxq->umem;
 	struct xsk_ring_prod *fq = &rxq->fq;
 	uint32_t idx_rx = 0;
-	unsigned long rx_bytes = 0;
 	int i;
 	uint32_t free_thresh = fq->size >> 1;
 	struct rte_mbuf *mbufs[ETH_AF_XDP_RX_BATCH_SIZE];
@@ -424,16 +408,13 @@ af_xdp_rx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		rte_ring_enqueue(umem->buf_ring, (void *)addr);
 		rte_pktmbuf_pkt_len(mbufs[i]) = len;
 		rte_pktmbuf_data_len(mbufs[i]) = len;
-		rx_bytes += len;
+		rte_eth_count_mbuf(&rxq->stats, mbufs[i]);
+
 		bufs[i] = mbufs[i];
 	}
 
 	xsk_ring_cons__release(rx, nb_pkts);
 
-	/* statistics */
-	rxq->stats.rx_pkts += nb_pkts;
-	rxq->stats.rx_bytes += rx_bytes;
-
 	return nb_pkts;
 }
 #endif
@@ -527,9 +508,8 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct pkt_tx_queue *txq = queue;
 	struct xsk_umem_info *umem = txq->umem;
 	struct rte_mbuf *mbuf;
-	unsigned long tx_bytes = 0;
 	int i;
-	uint32_t idx_tx;
+	uint32_t idx_tx, pkt_len;
 	uint16_t count = 0;
 	struct xdp_desc *desc;
 	uint64_t addr, offset;
@@ -541,6 +521,7 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 	for (i = 0; i < nb_pkts; i++) {
 		mbuf = bufs[i];
+		pkt_len = rte_pktmbuf_pkt_len(mbuf);
 
 		if (mbuf->pool == umem->mb_pool) {
 			if (!xsk_ring_prod__reserve(&txq->tx, 1, &idx_tx)) {
@@ -589,17 +570,13 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			count++;
 		}
 
-		tx_bytes += mbuf->pkt_len;
+		rte_eth_count_packet(&txq->stats, pkt_len);
 	}
 
 out:
 	xsk_ring_prod__submit(&txq->tx, count);
 	kick_tx(txq, cq);
 
-	txq->stats.tx_pkts += count;
-	txq->stats.tx_bytes += tx_bytes;
-	txq->stats.tx_dropped += nb_pkts - count;
-
 	return count;
 }
 #else
@@ -610,7 +587,6 @@ af_xdp_tx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct xsk_umem_info *umem = txq->umem;
 	struct rte_mbuf *mbuf;
 	void *addrs[ETH_AF_XDP_TX_BATCH_SIZE];
-	unsigned long tx_bytes = 0;
 	int i;
 	uint32_t idx_tx;
 	struct xsk_ring_cons *cq = &txq->pair->cq;
@@ -640,7 +616,8 @@ af_xdp_tx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		pkt = xsk_umem__get_data(umem->mz->addr,
 					 desc->addr);
 		rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *), desc->len);
-		tx_bytes += mbuf->pkt_len;
+		rte_eth_qsw_update(&txq->stats, mbuf);
+
 		rte_pktmbuf_free(mbuf);
 	}
 
@@ -648,9 +625,6 @@ af_xdp_tx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 	kick_tx(txq, cq);
 
-	txq->stats.tx_pkts += nb_pkts;
-	txq->stats.tx_bytes += tx_bytes;
-
 	return nb_pkts;
 }
 
@@ -847,39 +821,26 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 static int
 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-	struct pmd_internals *internals = dev->data->dev_private;
 	struct pmd_process_private *process_private = dev->process_private;
-	struct xdp_statistics xdp_stats;
-	struct pkt_rx_queue *rxq;
-	struct pkt_tx_queue *txq;
-	socklen_t optlen;
-	int i, ret, fd;
+	unsigned int i;
 
-	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		optlen = sizeof(struct xdp_statistics);
-		rxq = &internals->rx_queues[i];
-		txq = rxq->pair;
-		stats->q_ipackets[i] = rxq->stats.rx_pkts;
-		stats->q_ibytes[i] = rxq->stats.rx_bytes;
+	rte_eth_counters_stats_get(dev, offsetof(struct pkt_tx_queue, stats),
+				   offsetof(struct pkt_rx_queue, stats), stats);
 
-		stats->q_opackets[i] = txq->stats.tx_pkts;
-		stats->q_obytes[i] = txq->stats.tx_bytes;
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		struct xdp_statistics xdp_stats;
+		socklen_t optlen = sizeof(xdp_stats);
+		int fd;
 
-		stats->ipackets += stats->q_ipackets[i];
-		stats->ibytes += stats->q_ibytes[i];
-		stats->imissed += rxq->stats.rx_dropped;
-		stats->oerrors += txq->stats.tx_dropped;
 		fd = process_private->rxq_xsk_fds[i];
-		ret = fd >= 0 ? getsockopt(fd, SOL_XDP, XDP_STATISTICS,
-					   &xdp_stats, &optlen) : -1;
-		if (ret != 0) {
+		if (fd < 0)
+			continue;
+		if (getsockopt(fd, SOL_XDP, XDP_STATISTICS,
+			       &xdp_stats, &optlen)  < 0) {
 			AF_XDP_LOG(ERR, "getsockopt() failed for XDP_STATISTICS.\n");
 			return -1;
 		}
 		stats->imissed += xdp_stats.rx_dropped;
-
-		stats->opackets += stats->q_opackets[i];
-		stats->obytes += stats->q_obytes[i];
 	}
 
 	return 0;
@@ -888,17 +849,8 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
-	struct pmd_internals *internals = dev->data->dev_private;
-	int i;
-
-	for (i = 0; i < internals->queue_cnt; i++) {
-		memset(&internals->rx_queues[i].stats, 0,
-					sizeof(struct rx_stats));
-		memset(&internals->tx_queues[i].stats, 0,
-					sizeof(struct tx_stats));
-	}
-
-	return 0;
+	return rte_eth_counters_reset(dev, offsetof(struct pkt_tx_queue, stats),
+				      offsetof(struct pkt_rx_queue, stats));
 }
 
 #ifdef RTE_NET_AF_XDP_LIBBPF_XDP_ATTACH
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v6 5/9] net/pcap: use generic SW stats
  2024-05-17  0:12   ` [PATCH v6 0/9] Generic 64 bit counters for SW drivers Stephen Hemminger
                       ` (3 preceding siblings ...)
  2024-05-17  0:12     ` [PATCH v6 4/9] net/af_xdp: " Stephen Hemminger
@ 2024-05-17  0:12     ` Stephen Hemminger
  2024-05-17  0:12     ` [PATCH v6 6/9] test/pmd_ring: initialize mbufs Stephen Hemminger
                       ` (3 subsequent siblings)
  8 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-17  0:12 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

Use common statistics for SW drivers.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/pcap/pcap_ethdev.c | 125 +++++++--------------------------
 1 file changed, 26 insertions(+), 99 deletions(-)

diff --git a/drivers/net/pcap/pcap_ethdev.c b/drivers/net/pcap/pcap_ethdev.c
index bfec085045..b1a983f871 100644
--- a/drivers/net/pcap/pcap_ethdev.c
+++ b/drivers/net/pcap/pcap_ethdev.c
@@ -11,6 +11,7 @@
 
 #include <rte_cycles.h>
 #include <ethdev_driver.h>
+#include <ethdev_swstats.h>
 #include <ethdev_vdev.h>
 #include <rte_kvargs.h>
 #include <rte_malloc.h>
@@ -48,13 +49,6 @@ static uint8_t iface_idx;
 static uint64_t timestamp_rx_dynflag;
 static int timestamp_dynfield_offset = -1;
 
-struct queue_stat {
-	volatile unsigned long pkts;
-	volatile unsigned long bytes;
-	volatile unsigned long err_pkts;
-	volatile unsigned long rx_nombuf;
-};
-
 struct queue_missed_stat {
 	/* last value retrieved from pcap */
 	unsigned int pcap;
@@ -68,7 +62,7 @@ struct pcap_rx_queue {
 	uint16_t port_id;
 	uint16_t queue_id;
 	struct rte_mempool *mb_pool;
-	struct queue_stat rx_stat;
+	struct rte_eth_counters rx_stat;
 	struct queue_missed_stat missed_stat;
 	char name[PATH_MAX];
 	char type[ETH_PCAP_ARG_MAXLEN];
@@ -80,7 +74,7 @@ struct pcap_rx_queue {
 struct pcap_tx_queue {
 	uint16_t port_id;
 	uint16_t queue_id;
-	struct queue_stat tx_stat;
+	struct rte_eth_counters tx_stat;
 	char name[PATH_MAX];
 	char type[ETH_PCAP_ARG_MAXLEN];
 };
@@ -238,7 +232,6 @@ eth_pcap_rx_infinite(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
 	int i;
 	struct pcap_rx_queue *pcap_q = queue;
-	uint32_t rx_bytes = 0;
 
 	if (unlikely(nb_pkts == 0))
 		return 0;
@@ -252,39 +245,35 @@ eth_pcap_rx_infinite(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		if (err)
 			return i;
 
+		rte_eth_count_mbuf(&pcap_q->rx_stat, pcap_buf);
+
 		rte_memcpy(rte_pktmbuf_mtod(bufs[i], void *),
 				rte_pktmbuf_mtod(pcap_buf, void *),
 				pcap_buf->data_len);
 		bufs[i]->data_len = pcap_buf->data_len;
 		bufs[i]->pkt_len = pcap_buf->pkt_len;
 		bufs[i]->port = pcap_q->port_id;
-		rx_bytes += pcap_buf->data_len;
+
 
 		/* Enqueue packet back on ring to allow infinite rx. */
 		rte_ring_enqueue(pcap_q->pkts, pcap_buf);
 	}
 
-	pcap_q->rx_stat.pkts += i;
-	pcap_q->rx_stat.bytes += rx_bytes;
-
 	return i;
 }
 
 static uint16_t
 eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
+	struct pcap_rx_queue *pcap_q = queue;
+	struct rte_eth_dev *dev = &rte_eth_devices[pcap_q->port_id];
+	struct pmd_process_private *pp = dev->process_private;
+	pcap_t *pcap = pp->rx_pcap[pcap_q->queue_id];
 	unsigned int i;
 	struct pcap_pkthdr header;
-	struct pmd_process_private *pp;
 	const u_char *packet;
 	struct rte_mbuf *mbuf;
-	struct pcap_rx_queue *pcap_q = queue;
 	uint16_t num_rx = 0;
-	uint32_t rx_bytes = 0;
-	pcap_t *pcap;
-
-	pp = rte_eth_devices[pcap_q->port_id].process_private;
-	pcap = pp->rx_pcap[pcap_q->queue_id];
 
 	if (unlikely(pcap == NULL || nb_pkts == 0))
 		return 0;
@@ -300,7 +289,7 @@ eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		mbuf = rte_pktmbuf_alloc(pcap_q->mb_pool);
 		if (unlikely(mbuf == NULL)) {
-			pcap_q->rx_stat.rx_nombuf++;
+			++dev->data->rx_mbuf_alloc_failed;
 			break;
 		}
 
@@ -315,7 +304,7 @@ eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 						       mbuf,
 						       packet,
 						       header.caplen) == -1)) {
-				pcap_q->rx_stat.err_pkts++;
+				rte_eth_count_error(&pcap_q->rx_stat);
 				rte_pktmbuf_free(mbuf);
 				break;
 			}
@@ -329,11 +318,10 @@ eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		mbuf->ol_flags |= timestamp_rx_dynflag;
 		mbuf->port = pcap_q->port_id;
 		bufs[num_rx] = mbuf;
+
+		rte_eth_count_mbuf(&pcap_q->rx_stat, mbuf);
 		num_rx++;
-		rx_bytes += header.caplen;
 	}
-	pcap_q->rx_stat.pkts += num_rx;
-	pcap_q->rx_stat.bytes += rx_bytes;
 
 	return num_rx;
 }
@@ -379,8 +367,6 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct rte_mbuf *mbuf;
 	struct pmd_process_private *pp;
 	struct pcap_tx_queue *dumper_q = queue;
-	uint16_t num_tx = 0;
-	uint32_t tx_bytes = 0;
 	struct pcap_pkthdr header;
 	pcap_dumper_t *dumper;
 	unsigned char temp_data[RTE_ETH_PCAP_SNAPLEN];
@@ -412,8 +398,7 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		pcap_dump((u_char *)dumper, &header,
 			rte_pktmbuf_read(mbuf, 0, caplen, temp_data));
 
-		num_tx++;
-		tx_bytes += caplen;
+		rte_eth_count_mbuf(&dumper_q->tx_stat, mbuf);
 		rte_pktmbuf_free(mbuf);
 	}
 
@@ -423,9 +408,6 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	 * we flush the pcap dumper within each burst.
 	 */
 	pcap_dump_flush(dumper);
-	dumper_q->tx_stat.pkts += num_tx;
-	dumper_q->tx_stat.bytes += tx_bytes;
-	dumper_q->tx_stat.err_pkts += nb_pkts - num_tx;
 
 	return nb_pkts;
 }
@@ -437,20 +419,16 @@ static uint16_t
 eth_tx_drop(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
 	unsigned int i;
-	uint32_t tx_bytes = 0;
 	struct pcap_tx_queue *tx_queue = queue;
 
 	if (unlikely(nb_pkts == 0))
 		return 0;
 
 	for (i = 0; i < nb_pkts; i++) {
-		tx_bytes += bufs[i]->pkt_len;
+		rte_eth_count_mbuf(&tx_queue->tx_stat, bufs[i]);
 		rte_pktmbuf_free(bufs[i]);
 	}
 
-	tx_queue->tx_stat.pkts += nb_pkts;
-	tx_queue->tx_stat.bytes += tx_bytes;
-
 	return i;
 }
 
@@ -465,8 +443,6 @@ eth_pcap_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct rte_mbuf *mbuf;
 	struct pmd_process_private *pp;
 	struct pcap_tx_queue *tx_queue = queue;
-	uint16_t num_tx = 0;
-	uint32_t tx_bytes = 0;
 	pcap_t *pcap;
 	unsigned char temp_data[RTE_ETH_PCAP_SNAPLEN];
 	size_t len;
@@ -497,15 +473,11 @@ eth_pcap_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			rte_pktmbuf_read(mbuf, 0, len, temp_data), len);
 		if (unlikely(ret != 0))
 			break;
-		num_tx++;
-		tx_bytes += len;
+
+		rte_eth_count_mbuf(&tx_queue->tx_stat, mbuf);
 		rte_pktmbuf_free(mbuf);
 	}
 
-	tx_queue->tx_stat.pkts += num_tx;
-	tx_queue->tx_stat.bytes += tx_bytes;
-	tx_queue->tx_stat.err_pkts += i - num_tx;
-
 	return i;
 }
 
@@ -746,41 +718,12 @@ static int
 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
 	unsigned int i;
-	unsigned long rx_packets_total = 0, rx_bytes_total = 0;
-	unsigned long rx_missed_total = 0;
-	unsigned long rx_nombuf_total = 0, rx_err_total = 0;
-	unsigned long tx_packets_total = 0, tx_bytes_total = 0;
-	unsigned long tx_packets_err_total = 0;
-	const struct pmd_internals *internal = dev->data->dev_private;
-
-	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
-			i < dev->data->nb_rx_queues; i++) {
-		stats->q_ipackets[i] = internal->rx_queue[i].rx_stat.pkts;
-		stats->q_ibytes[i] = internal->rx_queue[i].rx_stat.bytes;
-		rx_nombuf_total += internal->rx_queue[i].rx_stat.rx_nombuf;
-		rx_err_total += internal->rx_queue[i].rx_stat.err_pkts;
-		rx_packets_total += stats->q_ipackets[i];
-		rx_bytes_total += stats->q_ibytes[i];
-		rx_missed_total += queue_missed_stat_get(dev, i);
-	}
 
-	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
-			i < dev->data->nb_tx_queues; i++) {
-		stats->q_opackets[i] = internal->tx_queue[i].tx_stat.pkts;
-		stats->q_obytes[i] = internal->tx_queue[i].tx_stat.bytes;
-		tx_packets_total += stats->q_opackets[i];
-		tx_bytes_total += stats->q_obytes[i];
-		tx_packets_err_total += internal->tx_queue[i].tx_stat.err_pkts;
-	}
+	rte_eth_counters_stats_get(dev, offsetof(struct pcap_tx_queue, tx_stat),
+				   offsetof(struct pcap_rx_queue, rx_stat), stats);
 
-	stats->ipackets = rx_packets_total;
-	stats->ibytes = rx_bytes_total;
-	stats->imissed = rx_missed_total;
-	stats->ierrors = rx_err_total;
-	stats->rx_nombuf = rx_nombuf_total;
-	stats->opackets = tx_packets_total;
-	stats->obytes = tx_bytes_total;
-	stats->oerrors = tx_packets_err_total;
+	for (i = 0; i < dev->data->nb_rx_queues; i++)
+		stats->imissed += queue_missed_stat_get(dev, i);
 
 	return 0;
 }
@@ -789,21 +732,12 @@ static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
 	unsigned int i;
-	struct pmd_internals *internal = dev->data->dev_private;
 
-	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		internal->rx_queue[i].rx_stat.pkts = 0;
-		internal->rx_queue[i].rx_stat.bytes = 0;
-		internal->rx_queue[i].rx_stat.err_pkts = 0;
-		internal->rx_queue[i].rx_stat.rx_nombuf = 0;
-		queue_missed_stat_reset(dev, i);
-	}
+	rte_eth_counters_reset(dev, offsetof(struct pcap_tx_queue, tx_stat),
+			       offsetof(struct pcap_rx_queue, rx_stat));
 
-	for (i = 0; i < dev->data->nb_tx_queues; i++) {
-		internal->tx_queue[i].tx_stat.pkts = 0;
-		internal->tx_queue[i].tx_stat.bytes = 0;
-		internal->tx_queue[i].tx_stat.err_pkts = 0;
-	}
+	for (i = 0; i < dev->data->nb_rx_queues; i++)
+		queue_missed_stat_reset(dev, i);
 
 	return 0;
 }
@@ -929,13 +863,6 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 				pcap_pkt_count);
 			return -EINVAL;
 		}
-
-		/*
-		 * Reset the stats for this queue since eth_pcap_rx calls above
-		 * didn't result in the application receiving packets.
-		 */
-		pcap_q->rx_stat.pkts = 0;
-		pcap_q->rx_stat.bytes = 0;
 	}
 
 	return 0;
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v6 6/9] test/pmd_ring: initialize mbufs
  2024-05-17  0:12   ` [PATCH v6 0/9] Generic 64 bit counters for SW drivers Stephen Hemminger
                       ` (4 preceding siblings ...)
  2024-05-17  0:12     ` [PATCH v6 5/9] net/pcap: " Stephen Hemminger
@ 2024-05-17  0:12     ` Stephen Hemminger
  2024-05-17  0:12     ` [PATCH v6 7/9] net/ring: use generic SW stats Stephen Hemminger
                       ` (2 subsequent siblings)
  8 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-17  0:12 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Bruce Richardson

Do not pass uninitialized data into the ring PMD.
The mbufs should be initialized first so that length is zero.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 app/test/test_pmd_ring.c | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/app/test/test_pmd_ring.c b/app/test/test_pmd_ring.c
index e83b9dd6b8..55455ece7f 100644
--- a/app/test/test_pmd_ring.c
+++ b/app/test/test_pmd_ring.c
@@ -19,6 +19,14 @@ static struct rte_mempool *mp;
 struct rte_ring *rxtx[NUM_RINGS];
 static int tx_porta, rx_portb, rxtx_portc, rxtx_portd, rxtx_porte;
 
+/* make a valid zero sized mbuf */
+static void
+test_mbuf_init(struct rte_mbuf *mbuf)
+{
+	memset(mbuf, 0, sizeof(*mbuf));
+	rte_pktmbuf_reset(mbuf);
+}
+
 static int
 test_ethdev_configure_port(int port)
 {
@@ -68,14 +76,16 @@ test_ethdev_configure_port(int port)
 static int
 test_send_basic_packets(void)
 {
-	struct rte_mbuf  bufs[RING_SIZE];
+	struct rte_mbuf bufs[RING_SIZE];
 	struct rte_mbuf *pbufs[RING_SIZE];
 	int i;
 
 	printf("Testing send and receive RING_SIZE/2 packets (tx_porta -> rx_portb)\n");
 
-	for (i = 0; i < RING_SIZE/2; i++)
+	for (i = 0; i < RING_SIZE / 2; i++) {
+		test_mbuf_init(&bufs[i]);
 		pbufs[i] = &bufs[i];
+	}
 
 	if (rte_eth_tx_burst(tx_porta, 0, pbufs, RING_SIZE/2) < RING_SIZE/2) {
 		printf("Failed to transmit packet burst port %d\n", tx_porta);
@@ -99,14 +109,16 @@ test_send_basic_packets(void)
 static int
 test_send_basic_packets_port(int port)
 {
-	struct rte_mbuf  bufs[RING_SIZE];
+	struct rte_mbuf bufs[RING_SIZE];
 	struct rte_mbuf *pbufs[RING_SIZE];
 	int i;
 
 	printf("Testing send and receive RING_SIZE/2 packets (cmdl_port0 -> cmdl_port0)\n");
 
-	for (i = 0; i < RING_SIZE/2; i++)
+	for (i = 0; i < RING_SIZE / 2; i++) {
+		test_mbuf_init(&bufs[i]);
 		pbufs[i] = &bufs[i];
+	}
 
 	if (rte_eth_tx_burst(port, 0, pbufs, RING_SIZE/2) < RING_SIZE/2) {
 		printf("Failed to transmit packet burst port %d\n", port);
@@ -134,10 +146,11 @@ test_get_stats(int port)
 	struct rte_eth_stats stats;
 	struct rte_mbuf buf, *pbuf = &buf;
 
+	test_mbuf_init(&buf);
+
 	printf("Testing ring PMD stats_get port %d\n", port);
 
 	/* check stats of RXTX port, should all be zero */
-
 	rte_eth_stats_get(port, &stats);
 	if (stats.ipackets != 0 || stats.opackets != 0 ||
 			stats.ibytes != 0 || stats.obytes != 0 ||
@@ -173,6 +186,8 @@ test_stats_reset(int port)
 	struct rte_eth_stats stats;
 	struct rte_mbuf buf, *pbuf = &buf;
 
+	test_mbuf_init(&buf);
+
 	printf("Testing ring PMD stats_reset port %d\n", port);
 
 	rte_eth_stats_reset(port);
@@ -228,6 +243,7 @@ test_pmd_ring_pair_create_attach(void)
 	int ret;
 
 	memset(&null_conf, 0, sizeof(struct rte_eth_conf));
+	test_mbuf_init(&buf);
 
 	if ((rte_eth_dev_configure(rxtx_portd, 1, 1, &null_conf) < 0)
 			|| (rte_eth_dev_configure(rxtx_porte, 1, 1,
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v6 7/9] net/ring: use generic SW stats
  2024-05-17  0:12   ` [PATCH v6 0/9] Generic 64 bit counters for SW drivers Stephen Hemminger
                       ` (5 preceding siblings ...)
  2024-05-17  0:12     ` [PATCH v6 6/9] test/pmd_ring: initialize mbufs Stephen Hemminger
@ 2024-05-17  0:12     ` Stephen Hemminger
  2024-05-17  0:12     ` [PATCH v6 8/9] net/tap: " Stephen Hemminger
  2024-05-17  0:12     ` [PATCH v6 9/9] net/null: " Stephen Hemminger
  8 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-17  0:12 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Bruce Richardson

Use generic per-queue infrastructure.
This also fixes bug where ring code was not accounting
for bytes.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/ring/rte_eth_ring.c | 71 +++++++++++++--------------------
 1 file changed, 28 insertions(+), 43 deletions(-)

diff --git a/drivers/net/ring/rte_eth_ring.c b/drivers/net/ring/rte_eth_ring.c
index 48953dd7a0..85f14dd679 100644
--- a/drivers/net/ring/rte_eth_ring.c
+++ b/drivers/net/ring/rte_eth_ring.c
@@ -7,6 +7,7 @@
 #include "rte_eth_ring.h"
 #include <rte_mbuf.h>
 #include <ethdev_driver.h>
+#include <ethdev_swstats.h>
 #include <rte_malloc.h>
 #include <rte_memcpy.h>
 #include <rte_os_shim.h>
@@ -44,8 +45,8 @@ enum dev_action {
 
 struct ring_queue {
 	struct rte_ring *rng;
-	uint64_t rx_pkts;
-	uint64_t tx_pkts;
+
+	struct rte_eth_counters stats;
 };
 
 struct pmd_internals {
@@ -77,12 +78,13 @@ eth_ring_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 {
 	void **ptrs = (void *)&bufs[0];
 	struct ring_queue *r = q;
-	const uint16_t nb_rx = (uint16_t)rte_ring_dequeue_burst(r->rng,
-			ptrs, nb_bufs, NULL);
-	if (r->rng->flags & RING_F_SC_DEQ)
-		r->rx_pkts += nb_rx;
-	else
-		__atomic_fetch_add(&r->rx_pkts, nb_rx, __ATOMIC_RELAXED);
+	uint16_t i, nb_rx;
+
+	nb_rx = (uint16_t)rte_ring_dequeue_burst(r->rng, ptrs, nb_bufs, NULL);
+
+	for (i = 0; i < nb_rx; i++)
+		rte_eth_count_mbuf(&r->stats, bufs[i]);
+
 	return nb_rx;
 }
 
@@ -90,13 +92,20 @@ static uint16_t
 eth_ring_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 {
 	void **ptrs = (void *)&bufs[0];
+	uint32_t *sizes;
 	struct ring_queue *r = q;
-	const uint16_t nb_tx = (uint16_t)rte_ring_enqueue_burst(r->rng,
-			ptrs, nb_bufs, NULL);
-	if (r->rng->flags & RING_F_SP_ENQ)
-		r->tx_pkts += nb_tx;
-	else
-		__atomic_fetch_add(&r->tx_pkts, nb_tx, __ATOMIC_RELAXED);
+	uint16_t i, nb_tx;
+
+	sizes = alloca(sizeof(uint32_t) * nb_bufs);
+
+	for (i = 0; i < nb_bufs; i++)
+		sizes[i] = rte_pktmbuf_pkt_len(bufs[i]);
+
+	nb_tx = (uint16_t)rte_ring_enqueue_burst(r->rng, ptrs, nb_bufs, NULL);
+
+	for (i = 0; i < nb_tx; i++)
+		rte_eth_count_packet(&r->stats, sizes[i]);
+
 	return nb_tx;
 }
 
@@ -193,40 +202,16 @@ eth_dev_info(struct rte_eth_dev *dev,
 static int
 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-	unsigned int i;
-	unsigned long rx_total = 0, tx_total = 0;
-	const struct pmd_internals *internal = dev->data->dev_private;
-
-	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
-			i < dev->data->nb_rx_queues; i++) {
-		stats->q_ipackets[i] = internal->rx_ring_queues[i].rx_pkts;
-		rx_total += stats->q_ipackets[i];
-	}
-
-	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
-			i < dev->data->nb_tx_queues; i++) {
-		stats->q_opackets[i] = internal->tx_ring_queues[i].tx_pkts;
-		tx_total += stats->q_opackets[i];
-	}
-
-	stats->ipackets = rx_total;
-	stats->opackets = tx_total;
-
-	return 0;
+	return rte_eth_counters_stats_get(dev, offsetof(struct ring_queue, stats),
+					  offsetof(struct ring_queue, stats),
+					  stats);
 }
 
 static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
-	unsigned int i;
-	struct pmd_internals *internal = dev->data->dev_private;
-
-	for (i = 0; i < dev->data->nb_rx_queues; i++)
-		internal->rx_ring_queues[i].rx_pkts = 0;
-	for (i = 0; i < dev->data->nb_tx_queues; i++)
-		internal->tx_ring_queues[i].tx_pkts = 0;
-
-	return 0;
+	return rte_eth_counters_reset(dev, offsetof(struct ring_queue, stats),
+				      offsetof(struct ring_queue, stats));
 }
 
 static void
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v6 8/9] net/tap: use generic SW stats
  2024-05-17  0:12   ` [PATCH v6 0/9] Generic 64 bit counters for SW drivers Stephen Hemminger
                       ` (6 preceding siblings ...)
  2024-05-17  0:12     ` [PATCH v6 7/9] net/ring: use generic SW stats Stephen Hemminger
@ 2024-05-17  0:12     ` Stephen Hemminger
  2024-05-17  0:12     ` [PATCH v6 9/9] net/null: " Stephen Hemminger
  8 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-17  0:12 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

Use new common sw statistics.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/tap/rte_eth_tap.c | 88 ++++++-----------------------------
 drivers/net/tap/rte_eth_tap.h | 15 ++----
 2 files changed, 18 insertions(+), 85 deletions(-)

diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index 69d9da695b..f87979da4f 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -432,7 +432,6 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct rx_queue *rxq = queue;
 	struct pmd_process_private *process_private;
 	uint16_t num_rx;
-	unsigned long num_rx_bytes = 0;
 	uint32_t trigger = tap_trigger;
 
 	if (trigger == rxq->trigger_seen)
@@ -455,7 +454,7 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		/* Packet couldn't fit in the provided mbuf */
 		if (unlikely(rxq->pi.flags & TUN_PKT_STRIP)) {
-			rxq->stats.ierrors++;
+			rte_eth_count_error(&rxq->stats);
 			continue;
 		}
 
@@ -467,7 +466,9 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			struct rte_mbuf *buf = rte_pktmbuf_alloc(rxq->mp);
 
 			if (unlikely(!buf)) {
-				rxq->stats.rx_nombuf++;
+				struct rte_eth_dev *dev = &rte_eth_devices[rxq->in_port];
+				++dev->data->rx_mbuf_alloc_failed;
+
 				/* No new buf has been allocated: do nothing */
 				if (!new_tail || !seg)
 					goto end;
@@ -509,11 +510,9 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		/* account for the receive frame */
 		bufs[num_rx++] = mbuf;
-		num_rx_bytes += mbuf->pkt_len;
+		rte_eth_count_mbuf(&rxq->stats, mbuf);
 	}
 end:
-	rxq->stats.ipackets += num_rx;
-	rxq->stats.ibytes += num_rx_bytes;
 
 	if (trigger && num_rx < nb_pkts)
 		rxq->trigger_seen = trigger;
@@ -523,8 +522,7 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 static inline int
 tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs,
-			struct rte_mbuf **pmbufs,
-			uint16_t *num_packets, unsigned long *num_tx_bytes)
+		struct rte_mbuf **pmbufs)
 {
 	struct pmd_process_private *process_private;
 	int i;
@@ -647,8 +645,7 @@ tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs,
 		if (n <= 0)
 			return -1;
 
-		(*num_packets)++;
-		(*num_tx_bytes) += rte_pktmbuf_pkt_len(mbuf);
+		rte_eth_count_mbuf(&txq->stats, mbuf);
 	}
 	return 0;
 }
@@ -660,8 +657,6 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
 	struct tx_queue *txq = queue;
 	uint16_t num_tx = 0;
-	uint16_t num_packets = 0;
-	unsigned long num_tx_bytes = 0;
 	uint32_t max_size;
 	int i;
 
@@ -693,7 +688,7 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			tso_segsz = mbuf_in->tso_segsz + hdrs_len;
 			if (unlikely(tso_segsz == hdrs_len) ||
 				tso_segsz > *txq->mtu) {
-				txq->stats.errs++;
+				rte_eth_count_error(&txq->stats);
 				break;
 			}
 			gso_ctx->gso_size = tso_segsz;
@@ -728,10 +723,10 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			num_mbufs = 1;
 		}
 
-		ret = tap_write_mbufs(txq, num_mbufs, mbuf,
-				&num_packets, &num_tx_bytes);
+		ret = tap_write_mbufs(txq, num_mbufs, mbuf);
 		if (ret == -1) {
-			txq->stats.errs++;
+			rte_eth_count_error(&txq->stats);
+
 			/* free tso mbufs */
 			if (num_tso_mbufs > 0)
 				rte_pktmbuf_free_bulk(mbuf, num_tso_mbufs);
@@ -749,10 +744,6 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		}
 	}
 
-	txq->stats.opackets += num_packets;
-	txq->stats.errs += nb_pkts - num_tx;
-	txq->stats.obytes += num_tx_bytes;
-
 	return num_tx;
 }
 
@@ -1055,64 +1046,15 @@ tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 static int
 tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats)
 {
-	unsigned int i, imax;
-	unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
-	unsigned long rx_bytes_total = 0, tx_bytes_total = 0;
-	unsigned long rx_nombuf = 0, ierrors = 0;
-	const struct pmd_internals *pmd = dev->data->dev_private;
-
-	/* rx queue statistics */
-	imax = (dev->data->nb_rx_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ?
-		dev->data->nb_rx_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS;
-	for (i = 0; i < imax; i++) {
-		tap_stats->q_ipackets[i] = pmd->rxq[i].stats.ipackets;
-		tap_stats->q_ibytes[i] = pmd->rxq[i].stats.ibytes;
-		rx_total += tap_stats->q_ipackets[i];
-		rx_bytes_total += tap_stats->q_ibytes[i];
-		rx_nombuf += pmd->rxq[i].stats.rx_nombuf;
-		ierrors += pmd->rxq[i].stats.ierrors;
-	}
-
-	/* tx queue statistics */
-	imax = (dev->data->nb_tx_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ?
-		dev->data->nb_tx_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS;
-
-	for (i = 0; i < imax; i++) {
-		tap_stats->q_opackets[i] = pmd->txq[i].stats.opackets;
-		tap_stats->q_obytes[i] = pmd->txq[i].stats.obytes;
-		tx_total += tap_stats->q_opackets[i];
-		tx_err_total += pmd->txq[i].stats.errs;
-		tx_bytes_total += tap_stats->q_obytes[i];
-	}
-
-	tap_stats->ipackets = rx_total;
-	tap_stats->ibytes = rx_bytes_total;
-	tap_stats->ierrors = ierrors;
-	tap_stats->rx_nombuf = rx_nombuf;
-	tap_stats->opackets = tx_total;
-	tap_stats->oerrors = tx_err_total;
-	tap_stats->obytes = tx_bytes_total;
-	return 0;
+	return rte_eth_counters_stats_get(dev, offsetof(struct tx_queue, stats),
+					  offsetof(struct rx_queue, stats), tap_stats);
 }
 
 static int
 tap_stats_reset(struct rte_eth_dev *dev)
 {
-	int i;
-	struct pmd_internals *pmd = dev->data->dev_private;
-
-	for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
-		pmd->rxq[i].stats.ipackets = 0;
-		pmd->rxq[i].stats.ibytes = 0;
-		pmd->rxq[i].stats.ierrors = 0;
-		pmd->rxq[i].stats.rx_nombuf = 0;
-
-		pmd->txq[i].stats.opackets = 0;
-		pmd->txq[i].stats.errs = 0;
-		pmd->txq[i].stats.obytes = 0;
-	}
-
-	return 0;
+	return rte_eth_counters_reset(dev, offsetof(struct tx_queue, stats),
+				      offsetof(struct rx_queue, stats));
 }
 
 static int
diff --git a/drivers/net/tap/rte_eth_tap.h b/drivers/net/tap/rte_eth_tap.h
index 5ac93f93e9..8cba9ea410 100644
--- a/drivers/net/tap/rte_eth_tap.h
+++ b/drivers/net/tap/rte_eth_tap.h
@@ -14,6 +14,7 @@
 #include <linux/if_tun.h>
 
 #include <ethdev_driver.h>
+#include <ethdev_swstats.h>
 #include <rte_ether.h>
 #include <rte_gso.h>
 #include "tap_log.h"
@@ -32,23 +33,13 @@ enum rte_tuntap_type {
 	ETH_TUNTAP_TYPE_MAX,
 };
 
-struct pkt_stats {
-	uint64_t opackets;              /* Number of output packets */
-	uint64_t ipackets;              /* Number of input packets */
-	uint64_t obytes;                /* Number of bytes on output */
-	uint64_t ibytes;                /* Number of bytes on input */
-	uint64_t errs;                  /* Number of TX error packets */
-	uint64_t ierrors;               /* Number of RX error packets */
-	uint64_t rx_nombuf;             /* Nb of RX mbuf alloc failures */
-};
-
 struct rx_queue {
 	struct rte_mempool *mp;         /* Mempool for RX packets */
 	uint32_t trigger_seen;          /* Last seen Rx trigger value */
 	uint16_t in_port;               /* Port ID */
 	uint16_t queue_id;		/* queue ID*/
-	struct pkt_stats stats;         /* Stats for this RX queue */
 	uint16_t nb_rx_desc;            /* max number of mbufs available */
+	struct rte_eth_counters stats;  /* Stats for this RX queue */
 	struct rte_eth_rxmode *rxmode;  /* RX features */
 	struct rte_mbuf *pool;          /* mbufs pool for this queue */
 	struct iovec (*iovecs)[];       /* descriptors for this queue */
@@ -59,7 +50,7 @@ struct tx_queue {
 	int type;                       /* Type field - TUN|TAP */
 	uint16_t *mtu;                  /* Pointer to MTU from dev_data */
 	uint16_t csum:1;                /* Enable checksum offloading */
-	struct pkt_stats stats;         /* Stats for this TX queue */
+	struct rte_eth_counters stats;	/* Stats for this TX queue */
 	struct rte_gso_ctx gso_ctx;     /* GSO context */
 	uint16_t out_port;              /* Port ID */
 	uint16_t queue_id;		/* queue ID*/
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v6 9/9] net/null: use generic SW stats
  2024-05-17  0:12   ` [PATCH v6 0/9] Generic 64 bit counters for SW drivers Stephen Hemminger
                       ` (7 preceding siblings ...)
  2024-05-17  0:12     ` [PATCH v6 8/9] net/tap: " Stephen Hemminger
@ 2024-05-17  0:12     ` Stephen Hemminger
  8 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-17  0:12 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Tetsuya Mukawa

Use the new common code for statistics.
This also fixes the bug that this driver was not accounting
for bytes.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/null/rte_eth_null.c | 80 +++++++--------------------------
 1 file changed, 17 insertions(+), 63 deletions(-)

diff --git a/drivers/net/null/rte_eth_null.c b/drivers/net/null/rte_eth_null.c
index 7c46004f1e..7786982732 100644
--- a/drivers/net/null/rte_eth_null.c
+++ b/drivers/net/null/rte_eth_null.c
@@ -8,6 +8,7 @@
 #include <rte_mbuf.h>
 #include <ethdev_driver.h>
 #include <ethdev_vdev.h>
+#include <ethdev_swstats.h>
 #include <rte_malloc.h>
 #include <rte_memcpy.h>
 #include <bus_vdev_driver.h>
@@ -37,8 +38,8 @@ struct null_queue {
 	struct rte_mempool *mb_pool;
 	struct rte_mbuf *dummy_packet;
 
-	uint64_t rx_pkts;
-	uint64_t tx_pkts;
+	struct rte_eth_counters tx_stats;
+	struct rte_eth_counters rx_stats;
 };
 
 struct pmd_options {
@@ -99,11 +100,9 @@ eth_null_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 		bufs[i]->data_len = (uint16_t)packet_size;
 		bufs[i]->pkt_len = packet_size;
 		bufs[i]->port = h->internals->port_id;
+		rte_eth_count_mbuf(&h->rx_stats, bufs[i]);
 	}
 
-	/* NOTE: review for potential ordering optimization */
-	__atomic_fetch_add(&h->rx_pkts, i, __ATOMIC_SEQ_CST);
-
 	return i;
 }
 
@@ -127,11 +126,9 @@ eth_null_copy_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 		bufs[i]->data_len = (uint16_t)packet_size;
 		bufs[i]->pkt_len = packet_size;
 		bufs[i]->port = h->internals->port_id;
+		rte_eth_count_mbuf(&h->rx_stats, bufs[i]);
 	}
 
-	/* NOTE: review for potential ordering optimization */
-	__atomic_fetch_add(&h->rx_pkts, i, __ATOMIC_SEQ_CST);
-
 	return i;
 }
 
@@ -151,11 +148,10 @@ eth_null_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 	if ((q == NULL) || (bufs == NULL))
 		return 0;
 
-	for (i = 0; i < nb_bufs; i++)
+	for (i = 0; i < nb_bufs; i++) {
+		rte_eth_count_mbuf(&h->tx_stats, bufs[i]);
 		rte_pktmbuf_free(bufs[i]);
-
-	/* NOTE: review for potential ordering optimization */
-	__atomic_fetch_add(&h->tx_pkts, i, __ATOMIC_SEQ_CST);
+	}
 
 	return i;
 }
@@ -174,12 +170,10 @@ eth_null_copy_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 	for (i = 0; i < nb_bufs; i++) {
 		rte_memcpy(h->dummy_packet, rte_pktmbuf_mtod(bufs[i], void *),
 					packet_size);
+		rte_eth_count_mbuf(&h->tx_stats, bufs[i]);
 		rte_pktmbuf_free(bufs[i]);
 	}
 
-	/* NOTE: review for potential ordering optimization */
-	__atomic_fetch_add(&h->tx_pkts, i, __ATOMIC_SEQ_CST);
-
 	return i;
 }
 
@@ -322,60 +316,20 @@ eth_dev_info(struct rte_eth_dev *dev,
 }
 
 static int
-eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
+eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-	unsigned int i, num_stats;
-	unsigned long rx_total = 0, tx_total = 0;
-	const struct pmd_internals *internal;
-
-	if ((dev == NULL) || (igb_stats == NULL))
-		return -EINVAL;
-
-	internal = dev->data->dev_private;
-	num_stats = RTE_MIN((unsigned int)RTE_ETHDEV_QUEUE_STAT_CNTRS,
-			RTE_MIN(dev->data->nb_rx_queues,
-				RTE_DIM(internal->rx_null_queues)));
-	for (i = 0; i < num_stats; i++) {
-		/* NOTE: review for atomic access */
-		igb_stats->q_ipackets[i] =
-			internal->rx_null_queues[i].rx_pkts;
-		rx_total += igb_stats->q_ipackets[i];
-	}
-
-	num_stats = RTE_MIN((unsigned int)RTE_ETHDEV_QUEUE_STAT_CNTRS,
-			RTE_MIN(dev->data->nb_tx_queues,
-				RTE_DIM(internal->tx_null_queues)));
-	for (i = 0; i < num_stats; i++) {
-		/* NOTE: review for atomic access */
-		igb_stats->q_opackets[i] =
-			internal->tx_null_queues[i].tx_pkts;
-		tx_total += igb_stats->q_opackets[i];
-	}
-
-	igb_stats->ipackets = rx_total;
-	igb_stats->opackets = tx_total;
-
-	return 0;
+	return rte_eth_counters_stats_get(dev,
+					  offsetof(struct null_queue, tx_stats),
+					  offsetof(struct null_queue, rx_stats),
+					  stats);
 }
 
 static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
-	unsigned int i;
-	struct pmd_internals *internal;
-
-	if (dev == NULL)
-		return -EINVAL;
-
-	internal = dev->data->dev_private;
-	for (i = 0; i < RTE_DIM(internal->rx_null_queues); i++)
-		/* NOTE: review for atomic access */
-		internal->rx_null_queues[i].rx_pkts = 0;
-	for (i = 0; i < RTE_DIM(internal->tx_null_queues); i++)
-		/* NOTE: review for atomic access */
-		internal->tx_null_queues[i].tx_pkts = 0;
-
-	return 0;
+	return rte_eth_counters_reset(dev,
+				      offsetof(struct null_queue, tx_stats),
+				      offsetof(struct null_queue, rx_stats));
 }
 
 static void
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v5 2/9] ethdev: add common counters for statistics
  2024-05-16 18:30       ` Wathsala Wathawana Vithanage
@ 2024-05-17  0:19         ` Stephen Hemminger
  0 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-17  0:19 UTC (permalink / raw)
  To: Wathsala Wathawana Vithanage
  Cc: dev, thomas, Ferruh Yigit, Andrew Rybchenko, nd

On Thu, 16 May 2024 18:30:52 +0000
Wathsala Wathawana Vithanage <wathsala.vithanage@arm.com> wrote:

> > +		packets = rte_counter64_fetch(&counters->packets);
> > +		bytes = rte_counter64_fetch(&counters->bytes);
> > +		errors = rte_counter64_fetch(&counters->errors);
> > +
> > +		rte_compiler_barrier();
> > +
> > +		stats->ipackets += packets;
> > +		stats->ibytes += bytes;
> > +		stats->ierrors += errors;
> > +  
> 
> there seems to be a dependency chain in the above loads and subsequent stores.
> If that's the case what's the purpose of the compiler barrier?
> 
> --wathsala

It is so that the counters returned per queue are the same as the
count across all queues. A really annoying compiler could in the
process of inlining everything on 64 bit, decide that it didn't need
to have these temporary variables.  It could read counters->packets
twice, once for the total and once for the per-queue values. In the
intervening window, the PMD could have received another packet.

This is a pre-existing purely theoretical bug across all these
drivers. The Linux kernel has started using tools to find these.

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v5 1/9] eal: generic 64 bit counter
  2024-05-16 21:42         ` Stephen Hemminger
@ 2024-05-17  2:39           ` Honnappa Nagarahalli
  2024-05-17  3:29             ` Stephen Hemminger
  0 siblings, 1 reply; 179+ messages in thread
From: Honnappa Nagarahalli @ 2024-05-17  2:39 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Wathsala Wathawana Vithanage, dev, Morten Brørup, nd



> On May 16, 2024, at 4:42 PM, Stephen Hemminger <stephen@networkplumber.org> wrote:
> 
> On Thu, 16 May 2024 18:22:23 +0000
> Wathsala Wathawana Vithanage <wathsala.vithanage@arm.com> wrote:
> 
>> Hi Stephen,
>> 
>>> +__rte_experimental
>>> +static inline uint64_t
>>> +rte_counter64_fetch(const rte_counter64_t *counter) {
>>> + return *counter;
>>> +}  
>> 
>> What if the address pointed by counter is not aligned and the
>> architecture doesn't support atomic (untorn) loads on non-aligned loads? 
>> 
>> --wathsala
> 
> Then the driver is using it incorrectly. For the use case of a set of counters
> (even if embedded in another struct), the compiler takes care of this.
> 
> Remember this is an internal API, not something that needs to handle user
> abuse.
If it is internal API, should the API name have double underscore prefix to indicate the same?


^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v6 1/9] eal: generic 64 bit counter
  2024-05-17  0:12     ` [PATCH v6 1/9] eal: generic 64 bit counter Stephen Hemminger
@ 2024-05-17  2:45       ` Honnappa Nagarahalli
  2024-05-17  3:30         ` Stephen Hemminger
  0 siblings, 1 reply; 179+ messages in thread
From: Honnappa Nagarahalli @ 2024-05-17  2:45 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev, Morten Brørup, nd



> On May 16, 2024, at 7:12 PM, Stephen Hemminger <stephen@networkplumber.org> wrote:
> 
> This header implements 64 bit counters that are NOT atomic
> but are safe against load/store splits on 32 bit platforms.
> 
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> Acked-by: Morten Brørup <mb@smartsharesystems.com>
> ---
> lib/eal/include/meson.build   |  1 +
> lib/eal/include/rte_counter.h | 98 +++++++++++++++++++++++++++++++++++
> 2 files changed, 99 insertions(+)
> create mode 100644 lib/eal/include/rte_counter.h
> 
> diff --git a/lib/eal/include/meson.build b/lib/eal/include/meson.build
> index e94b056d46..c070dd0079 100644
> --- a/lib/eal/include/meson.build
> +++ b/lib/eal/include/meson.build
> @@ -12,6 +12,7 @@ headers += files(
>         'rte_class.h',
>         'rte_common.h',
>         'rte_compat.h',
> +        'rte_counter.h',
>         'rte_debug.h',
>         'rte_dev.h',
>         'rte_devargs.h',
> diff --git a/lib/eal/include/rte_counter.h b/lib/eal/include/rte_counter.h
> new file mode 100644
> index 0000000000..d623195d63
> --- /dev/null
> +++ b/lib/eal/include/rte_counter.h
> @@ -0,0 +1,98 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright (c) Stephen Hemminger <stephen@networkplumber.org>
> + */
> +
> +#ifndef _RTE_COUNTER_H_
> +#define _RTE_COUNTER_H_
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +/**
> + * @file
> + * RTE Counter
> + *
> + * A counter is 64 bit value that is safe from split read/write
> + * on 32 bit platforms. It assumes that only one cpu at a time
If we are defining the counter in this manner, then implementation cannot be generic. I think architectures will have constraints if they have to ensure the 64b variables are not split.

I think we at least need the counter to be aligned on 8B boundary to have generic code.

> + * will update the counter, and another CPU may want to read it.
> + *
> + * This is a much weaker guarantee than full atomic variables
> + * but is faster since no locked operations are required for update.
> + */
> +
> +#ifdef RTE_ARCH_64
> +/*
> + * On a platform that can support native 64 bit type, no special handling.
> + * These are just wrapper around 64 bit value.
> + */
> +typedef uint64_t rte_counter64_t;
> +
> +/**
> + * Add value to counter.
> + */
> +__rte_experimental
> +static inline void
> +rte_counter64_add(rte_counter64_t *counter, uint32_t val)
> +{
> + *counter += val;
> +}
> +
> +__rte_experimental
> +static inline uint64_t
> +rte_counter64_fetch(const rte_counter64_t *counter)
> +{
> + return *counter;
> +}
> +
> +__rte_experimental
> +static inline void
> +rte_counter64_set(rte_counter64_t *counter, uint64_t val)
> +{
> + *counter = val;
> +}
> +
> +#else
> +
> +#include <rte_stdatomic.h>
> +
> +/*
> + * On a 32 bit platform need to use atomic to force the compler to not
> + * split 64 bit read/write.
> + */
> +typedef RTE_ATOMIC(uint64_t) rte_counter64_t;
> +
> +__rte_experimental
> +static inline void
> +rte_counter64_add(rte_counter64_t *counter, uint32_t val)
> +{
> + rte_atomic_fetch_add_explicit(counter, val, rte_memory_order_relaxed);
> +}
> +
> +__rte_experimental
> +static inline uint64_t
> +rte_counter64_fetch(const rte_counter64_t *counter)
> +{
> + return rte_atomic_load_explicit(counter, rte_memory_order_consume);
> +}
> +
> +__rte_experimental
> +static inline void
> +rte_counter64_set(rte_counter64_t *counter, uint64_t val)
> +{
> + rte_atomic_store_explicit(counter, val, rte_memory_order_release);
> +}
> +#endif
> +
> +__rte_experimental
> +static inline void
> +rte_counter64_reset(rte_counter64_t *counter)
> +{
> + rte_counter64_set(counter, 0);
> +}
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* _RTE_COUNTER_H_ */
> -- 
> 2.43.0
> 


^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v5 1/9] eal: generic 64 bit counter
  2024-05-17  2:39           ` Honnappa Nagarahalli
@ 2024-05-17  3:29             ` Stephen Hemminger
  2024-05-17  4:39               ` Honnappa Nagarahalli
  0 siblings, 1 reply; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-17  3:29 UTC (permalink / raw)
  To: Honnappa Nagarahalli
  Cc: Wathsala Wathawana Vithanage, dev, Morten Brørup, nd

On Fri, 17 May 2024 02:39:02 +0000
Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:

> > On May 16, 2024, at 4:42 PM, Stephen Hemminger <stephen@networkplumber.org> wrote:
> > 
> > On Thu, 16 May 2024 18:22:23 +0000
> > Wathsala Wathawana Vithanage <wathsala.vithanage@arm.com> wrote:
> >   
> >> Hi Stephen,
> >>   
> >>> +__rte_experimental
> >>> +static inline uint64_t
> >>> +rte_counter64_fetch(const rte_counter64_t *counter) {
> >>> + return *counter;
> >>> +}    
> >> 
> >> What if the address pointed by counter is not aligned and the
> >> architecture doesn't support atomic (untorn) loads on non-aligned loads? 
> >> 
> >> --wathsala  
> > 
> > Then the driver is using it incorrectly. For the use case of a set of counters
> > (even if embedded in another struct), the compiler takes care of this.
> > 
> > Remember this is an internal API, not something that needs to handle user
> > abuse.  
> If it is internal API, should the API name have double underscore prefix to indicate the same?
> 

Other parts of ethdev have internal API's with similar names already.
None of them use underscore.

INTERNAL {
	global:

	rte_eth_dev_allocate;
	rte_eth_dev_allocated;
	rte_eth_dev_attach_secondary;
	rte_eth_dev_callback_process;
	rte_eth_dev_create;
	rte_eth_dev_destroy;
	rte_eth_dev_get_by_name;
	rte_eth_dev_is_rx_hairpin_queue;
	rte_eth_dev_is_tx_hairpin_queue;
	rte_eth_dev_probing_finish;
	rte_eth_dev_release_port;
	rte_eth_dev_internal_reset;
	rte_eth_devargs_parse;
	rte_eth_devices;
	rte_eth_dma_zone_free;
	rte_eth_dma_zone_reserve;
	rte_eth_hairpin_queue_peer_bind;
	rte_eth_hairpin_queue_peer_unbind;
	rte_eth_hairpin_queue_peer_update;
	rte_eth_ip_reassembly_dynfield_register;
	rte_eth_link_speed_ethtool; # WINDOWS_NO_EXPORT
	rte_eth_link_speed_glink; # WINDOWS_NO_EXPORT
	rte_eth_link_speed_gset; # WINDOWS_NO_EXPORT
	rte_eth_pkt_burst_dummy;
	rte_eth_representor_id_get;
	rte_eth_switch_domain_alloc;
	rte_eth_switch_domain_free;
	rte_flow_fp_default_ops;
};

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v6 1/9] eal: generic 64 bit counter
  2024-05-17  2:45       ` Honnappa Nagarahalli
@ 2024-05-17  3:30         ` Stephen Hemminger
  2024-05-17  4:26           ` Honnappa Nagarahalli
  0 siblings, 1 reply; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-17  3:30 UTC (permalink / raw)
  To: Honnappa Nagarahalli; +Cc: dev, Morten Brørup, nd

On Fri, 17 May 2024 02:45:12 +0000
Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:

> > + * A counter is 64 bit value that is safe from split read/write
> > + * on 32 bit platforms. It assumes that only one cpu at a time  
> If we are defining the counter in this manner, then implementation cannot be generic. I think architectures will have constraints if they have to ensure the 64b variables are not split.
> 
> I think we at least need the counter to be aligned on 8B boundary to have generic code.

The C standard has always guaranteed that read and write to unsigned log will not be split.
Therefore if arch is 64 bit native there is no need for atomics

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v6 1/9] eal: generic 64 bit counter
  2024-05-17  3:30         ` Stephen Hemminger
@ 2024-05-17  4:26           ` Honnappa Nagarahalli
  2024-05-17  6:44             ` Morten Brørup
  2024-05-17 15:07             ` Stephen Hemminger
  0 siblings, 2 replies; 179+ messages in thread
From: Honnappa Nagarahalli @ 2024-05-17  4:26 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev, Morten Brørup, nd, Richardson, Bruce

+ Bruce for feedback on x86 architecture 

> On May 16, 2024, at 10:30 PM, Stephen Hemminger <stephen@networkplumber.org> wrote:
> 
> On Fri, 17 May 2024 02:45:12 +0000
> Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:
> 
>>> + * A counter is 64 bit value that is safe from split read/write
>>> + * on 32 bit platforms. It assumes that only one cpu at a time  
>> If we are defining the counter in this manner, then implementation cannot be generic. I think architectures will have constraints if they have to ensure the 64b variables are not split.
>> 
>> I think we at least need the counter to be aligned on 8B boundary to have generic code.
> 
> The C standard has always guaranteed that read and write to unsigned log will not be split.
As I understand, this is true only if the variable is an atomic variable. If not, there is no difference between atomic variables and non-atomic variables.

> Therefore if arch is 64 bit native there is no need for atomics
At least on ARM architecture, if the variable is not aligned on 8B boundary, the load or store are not atomic. I am sure it is the same on other architectures.
Bruce, any comments for x86?


^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v5 1/9] eal: generic 64 bit counter
  2024-05-17  3:29             ` Stephen Hemminger
@ 2024-05-17  4:39               ` Honnappa Nagarahalli
  0 siblings, 0 replies; 179+ messages in thread
From: Honnappa Nagarahalli @ 2024-05-17  4:39 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Wathsala Wathawana Vithanage, dev, Morten Brørup, nd



> On May 16, 2024, at 10:29 PM, Stephen Hemminger <stephen@networkplumber.org> wrote:
> 
> On Fri, 17 May 2024 02:39:02 +0000
> Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:
> 
>>> On May 16, 2024, at 4:42 PM, Stephen Hemminger <stephen@networkplumber.org> wrote:
>>> 
>>> On Thu, 16 May 2024 18:22:23 +0000
>>> Wathsala Wathawana Vithanage <wathsala.vithanage@arm.com> wrote:
>>> 
>>>> Hi Stephen,
>>>> 
>>>>> +__rte_experimental
>>>>> +static inline uint64_t
>>>>> +rte_counter64_fetch(const rte_counter64_t *counter) {
>>>>> + return *counter;
>>>>> +}    
>>>> 
>>>> What if the address pointed by counter is not aligned and the
>>>> architecture doesn't support atomic (untorn) loads on non-aligned loads? 
>>>> 
>>>> --wathsala  
>>> 
>>> Then the driver is using it incorrectly. For the use case of a set of counters
>>> (even if embedded in another struct), the compiler takes care of this.
>>> 
>>> Remember this is an internal API, not something that needs to handle user
>>> abuse.  
>> If it is internal API, should the API name have double underscore prefix to indicate the same?
>> 
> 
> Other parts of ethdev have internal API's with similar names already.
> None of them use underscore.
At least, these are marked internal.
Should we use ‘@internal’ in the function descriptions?

> 
> INTERNAL {
> global:
> 
> rte_eth_dev_allocate;
> rte_eth_dev_allocated;
> rte_eth_dev_attach_secondary;
> rte_eth_dev_callback_process;
> rte_eth_dev_create;
> rte_eth_dev_destroy;
> rte_eth_dev_get_by_name;
> rte_eth_dev_is_rx_hairpin_queue;
> rte_eth_dev_is_tx_hairpin_queue;
> rte_eth_dev_probing_finish;
> rte_eth_dev_release_port;
> rte_eth_dev_internal_reset;
> rte_eth_devargs_parse;
> rte_eth_devices;
> rte_eth_dma_zone_free;
> rte_eth_dma_zone_reserve;
> rte_eth_hairpin_queue_peer_bind;
> rte_eth_hairpin_queue_peer_unbind;
> rte_eth_hairpin_queue_peer_update;
> rte_eth_ip_reassembly_dynfield_register;
> rte_eth_link_speed_ethtool; # WINDOWS_NO_EXPORT
> rte_eth_link_speed_glink; # WINDOWS_NO_EXPORT
> rte_eth_link_speed_gset; # WINDOWS_NO_EXPORT
> rte_eth_pkt_burst_dummy;
> rte_eth_representor_id_get;
> rte_eth_switch_domain_alloc;
> rte_eth_switch_domain_free;
> rte_flow_fp_default_ops;
> };


^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v6 1/9] eal: generic 64 bit counter
  2024-05-17  4:26           ` Honnappa Nagarahalli
@ 2024-05-17  6:44             ` Morten Brørup
  2024-05-17 15:05               ` Stephen Hemminger
  2024-05-17 16:18               ` Stephen Hemminger
  2024-05-17 15:07             ` Stephen Hemminger
  1 sibling, 2 replies; 179+ messages in thread
From: Morten Brørup @ 2024-05-17  6:44 UTC (permalink / raw)
  To: Honnappa Nagarahalli, Stephen Hemminger; +Cc: dev, nd, Richardson, Bruce

> From: Honnappa Nagarahalli [mailto:Honnappa.Nagarahalli@arm.com]
> Sent: Friday, 17 May 2024 06.27
> 
> + Bruce for feedback on x86 architecture
> 
> > On May 16, 2024, at 10:30 PM, Stephen Hemminger <stephen@networkplumber.org>
> wrote:
> >
> > On Fri, 17 May 2024 02:45:12 +0000
> > Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:
> >
> >>> + * A counter is 64 bit value that is safe from split read/write
> >>> + * on 32 bit platforms. It assumes that only one cpu at a time
> >> If we are defining the counter in this manner, then implementation cannot
> be generic. I think architectures will have constraints if they have to ensure
> the 64b variables are not split.
> >>
> >> I think we at least need the counter to be aligned on 8B boundary to have
> generic code.
> >
> > The C standard has always guaranteed that read and write to unsigned log
> will not be split.
> As I understand, this is true only if the variable is an atomic variable. If
> not, there is no difference between atomic variables and non-atomic variables.
> 
> > Therefore if arch is 64 bit native there is no need for atomics
> At least on ARM architecture, if the variable is not aligned on 8B boundary,
> the load or store are not atomic. I am sure it is the same on other
> architectures.

I guess it depends on the architecture's natural alignment size and the compiler - especially on 32 bit architectures, where the natural alignment size is 4 bytes.

We could play it safe and add alignment to the counter type:

#include <stdalign.h>
#ifdef RTE_ARCH_64
#if alignof(uint64_t) < sizeof(uint64_t)
typedef alignas(8) uint64_t rte_counter64_t;
#else
typedef uint64_t rte_counter64_t;
#endif
#else
#if alignof(RTE_ATOMIC(uint64_t)) < sizeof(uint64_t)
typedef alignas(8) RTE_ATOMIC(uint64_t) rte_counter64_t;
#else
typedef RTE_ATOMIC(uint64_t) rte_counter64_t;
#endif
#endif

This above is intentionally verbose, because some 32 bit architectures can implement 64 bit non-tearing counters without atomics [1], and the new 64 bit counter library should be prepared for accepting such optimizations.
[1]: https://inbox.dpdk.org/dev/98CBD80474FA8B44BF855DF32C47DC35E9F433@smartserver.smartshare.dk/


Picking up on another branch of this discussion:
This 64 bit counter library is *not* an internal API. Applications should use it for their counters too.

> Bruce, any comments for x86?


^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v6 4/9] net/af_xdp: use generic SW stats
  2024-05-17  0:12     ` [PATCH v6 4/9] net/af_xdp: " Stephen Hemminger
@ 2024-05-17 13:34       ` Loftus, Ciara
  2024-05-17 14:54         ` Stephen Hemminger
  0 siblings, 1 reply; 179+ messages in thread
From: Loftus, Ciara @ 2024-05-17 13:34 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev

> Subject: [PATCH v6 4/9] net/af_xdp: use generic SW stats
> 
> Use common code for all SW stats.
> 
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> ---
>  drivers/net/af_xdp/rte_eth_af_xdp.c | 98 ++++++++---------------------
>  1 file changed, 25 insertions(+), 73 deletions(-)
> 
> diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c
> b/drivers/net/af_xdp/rte_eth_af_xdp.c
> index 268a130c49..65fc2f478f 100644
> --- a/drivers/net/af_xdp/rte_eth_af_xdp.c
> +++ b/drivers/net/af_xdp/rte_eth_af_xdp.c

[snip]

> @@ -541,6 +521,7 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs,
> uint16_t nb_pkts)
> 
>  	for (i = 0; i < nb_pkts; i++) {
>  		mbuf = bufs[i];
> +		pkt_len = rte_pktmbuf_pkt_len(mbuf);
> 
>  		if (mbuf->pool == umem->mb_pool) {
>  			if (!xsk_ring_prod__reserve(&txq->tx, 1, &idx_tx)) {
> @@ -589,17 +570,13 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs,
> uint16_t nb_pkts)
>  			count++;
>  		}
> 
> -		tx_bytes += mbuf->pkt_len;
> +		rte_eth_count_packet(&txq->stats, pkt_len);

This change resolves the bugzilla you reported recently (1440 - use after free in af_xdp). Should this be mentioned in the commit message? We probably still need a separate patch for backporting that can be used without this entire series.

>  	}
> 
>  out:
>  	xsk_ring_prod__submit(&txq->tx, count);
>  	kick_tx(txq, cq);
> 
> -	txq->stats.tx_pkts += count;
> -	txq->stats.tx_bytes += tx_bytes;
> -	txq->stats.tx_dropped += nb_pkts - count;
> -
>  	return count;
>  }
>  #else
> @@ -610,7 +587,6 @@ af_xdp_tx_cp(void *queue, struct rte_mbuf **bufs,
> uint16_t nb_pkts)
>  	struct xsk_umem_info *umem = txq->umem;
>  	struct rte_mbuf *mbuf;
>  	void *addrs[ETH_AF_XDP_TX_BATCH_SIZE];
> -	unsigned long tx_bytes = 0;
>  	int i;
>  	uint32_t idx_tx;
>  	struct xsk_ring_cons *cq = &txq->pair->cq;
> @@ -640,7 +616,8 @@ af_xdp_tx_cp(void *queue, struct rte_mbuf **bufs,
> uint16_t nb_pkts)
>  		pkt = xsk_umem__get_data(umem->mz->addr,
>  					 desc->addr);
>  		rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *), desc-
> >len);
> -		tx_bytes += mbuf->pkt_len;
> +		rte_eth_qsw_update(&txq->stats, mbuf);

Typo? Assume this should be rte_eth_count_packet

> +
>  		rte_pktmbuf_free(mbuf);
>  	}
> 
> @@ -648,9 +625,6 @@ af_xdp_tx_cp(void *queue, struct rte_mbuf **bufs,
> uint16_t nb_pkts)
> 
>  	kick_tx(txq, cq);
> 
> -	txq->stats.tx_pkts += nb_pkts;
> -	txq->stats.tx_bytes += tx_bytes;
> -
>  	return nb_pkts;
>  }
> 
> @@ -847,39 +821,26 @@ eth_dev_info(struct rte_eth_dev *dev, struct
> rte_eth_dev_info *dev_info)
>  static int
>  eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
>  {
> -	struct pmd_internals *internals = dev->data->dev_private;
>  	struct pmd_process_private *process_private = dev->process_private;
> -	struct xdp_statistics xdp_stats;
> -	struct pkt_rx_queue *rxq;
> -	struct pkt_tx_queue *txq;
> -	socklen_t optlen;
> -	int i, ret, fd;
> +	unsigned int i;
> 
> -	for (i = 0; i < dev->data->nb_rx_queues; i++) {
> -		optlen = sizeof(struct xdp_statistics);
> -		rxq = &internals->rx_queues[i];
> -		txq = rxq->pair;
> -		stats->q_ipackets[i] = rxq->stats.rx_pkts;
> -		stats->q_ibytes[i] = rxq->stats.rx_bytes;
> +	rte_eth_counters_stats_get(dev, offsetof(struct pkt_tx_queue, stats),
> +				   offsetof(struct pkt_rx_queue, stats), stats);
> 
> -		stats->q_opackets[i] = txq->stats.tx_pkts;
> -		stats->q_obytes[i] = txq->stats.tx_bytes;
> +	for (i = 0; i < dev->data->nb_rx_queues; i++) {
> +		struct xdp_statistics xdp_stats;
> +		socklen_t optlen = sizeof(xdp_stats);
> +		int fd;
> 
> -		stats->ipackets += stats->q_ipackets[i];
> -		stats->ibytes += stats->q_ibytes[i];
> -		stats->imissed += rxq->stats.rx_dropped;
> -		stats->oerrors += txq->stats.tx_dropped;
>  		fd = process_private->rxq_xsk_fds[i];
> -		ret = fd >= 0 ? getsockopt(fd, SOL_XDP, XDP_STATISTICS,
> -					   &xdp_stats, &optlen) : -1;
> -		if (ret != 0) {
> +		if (fd < 0)
> +			continue;
> +		if (getsockopt(fd, SOL_XDP, XDP_STATISTICS,
> +			       &xdp_stats, &optlen)  < 0) {
>  			AF_XDP_LOG(ERR, "getsockopt() failed for
> XDP_STATISTICS.\n");
>  			return -1;
>  		}
>  		stats->imissed += xdp_stats.rx_dropped;
> -
> -		stats->opackets += stats->q_opackets[i];
> -		stats->obytes += stats->q_obytes[i];
>  	}
> 
>  	return 0;
> @@ -888,17 +849,8 @@ eth_stats_get(struct rte_eth_dev *dev, struct
> rte_eth_stats *stats)
>  static int
>  eth_stats_reset(struct rte_eth_dev *dev)
>  {
> -	struct pmd_internals *internals = dev->data->dev_private;
> -	int i;
> -
> -	for (i = 0; i < internals->queue_cnt; i++) {
> -		memset(&internals->rx_queues[i].stats, 0,
> -					sizeof(struct rx_stats));
> -		memset(&internals->tx_queues[i].stats, 0,
> -					sizeof(struct tx_stats));
> -	}
> -
> -	return 0;
> +	return rte_eth_counters_reset(dev, offsetof(struct pkt_tx_queue,
> stats),
> +				      offsetof(struct pkt_rx_queue, stats));
>  }
> 
>  #ifdef RTE_NET_AF_XDP_LIBBPF_XDP_ATTACH
> --
> 2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v6 4/9] net/af_xdp: use generic SW stats
  2024-05-17 13:34       ` Loftus, Ciara
@ 2024-05-17 14:54         ` Stephen Hemminger
  0 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-17 14:54 UTC (permalink / raw)
  To: Loftus, Ciara; +Cc: dev

On Fri, 17 May 2024 13:34:30 +0000
"Loftus, Ciara" <ciara.loftus@intel.com> wrote:

> > -		tx_bytes += mbuf->pkt_len;
> > +		rte_eth_count_packet(&txq->stats, pkt_len);  
> 
> This change resolves the bugzilla you reported recently (1440 - use after free in af_xdp). Should this be mentioned in the commit message? We probably still need a separate patch for backporting that can be used without this entire series.

I suspect it will take longer for this patch set to get integrated.
So fixing the bug sooner would make sense.

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v6 1/9] eal: generic 64 bit counter
  2024-05-17  6:44             ` Morten Brørup
@ 2024-05-17 15:05               ` Stephen Hemminger
  2024-05-17 16:18               ` Stephen Hemminger
  1 sibling, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-17 15:05 UTC (permalink / raw)
  To: Morten Brørup; +Cc: Honnappa Nagarahalli, dev, nd, Richardson, Bruce

On Fri, 17 May 2024 08:44:42 +0200
Morten Brørup <mb@smartsharesystems.com> wrote:

> I guess it depends on the architecture's natural alignment size and the compiler - especially on 32 bit architectures, where the natural alignment size is 4 bytes.
> 
> We could play it safe and add alignment to the counter type:
> 
> #include <stdalign.h>
> #ifdef RTE_ARCH_64
> #if alignof(uint64_t) < sizeof(uint64_t)

I don't think this case is possible, what architecture is that broken?

> typedef alignas(8) uint64_t rte_counter64_t;
> #else
> typedef uint64_t rte_counter64_t;
> #endif
> #else
> #if alignof(RTE_ATOMIC(uint64_t)) < sizeof(uint64_t)
> typedef alignas(8) RTE_ATOMIC(uint64_t) rte_counter64_t;
> #else
> typedef RTE_ATOMIC(uint64_t) rte_counter64_t;
> #endif
> #endif

The bigger question is how to detect 32 bit x86 being safe without atomic?
(and does it still matter).

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v6 1/9] eal: generic 64 bit counter
  2024-05-17  4:26           ` Honnappa Nagarahalli
  2024-05-17  6:44             ` Morten Brørup
@ 2024-05-17 15:07             ` Stephen Hemminger
  1 sibling, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-17 15:07 UTC (permalink / raw)
  To: Honnappa Nagarahalli; +Cc: dev, Morten Brørup, nd, Richardson, Bruce

On Fri, 17 May 2024 04:26:58 +0000
Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:

> > 
> > The C standard has always guaranteed that read and write to unsigned log will not be split.  
> As I understand, this is true only if the variable is an atomic variable. If not, there is no difference between atomic variables and non-atomic variables.

Let me look.
It certainly falls under the "if a compiler did this it is crazy and would never be able to run Linux" per Linus.

> 
> > Therefore if arch is 64 bit native there is no need for atomics  
> At least on ARM architecture, if the variable is not aligned on 8B boundary, the load or store are not atomic. I am sure it is the same on other architectures.
> Bruce, any comments for x86?


This code needs to assume alignment to 8B. I'll document that.
But no runtime checks please.


^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v6 1/9] eal: generic 64 bit counter
  2024-05-17  6:44             ` Morten Brørup
  2024-05-17 15:05               ` Stephen Hemminger
@ 2024-05-17 16:18               ` Stephen Hemminger
  2024-05-18 14:00                 ` Morten Brørup
  1 sibling, 1 reply; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-17 16:18 UTC (permalink / raw)
  To: Morten Brørup; +Cc: Honnappa Nagarahalli, dev, nd, Richardson, Bruce

On Fri, 17 May 2024 08:44:42 +0200
Morten Brørup <mb@smartsharesystems.com> wrote:

> > From: Honnappa Nagarahalli [mailto:Honnappa.Nagarahalli@arm.com]
> > Sent: Friday, 17 May 2024 06.27
> > 
> > + Bruce for feedback on x86 architecture
> >   
> > > On May 16, 2024, at 10:30 PM, Stephen Hemminger <stephen@networkplumber.org>  
> > wrote:  
> > >
> > > On Fri, 17 May 2024 02:45:12 +0000
> > > Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:
> > >  
> > >>> + * A counter is 64 bit value that is safe from split read/write
> > >>> + * on 32 bit platforms. It assumes that only one cpu at a time  
> > >> If we are defining the counter in this manner, then implementation cannot  
> > be generic. I think architectures will have constraints if they have to ensure
> > the 64b variables are not split.  
> > >>
> > >> I think we at least need the counter to be aligned on 8B boundary to have  
> > generic code.  
> > >
> > > The C standard has always guaranteed that read and write to unsigned log  
> > will not be split.
> > As I understand, this is true only if the variable is an atomic variable. If
> > not, there is no difference between atomic variables and non-atomic variables.
> >   
> > > Therefore if arch is 64 bit native there is no need for atomics  
> > At least on ARM architecture, if the variable is not aligned on 8B boundary,
> > the load or store are not atomic. I am sure it is the same on other
> > architectures.  

After reading this: Who's afraid of a big bad optimizing compiler?
 https://lwn.net/Articles/793253/

Looks like you are right, and atomic or read/write once is required.
Perhaps introducing rte_read_once and rte_write_once is good idea?
Several drivers are implementing it already.

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v7 0/9] Use weak atomic operations for SW PMD counters
  2024-05-10  5:01 ` [RFC 0/3] generic sw counters Stephen Hemminger
                     ` (8 preceding siblings ...)
  2024-05-17  0:12   ` [PATCH v6 0/9] Generic 64 bit counters for SW drivers Stephen Hemminger
@ 2024-05-17 17:35   ` Stephen Hemminger
  2024-05-17 17:35     ` [PATCH v7 1/9] eal: generic 64 bit counter Stephen Hemminger
                       ` (8 more replies)
  2024-05-21 17:00   ` [PATCH v8 0/8] Common statistics routines for SW based PMD's Stephen Hemminger
                     ` (2 subsequent siblings)
  12 siblings, 9 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-17 17:35 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

Adds common code for collecting basic statistics used by many
SW based PMD's such as af_packet, af_xdp, null, tap and ring.

This version uses std atomic on all platforms. The memory order
is chosen to be the best performance but still providing the
guarantees expected of PMD counters.

By using common code, it also fixes missing counts in several drivers.
And fixes several bugs.

v7 - use stdatomic even on 64 bit. Does cause extra locked operation
     for increment on x86 64 but will be safe.
   - rebase

Stephen Hemminger (9):
  eal: generic 64 bit counter
  ethdev: add common counters for statistics
  net/af_packet: use generic SW stats
  net/af_xdp: use generic SW stats
  net/pcap: use generic SW stats
  test/pmd_ring: initialize mbufs
  net/ring: use generic SW stats
  net/tap: use generic SW stats
  net/null: use generic SW stats

 app/test/test_pmd_ring.c                  |  26 ++++-
 drivers/net/af_packet/rte_eth_af_packet.c |  82 +++-----------
 drivers/net/af_xdp/rte_eth_af_xdp.c       |  97 +++++------------
 drivers/net/null/rte_eth_null.c           |  80 +++-----------
 drivers/net/pcap/pcap_ethdev.c            | 125 +++++-----------------
 drivers/net/ring/rte_eth_ring.c           |  71 +++++-------
 drivers/net/tap/rte_eth_tap.c             |  88 +++------------
 drivers/net/tap/rte_eth_tap.h             |  15 +--
 lib/eal/include/meson.build               |   1 +
 lib/eal/include/rte_counter.h             | 116 ++++++++++++++++++++
 lib/ethdev/ethdev_swstats.c               | 101 +++++++++++++++++
 lib/ethdev/ethdev_swstats.h               | 124 +++++++++++++++++++++
 lib/ethdev/meson.build                    |   2 +
 lib/ethdev/version.map                    |   3 +
 14 files changed, 495 insertions(+), 436 deletions(-)
 create mode 100644 lib/eal/include/rte_counter.h
 create mode 100644 lib/ethdev/ethdev_swstats.c
 create mode 100644 lib/ethdev/ethdev_swstats.h

-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v7 1/9] eal: generic 64 bit counter
  2024-05-17 17:35   ` [PATCH v7 0/9] Use weak atomic operations for SW PMD counters Stephen Hemminger
@ 2024-05-17 17:35     ` Stephen Hemminger
  2024-05-17 17:35     ` [PATCH v7 2/9] ethdev: add common counters for statistics Stephen Hemminger
                       ` (7 subsequent siblings)
  8 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-17 17:35 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Morten Brørup, Tyler Retzlaff

This header implements 64 bit counters using atomic
operations but with a weak memory ordering so that
they are safe against load/store splits on 32 bit platforms.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
---
 lib/eal/include/meson.build   |   1 +
 lib/eal/include/rte_counter.h | 116 ++++++++++++++++++++++++++++++++++
 2 files changed, 117 insertions(+)
 create mode 100644 lib/eal/include/rte_counter.h

diff --git a/lib/eal/include/meson.build b/lib/eal/include/meson.build
index e94b056d46..c070dd0079 100644
--- a/lib/eal/include/meson.build
+++ b/lib/eal/include/meson.build
@@ -12,6 +12,7 @@ headers += files(
         'rte_class.h',
         'rte_common.h',
         'rte_compat.h',
+        'rte_counter.h',
         'rte_debug.h',
         'rte_dev.h',
         'rte_devargs.h',
diff --git a/lib/eal/include/rte_counter.h b/lib/eal/include/rte_counter.h
new file mode 100644
index 0000000000..cdaa426e12
--- /dev/null
+++ b/lib/eal/include/rte_counter.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+#ifndef _RTE_COUNTER_H_
+#define _RTE_COUNTER_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <rte_common.h>
+#include <rte_stdatomic.h>
+
+/**
+ * @file
+ * RTE Counter
+ *
+ * A counter is 64 bit value that is safe from split read/write.
+ * It assumes that only one cpu at a time  will update the counter,
+ * and another CPU may want to read it.
+ *
+ * This is a weaker subset of full atomic variables.
+ *
+ * The counters are subject to the restrictions of atomic variables
+ * in packed structures or unaligned.
+ */
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * The RTE counter type.
+ */
+typedef RTE_ATOMIC(uint64_t) rte_counter64_t;
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Add value to counter.
+ * Assumes this operation is only done by one thread on the object.
+ *
+ * @param counter
+ *    A pointer to the counter.
+ * @param val
+ *    The value to add to the counter.
+ */
+__rte_experimental
+static inline void
+rte_counter64_add(rte_counter64_t *counter, uint32_t val)
+{
+	rte_atomic_fetch_add_explicit(counter, val, rte_memory_order_relaxed);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Read a counter.
+ * This operation can be done by any thread.
+ *
+ * @param counter
+ *    A pointer to the counter.
+ * @return
+ *  The current value of the counter.
+ */
+__rte_experimental
+static inline uint64_t
+rte_counter64_fetch(const rte_counter64_t *counter)
+{
+	return rte_atomic_load_explicit(counter, rte_memory_order_consume);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Set a counter.
+ * This operation can be done by any thread.
+ *
+ * @param counter
+ *    A pointer to the counter.
+ * @param val
+ *    Value to set counter to.
+ */
+__rte_experimental
+static inline void
+rte_counter64_set(rte_counter64_t *counter, uint64_t val)
+{
+	rte_atomic_store_explicit(counter, val, rte_memory_order_release);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Reset a counter to zero.
+ * This operation can be done by any thread.
+ *
+ * @param counter
+ *    A pointer to the counter.
+ */
+__rte_experimental
+static inline void
+rte_counter64_reset(rte_counter64_t *counter)
+{
+	rte_counter64_set(counter, 0);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_COUNTER_H_ */
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v7 2/9] ethdev: add common counters for statistics
  2024-05-17 17:35   ` [PATCH v7 0/9] Use weak atomic operations for SW PMD counters Stephen Hemminger
  2024-05-17 17:35     ` [PATCH v7 1/9] eal: generic 64 bit counter Stephen Hemminger
@ 2024-05-17 17:35     ` Stephen Hemminger
  2024-05-17 17:35     ` [PATCH v7 3/9] net/af_packet: use generic SW stats Stephen Hemminger
                       ` (6 subsequent siblings)
  8 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-17 17:35 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Thomas Monjalon, Ferruh Yigit, Andrew Rybchenko

Introduce common helper routines for keeping track of per-queue
statistics in SW PMD's. The code in several drivers had
copy/pasted the same code for this, but had common issues
with 64 bit counters on 32 bit platforms.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 lib/ethdev/ethdev_swstats.c | 101 +++++++++++++++++++++++++++++
 lib/ethdev/ethdev_swstats.h | 124 ++++++++++++++++++++++++++++++++++++
 lib/ethdev/meson.build      |   2 +
 lib/ethdev/version.map      |   3 +
 4 files changed, 230 insertions(+)
 create mode 100644 lib/ethdev/ethdev_swstats.c
 create mode 100644 lib/ethdev/ethdev_swstats.h

diff --git a/lib/ethdev/ethdev_swstats.c b/lib/ethdev/ethdev_swstats.c
new file mode 100644
index 0000000000..555f5f592b
--- /dev/null
+++ b/lib/ethdev/ethdev_swstats.c
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+
+#include <rte_config.h>
+#include <rte_common.h>
+#include <rte_atomic.h>
+
+#include "rte_ethdev.h"
+#include "ethdev_driver.h"
+#include "ethdev_swstats.h"
+
+int
+rte_eth_counters_stats_get(const struct rte_eth_dev *dev,
+			   size_t tx_offset, size_t rx_offset,
+			   struct rte_eth_stats *stats)
+{
+	uint64_t packets, bytes, errors;
+	unsigned int i;
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		const void *txq = dev->data->tx_queues[i];
+		const struct rte_eth_counters *counters;
+
+		if (txq == NULL)
+			continue;
+
+		counters = (const struct rte_eth_counters *)((const char *)txq + tx_offset);
+		packets = rte_counter64_fetch(&counters->packets);
+		bytes = rte_counter64_fetch(&counters->bytes);
+		errors = rte_counter64_fetch(&counters->errors);
+
+		stats->opackets += packets;
+		stats->obytes += bytes;
+		stats->oerrors += errors;
+
+		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+			stats->q_opackets[i] = packets;
+			stats->q_obytes[i] = bytes;
+		}
+	}
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		const void *rxq = dev->data->rx_queues[i];
+		const struct rte_eth_counters *counters;
+
+		if (rxq == NULL)
+			continue;
+
+		counters = (const struct rte_eth_counters *)((const char *)rxq + rx_offset);
+		packets = rte_counter64_fetch(&counters->packets);
+		bytes = rte_counter64_fetch(&counters->bytes);
+		errors = rte_counter64_fetch(&counters->errors);
+
+		stats->ipackets += packets;
+		stats->ibytes += bytes;
+		stats->ierrors += errors;
+
+		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+			stats->q_ipackets[i] = packets;
+			stats->q_ibytes[i] = bytes;
+		}
+	}
+
+	stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
+	return 0;
+}
+
+int
+rte_eth_counters_reset(struct rte_eth_dev *dev, size_t tx_offset, size_t rx_offset)
+{
+	struct rte_eth_counters *counters;
+	unsigned int i;
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		void *txq  = dev->data->tx_queues[i];
+
+		if (txq == NULL)
+			continue;
+
+		counters = (struct rte_eth_counters *)((char *)txq + tx_offset);
+		rte_counter64_reset(&counters->packets);
+		rte_counter64_reset(&counters->bytes);
+		rte_counter64_reset(&counters->errors);
+	}
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		void *rxq  = dev->data->rx_queues[i];
+
+		if (rxq == NULL)
+			continue;
+
+		counters = (struct rte_eth_counters *)((char *)rxq + rx_offset);
+		rte_counter64_reset(&counters->packets);
+		rte_counter64_reset(&counters->bytes);
+		rte_counter64_reset(&counters->errors);
+	}
+
+	return 0;
+}
diff --git a/lib/ethdev/ethdev_swstats.h b/lib/ethdev/ethdev_swstats.h
new file mode 100644
index 0000000000..808c540640
--- /dev/null
+++ b/lib/ethdev/ethdev_swstats.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+#ifndef _RTE_ETHDEV_SWSTATS_H_
+#define _RTE_ETHDEV_SWSTATS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ *
+ * Internal statistics counters for software based devices.
+ * Hardware PMD's should use the hardware counters instead.
+ *
+ * This provides a library for PMD's to keep track of packets and bytes.
+ * It is assumed that this will be used per queue and queues are not
+ * shared by lcores.
+ */
+
+#include <rte_counter.h>
+
+/**
+ * A structure to be embedded in the device driver per-queue data.
+ */
+struct rte_eth_counters {
+	rte_counter64_t	packets;	/**< Total number of packets. */
+	rte_counter64_t	bytes;		/**< Total number of bytes. */
+	rte_counter64_t	errors;		/**< Total number of packets with errors. */
+};
+
+/**
+ * @internal
+ * Increment counters for a single packet.
+ *
+ * @param counters
+ *    Pointer to queue structure containing counters.
+ * @param sz
+ *    Size of the packet in bytes.
+ */
+__rte_internal
+static inline void
+rte_eth_count_packet(struct rte_eth_counters *counters, uint32_t sz)
+{
+	rte_counter64_add(&counters->packets, 1);
+	rte_counter64_add(&counters->bytes, sz);
+}
+
+/**
+ * @internal
+ * Increment counters based on mbuf.
+ *
+ * @param counters
+ *    Pointer to queue structure containing counters.
+ * @param mbuf
+ *    Received or transmitted mbuf.
+ */
+__rte_internal
+static inline void
+rte_eth_count_mbuf(struct rte_eth_counters *counters, const struct rte_mbuf *mbuf)
+{
+	rte_eth_count_packet(counters, rte_pktmbuf_pkt_len(mbuf));
+}
+
+/**
+ * @internal
+ * Increment error counter.
+ *
+ * @param counters
+ *    Pointer to queue structure containing counters.
+ */
+__rte_internal
+static inline void
+rte_eth_count_error(struct rte_eth_counters *counters)
+{
+	rte_counter64_add(&counters->errors, 1);
+}
+
+/**
+ * @internal
+ * Retrieve the general statistics for all queues.
+ * @see rte_eth_stats_get.
+ *
+ * @param dev
+ *    Pointer to the Ethernet device structure.
+ * @param tx_offset
+ *    Offset from the tx_queue structure where stats are located.
+ * @param rx_offset
+ *    Offset from the rx_queue structure where stats are located.
+ * @param stats
+ *   A pointer to a structure of type *rte_eth_stats* to be filled
+ * @return
+ *   Zero if successful. Non-zero otherwise.
+ */
+__rte_internal
+int rte_eth_counters_stats_get(const struct rte_eth_dev *dev,
+			       size_t tx_offset, size_t rx_offset,
+			       struct rte_eth_stats *stats);
+
+/**
+ * @internal
+ * Reset the statistics for all queues.
+ * @see rte_eth_stats_reset.
+ *
+ * @param dev
+ *    Pointer to the Ethernet device structure.
+ * @param tx_offset
+ *    Offset from the tx_queue structure where stats are located.
+ * @param rx_offset
+ *    Offset from the rx_queue structure where stats are located.
+ * @return
+ *   Zero if successful. Non-zero otherwise.
+ */
+__rte_internal
+int rte_eth_counters_reset(struct rte_eth_dev *dev,
+			   size_t tx_offset, size_t rx_offset);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ETHDEV_SWSTATS_H_ */
diff --git a/lib/ethdev/meson.build b/lib/ethdev/meson.build
index f1d2586591..7ce29a46d4 100644
--- a/lib/ethdev/meson.build
+++ b/lib/ethdev/meson.build
@@ -3,6 +3,7 @@
 
 sources = files(
         'ethdev_driver.c',
+        'ethdev_swstats.c',
         'ethdev_private.c',
         'ethdev_profile.c',
         'ethdev_trace_points.c',
@@ -42,6 +43,7 @@ driver_sdk_headers += files(
         'ethdev_driver.h',
         'ethdev_pci.h',
         'ethdev_vdev.h',
+        'ethdev_swstats.h',
 )
 
 if is_linux
diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
index 79f6f5293b..fc595be278 100644
--- a/lib/ethdev/version.map
+++ b/lib/ethdev/version.map
@@ -358,4 +358,7 @@ INTERNAL {
 	rte_eth_switch_domain_alloc;
 	rte_eth_switch_domain_free;
 	rte_flow_fp_default_ops;
+
+	rte_eth_counters_reset;
+	rte_eth_counters_stats_get;
 };
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v7 3/9] net/af_packet: use generic SW stats
  2024-05-17 17:35   ` [PATCH v7 0/9] Use weak atomic operations for SW PMD counters Stephen Hemminger
  2024-05-17 17:35     ` [PATCH v7 1/9] eal: generic 64 bit counter Stephen Hemminger
  2024-05-17 17:35     ` [PATCH v7 2/9] ethdev: add common counters for statistics Stephen Hemminger
@ 2024-05-17 17:35     ` Stephen Hemminger
  2024-05-17 17:35     ` [PATCH v7 4/9] net/af_xdp: " Stephen Hemminger
                       ` (5 subsequent siblings)
  8 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-17 17:35 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, John W. Linville

Use the new generic SW stats.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/af_packet/rte_eth_af_packet.c | 82 ++++-------------------
 1 file changed, 14 insertions(+), 68 deletions(-)

diff --git a/drivers/net/af_packet/rte_eth_af_packet.c b/drivers/net/af_packet/rte_eth_af_packet.c
index 397a32db58..89b737e7dc 100644
--- a/drivers/net/af_packet/rte_eth_af_packet.c
+++ b/drivers/net/af_packet/rte_eth_af_packet.c
@@ -10,6 +10,7 @@
 #include <rte_mbuf.h>
 #include <ethdev_driver.h>
 #include <ethdev_vdev.h>
+#include <ethdev_swstats.h>
 #include <rte_malloc.h>
 #include <rte_kvargs.h>
 #include <bus_vdev_driver.h>
@@ -29,6 +30,7 @@
 #include <unistd.h>
 #include <poll.h>
 
+
 #define ETH_AF_PACKET_IFACE_ARG		"iface"
 #define ETH_AF_PACKET_NUM_Q_ARG		"qpairs"
 #define ETH_AF_PACKET_BLOCKSIZE_ARG	"blocksz"
@@ -51,8 +53,7 @@ struct pkt_rx_queue {
 	uint16_t in_port;
 	uint8_t vlan_strip;
 
-	volatile unsigned long rx_pkts;
-	volatile unsigned long rx_bytes;
+	struct rte_eth_counters stats;
 };
 
 struct pkt_tx_queue {
@@ -64,11 +65,10 @@ struct pkt_tx_queue {
 	unsigned int framecount;
 	unsigned int framenum;
 
-	volatile unsigned long tx_pkts;
-	volatile unsigned long err_pkts;
-	volatile unsigned long tx_bytes;
+	struct rte_eth_counters stats;
 };
 
+
 struct pmd_internals {
 	unsigned nb_queues;
 
@@ -118,8 +118,6 @@ eth_af_packet_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct rte_mbuf *mbuf;
 	uint8_t *pbuf;
 	struct pkt_rx_queue *pkt_q = queue;
-	uint16_t num_rx = 0;
-	unsigned long num_rx_bytes = 0;
 	unsigned int framecount, framenum;
 
 	if (unlikely(nb_pkts == 0))
@@ -164,13 +162,11 @@ eth_af_packet_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		/* account for the receive frame */
 		bufs[i] = mbuf;
-		num_rx++;
-		num_rx_bytes += mbuf->pkt_len;
+		rte_eth_count_mbuf(&pkt_q->stats, mbuf);
 	}
 	pkt_q->framenum = framenum;
-	pkt_q->rx_pkts += num_rx;
-	pkt_q->rx_bytes += num_rx_bytes;
-	return num_rx;
+
+	return i;
 }
 
 /*
@@ -205,8 +201,6 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	unsigned int framecount, framenum;
 	struct pollfd pfd;
 	struct pkt_tx_queue *pkt_q = queue;
-	uint16_t num_tx = 0;
-	unsigned long num_tx_bytes = 0;
 	int i;
 
 	if (unlikely(nb_pkts == 0))
@@ -285,8 +279,7 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			framenum = 0;
 		ppd = (struct tpacket2_hdr *) pkt_q->rd[framenum].iov_base;
 
-		num_tx++;
-		num_tx_bytes += mbuf->pkt_len;
+		rte_eth_count_mbuf(&pkt_q->stats, mbuf);
 		rte_pktmbuf_free(mbuf);
 	}
 
@@ -298,15 +291,9 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		 * packets will be considered successful even though only some
 		 * are sent.
 		 */
-
-		num_tx = 0;
-		num_tx_bytes = 0;
 	}
 
 	pkt_q->framenum = framenum;
-	pkt_q->tx_pkts += num_tx;
-	pkt_q->err_pkts += i - num_tx;
-	pkt_q->tx_bytes += num_tx_bytes;
 	return i;
 }
 
@@ -386,58 +373,17 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 }
 
 static int
-eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
+eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-	unsigned i, imax;
-	unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
-	unsigned long rx_bytes_total = 0, tx_bytes_total = 0;
-	const struct pmd_internals *internal = dev->data->dev_private;
-
-	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
-	        internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
-	for (i = 0; i < imax; i++) {
-		igb_stats->q_ipackets[i] = internal->rx_queue[i].rx_pkts;
-		igb_stats->q_ibytes[i] = internal->rx_queue[i].rx_bytes;
-		rx_total += igb_stats->q_ipackets[i];
-		rx_bytes_total += igb_stats->q_ibytes[i];
-	}
-
-	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
-	        internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
-	for (i = 0; i < imax; i++) {
-		igb_stats->q_opackets[i] = internal->tx_queue[i].tx_pkts;
-		igb_stats->q_obytes[i] = internal->tx_queue[i].tx_bytes;
-		tx_total += igb_stats->q_opackets[i];
-		tx_err_total += internal->tx_queue[i].err_pkts;
-		tx_bytes_total += igb_stats->q_obytes[i];
-	}
-
-	igb_stats->ipackets = rx_total;
-	igb_stats->ibytes = rx_bytes_total;
-	igb_stats->opackets = tx_total;
-	igb_stats->oerrors = tx_err_total;
-	igb_stats->obytes = tx_bytes_total;
-	return 0;
+	return rte_eth_counters_stats_get(dev, offsetof(struct pkt_tx_queue, stats),
+					  offsetof(struct pkt_rx_queue, stats), stats);
 }
 
 static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
-	unsigned i;
-	struct pmd_internals *internal = dev->data->dev_private;
-
-	for (i = 0; i < internal->nb_queues; i++) {
-		internal->rx_queue[i].rx_pkts = 0;
-		internal->rx_queue[i].rx_bytes = 0;
-	}
-
-	for (i = 0; i < internal->nb_queues; i++) {
-		internal->tx_queue[i].tx_pkts = 0;
-		internal->tx_queue[i].err_pkts = 0;
-		internal->tx_queue[i].tx_bytes = 0;
-	}
-
-	return 0;
+	return rte_eth_counters_reset(dev, offsetof(struct pkt_tx_queue, stats),
+				      offsetof(struct pkt_rx_queue, stats));
 }
 
 static int
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v7 4/9] net/af_xdp: use generic SW stats
  2024-05-17 17:35   ` [PATCH v7 0/9] Use weak atomic operations for SW PMD counters Stephen Hemminger
                       ` (2 preceding siblings ...)
  2024-05-17 17:35     ` [PATCH v7 3/9] net/af_packet: use generic SW stats Stephen Hemminger
@ 2024-05-17 17:35     ` Stephen Hemminger
  2024-05-17 17:35     ` [PATCH v7 5/9] net/pcap: " Stephen Hemminger
                       ` (4 subsequent siblings)
  8 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-17 17:35 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Ciara Loftus

Use common code for all SW stats.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/af_xdp/rte_eth_af_xdp.c | 97 +++++++----------------------
 1 file changed, 24 insertions(+), 73 deletions(-)

diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 6ba455bb9b..e5228a1dc1 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -20,6 +20,7 @@
 #include <rte_ethdev.h>
 #include <ethdev_driver.h>
 #include <ethdev_vdev.h>
+#include <ethdev_swstats.h>
 #include <rte_kvargs.h>
 #include <bus_vdev_driver.h>
 #include <rte_string_fns.h>
@@ -120,19 +121,13 @@ struct xsk_umem_info {
 	uint32_t max_xsks;
 };
 
-struct rx_stats {
-	uint64_t rx_pkts;
-	uint64_t rx_bytes;
-	uint64_t rx_dropped;
-};
-
 struct pkt_rx_queue {
 	struct xsk_ring_cons rx;
 	struct xsk_umem_info *umem;
 	struct xsk_socket *xsk;
 	struct rte_mempool *mb_pool;
 
-	struct rx_stats stats;
+	struct rte_eth_counters stats;
 
 	struct xsk_ring_prod fq;
 	struct xsk_ring_cons cq;
@@ -143,17 +138,11 @@ struct pkt_rx_queue {
 	int busy_budget;
 };
 
-struct tx_stats {
-	uint64_t tx_pkts;
-	uint64_t tx_bytes;
-	uint64_t tx_dropped;
-};
-
 struct pkt_tx_queue {
 	struct xsk_ring_prod tx;
 	struct xsk_umem_info *umem;
 
-	struct tx_stats stats;
+	struct rte_eth_counters stats;
 
 	struct pkt_rx_queue *pair;
 	int xsk_queue_idx;
@@ -308,7 +297,6 @@ af_xdp_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct xsk_ring_prod *fq = &rxq->fq;
 	struct xsk_umem_info *umem = rxq->umem;
 	uint32_t idx_rx = 0;
-	unsigned long rx_bytes = 0;
 	int i;
 	struct rte_mbuf *fq_bufs[ETH_AF_XDP_RX_BATCH_SIZE];
 
@@ -363,16 +351,13 @@ af_xdp_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		rte_pktmbuf_pkt_len(bufs[i]) = len;
 		rte_pktmbuf_data_len(bufs[i]) = len;
-		rx_bytes += len;
+
+		rte_eth_count_mbuf(&rxq->stats, bufs[i]);
 	}
 
 	xsk_ring_cons__release(rx, nb_pkts);
 	(void)reserve_fill_queue(umem, nb_pkts, fq_bufs, fq);
 
-	/* statistics */
-	rxq->stats.rx_pkts += nb_pkts;
-	rxq->stats.rx_bytes += rx_bytes;
-
 	return nb_pkts;
 }
 #else
@@ -384,7 +369,6 @@ af_xdp_rx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct xsk_umem_info *umem = rxq->umem;
 	struct xsk_ring_prod *fq = &rxq->fq;
 	uint32_t idx_rx = 0;
-	unsigned long rx_bytes = 0;
 	int i;
 	uint32_t free_thresh = fq->size >> 1;
 	struct rte_mbuf *mbufs[ETH_AF_XDP_RX_BATCH_SIZE];
@@ -424,16 +408,13 @@ af_xdp_rx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		rte_ring_enqueue(umem->buf_ring, (void *)addr);
 		rte_pktmbuf_pkt_len(mbufs[i]) = len;
 		rte_pktmbuf_data_len(mbufs[i]) = len;
-		rx_bytes += len;
+		rte_eth_count_mbuf(&rxq->stats, mbufs[i]);
+
 		bufs[i] = mbufs[i];
 	}
 
 	xsk_ring_cons__release(rx, nb_pkts);
 
-	/* statistics */
-	rxq->stats.rx_pkts += nb_pkts;
-	rxq->stats.rx_bytes += rx_bytes;
-
 	return nb_pkts;
 }
 #endif
@@ -527,9 +508,8 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct pkt_tx_queue *txq = queue;
 	struct xsk_umem_info *umem = txq->umem;
 	struct rte_mbuf *mbuf;
-	unsigned long tx_bytes = 0;
 	int i;
-	uint32_t idx_tx;
+	uint32_t idx_tx, pkt_len;
 	uint16_t count = 0;
 	struct xdp_desc *desc;
 	uint64_t addr, offset;
@@ -541,6 +521,7 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 	for (i = 0; i < nb_pkts; i++) {
 		mbuf = bufs[i];
+		pkt_len = rte_pktmbuf_pkt_len(mbuf);
 
 		if (mbuf->pool == umem->mb_pool) {
 			if (!xsk_ring_prod__reserve(&txq->tx, 1, &idx_tx)) {
@@ -589,17 +570,13 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			count++;
 		}
 
-		tx_bytes += mbuf->pkt_len;
+		rte_eth_count_packet(&txq->stats, pkt_len);
 	}
 
 out:
 	xsk_ring_prod__submit(&txq->tx, count);
 	kick_tx(txq, cq);
 
-	txq->stats.tx_pkts += count;
-	txq->stats.tx_bytes += tx_bytes;
-	txq->stats.tx_dropped += nb_pkts - count;
-
 	return count;
 }
 #else
@@ -610,7 +587,6 @@ af_xdp_tx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct xsk_umem_info *umem = txq->umem;
 	struct rte_mbuf *mbuf;
 	void *addrs[ETH_AF_XDP_TX_BATCH_SIZE];
-	unsigned long tx_bytes = 0;
 	int i;
 	uint32_t idx_tx;
 	struct xsk_ring_cons *cq = &txq->pair->cq;
@@ -640,7 +616,7 @@ af_xdp_tx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		pkt = xsk_umem__get_data(umem->mz->addr,
 					 desc->addr);
 		rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *), desc->len);
-		tx_bytes += mbuf->pkt_len;
+		rte_eth_count_mbuf(&txq->stats, mbuf);
 		rte_pktmbuf_free(mbuf);
 	}
 
@@ -648,9 +624,6 @@ af_xdp_tx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 	kick_tx(txq, cq);
 
-	txq->stats.tx_pkts += nb_pkts;
-	txq->stats.tx_bytes += tx_bytes;
-
 	return nb_pkts;
 }
 
@@ -847,39 +820,26 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 static int
 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-	struct pmd_internals *internals = dev->data->dev_private;
 	struct pmd_process_private *process_private = dev->process_private;
-	struct xdp_statistics xdp_stats;
-	struct pkt_rx_queue *rxq;
-	struct pkt_tx_queue *txq;
-	socklen_t optlen;
-	int i, ret, fd;
+	unsigned int i;
 
-	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		optlen = sizeof(struct xdp_statistics);
-		rxq = &internals->rx_queues[i];
-		txq = rxq->pair;
-		stats->q_ipackets[i] = rxq->stats.rx_pkts;
-		stats->q_ibytes[i] = rxq->stats.rx_bytes;
+	rte_eth_counters_stats_get(dev, offsetof(struct pkt_tx_queue, stats),
+				   offsetof(struct pkt_rx_queue, stats), stats);
 
-		stats->q_opackets[i] = txq->stats.tx_pkts;
-		stats->q_obytes[i] = txq->stats.tx_bytes;
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		struct xdp_statistics xdp_stats;
+		socklen_t optlen = sizeof(xdp_stats);
+		int fd;
 
-		stats->ipackets += stats->q_ipackets[i];
-		stats->ibytes += stats->q_ibytes[i];
-		stats->imissed += rxq->stats.rx_dropped;
-		stats->oerrors += txq->stats.tx_dropped;
 		fd = process_private->rxq_xsk_fds[i];
-		ret = fd >= 0 ? getsockopt(fd, SOL_XDP, XDP_STATISTICS,
-					   &xdp_stats, &optlen) : -1;
-		if (ret != 0) {
+		if (fd < 0)
+			continue;
+		if (getsockopt(fd, SOL_XDP, XDP_STATISTICS,
+			       &xdp_stats, &optlen)  < 0) {
 			AF_XDP_LOG(ERR, "getsockopt() failed for XDP_STATISTICS.\n");
 			return -1;
 		}
 		stats->imissed += xdp_stats.rx_dropped;
-
-		stats->opackets += stats->q_opackets[i];
-		stats->obytes += stats->q_obytes[i];
 	}
 
 	return 0;
@@ -888,17 +848,8 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
-	struct pmd_internals *internals = dev->data->dev_private;
-	int i;
-
-	for (i = 0; i < internals->queue_cnt; i++) {
-		memset(&internals->rx_queues[i].stats, 0,
-					sizeof(struct rx_stats));
-		memset(&internals->tx_queues[i].stats, 0,
-					sizeof(struct tx_stats));
-	}
-
-	return 0;
+	return rte_eth_counters_reset(dev, offsetof(struct pkt_tx_queue, stats),
+				      offsetof(struct pkt_rx_queue, stats));
 }
 
 #ifdef RTE_NET_AF_XDP_LIBBPF_XDP_ATTACH
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v7 5/9] net/pcap: use generic SW stats
  2024-05-17 17:35   ` [PATCH v7 0/9] Use weak atomic operations for SW PMD counters Stephen Hemminger
                       ` (3 preceding siblings ...)
  2024-05-17 17:35     ` [PATCH v7 4/9] net/af_xdp: " Stephen Hemminger
@ 2024-05-17 17:35     ` Stephen Hemminger
  2024-05-17 17:35     ` [PATCH v7 6/9] test/pmd_ring: initialize mbufs Stephen Hemminger
                       ` (3 subsequent siblings)
  8 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-17 17:35 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

Use common statistics for SW drivers.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/pcap/pcap_ethdev.c | 125 +++++++--------------------------
 1 file changed, 26 insertions(+), 99 deletions(-)

diff --git a/drivers/net/pcap/pcap_ethdev.c b/drivers/net/pcap/pcap_ethdev.c
index bfec085045..b1a983f871 100644
--- a/drivers/net/pcap/pcap_ethdev.c
+++ b/drivers/net/pcap/pcap_ethdev.c
@@ -11,6 +11,7 @@
 
 #include <rte_cycles.h>
 #include <ethdev_driver.h>
+#include <ethdev_swstats.h>
 #include <ethdev_vdev.h>
 #include <rte_kvargs.h>
 #include <rte_malloc.h>
@@ -48,13 +49,6 @@ static uint8_t iface_idx;
 static uint64_t timestamp_rx_dynflag;
 static int timestamp_dynfield_offset = -1;
 
-struct queue_stat {
-	volatile unsigned long pkts;
-	volatile unsigned long bytes;
-	volatile unsigned long err_pkts;
-	volatile unsigned long rx_nombuf;
-};
-
 struct queue_missed_stat {
 	/* last value retrieved from pcap */
 	unsigned int pcap;
@@ -68,7 +62,7 @@ struct pcap_rx_queue {
 	uint16_t port_id;
 	uint16_t queue_id;
 	struct rte_mempool *mb_pool;
-	struct queue_stat rx_stat;
+	struct rte_eth_counters rx_stat;
 	struct queue_missed_stat missed_stat;
 	char name[PATH_MAX];
 	char type[ETH_PCAP_ARG_MAXLEN];
@@ -80,7 +74,7 @@ struct pcap_rx_queue {
 struct pcap_tx_queue {
 	uint16_t port_id;
 	uint16_t queue_id;
-	struct queue_stat tx_stat;
+	struct rte_eth_counters tx_stat;
 	char name[PATH_MAX];
 	char type[ETH_PCAP_ARG_MAXLEN];
 };
@@ -238,7 +232,6 @@ eth_pcap_rx_infinite(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
 	int i;
 	struct pcap_rx_queue *pcap_q = queue;
-	uint32_t rx_bytes = 0;
 
 	if (unlikely(nb_pkts == 0))
 		return 0;
@@ -252,39 +245,35 @@ eth_pcap_rx_infinite(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		if (err)
 			return i;
 
+		rte_eth_count_mbuf(&pcap_q->rx_stat, pcap_buf);
+
 		rte_memcpy(rte_pktmbuf_mtod(bufs[i], void *),
 				rte_pktmbuf_mtod(pcap_buf, void *),
 				pcap_buf->data_len);
 		bufs[i]->data_len = pcap_buf->data_len;
 		bufs[i]->pkt_len = pcap_buf->pkt_len;
 		bufs[i]->port = pcap_q->port_id;
-		rx_bytes += pcap_buf->data_len;
+
 
 		/* Enqueue packet back on ring to allow infinite rx. */
 		rte_ring_enqueue(pcap_q->pkts, pcap_buf);
 	}
 
-	pcap_q->rx_stat.pkts += i;
-	pcap_q->rx_stat.bytes += rx_bytes;
-
 	return i;
 }
 
 static uint16_t
 eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
+	struct pcap_rx_queue *pcap_q = queue;
+	struct rte_eth_dev *dev = &rte_eth_devices[pcap_q->port_id];
+	struct pmd_process_private *pp = dev->process_private;
+	pcap_t *pcap = pp->rx_pcap[pcap_q->queue_id];
 	unsigned int i;
 	struct pcap_pkthdr header;
-	struct pmd_process_private *pp;
 	const u_char *packet;
 	struct rte_mbuf *mbuf;
-	struct pcap_rx_queue *pcap_q = queue;
 	uint16_t num_rx = 0;
-	uint32_t rx_bytes = 0;
-	pcap_t *pcap;
-
-	pp = rte_eth_devices[pcap_q->port_id].process_private;
-	pcap = pp->rx_pcap[pcap_q->queue_id];
 
 	if (unlikely(pcap == NULL || nb_pkts == 0))
 		return 0;
@@ -300,7 +289,7 @@ eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		mbuf = rte_pktmbuf_alloc(pcap_q->mb_pool);
 		if (unlikely(mbuf == NULL)) {
-			pcap_q->rx_stat.rx_nombuf++;
+			++dev->data->rx_mbuf_alloc_failed;
 			break;
 		}
 
@@ -315,7 +304,7 @@ eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 						       mbuf,
 						       packet,
 						       header.caplen) == -1)) {
-				pcap_q->rx_stat.err_pkts++;
+				rte_eth_count_error(&pcap_q->rx_stat);
 				rte_pktmbuf_free(mbuf);
 				break;
 			}
@@ -329,11 +318,10 @@ eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		mbuf->ol_flags |= timestamp_rx_dynflag;
 		mbuf->port = pcap_q->port_id;
 		bufs[num_rx] = mbuf;
+
+		rte_eth_count_mbuf(&pcap_q->rx_stat, mbuf);
 		num_rx++;
-		rx_bytes += header.caplen;
 	}
-	pcap_q->rx_stat.pkts += num_rx;
-	pcap_q->rx_stat.bytes += rx_bytes;
 
 	return num_rx;
 }
@@ -379,8 +367,6 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct rte_mbuf *mbuf;
 	struct pmd_process_private *pp;
 	struct pcap_tx_queue *dumper_q = queue;
-	uint16_t num_tx = 0;
-	uint32_t tx_bytes = 0;
 	struct pcap_pkthdr header;
 	pcap_dumper_t *dumper;
 	unsigned char temp_data[RTE_ETH_PCAP_SNAPLEN];
@@ -412,8 +398,7 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		pcap_dump((u_char *)dumper, &header,
 			rte_pktmbuf_read(mbuf, 0, caplen, temp_data));
 
-		num_tx++;
-		tx_bytes += caplen;
+		rte_eth_count_mbuf(&dumper_q->tx_stat, mbuf);
 		rte_pktmbuf_free(mbuf);
 	}
 
@@ -423,9 +408,6 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	 * we flush the pcap dumper within each burst.
 	 */
 	pcap_dump_flush(dumper);
-	dumper_q->tx_stat.pkts += num_tx;
-	dumper_q->tx_stat.bytes += tx_bytes;
-	dumper_q->tx_stat.err_pkts += nb_pkts - num_tx;
 
 	return nb_pkts;
 }
@@ -437,20 +419,16 @@ static uint16_t
 eth_tx_drop(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
 	unsigned int i;
-	uint32_t tx_bytes = 0;
 	struct pcap_tx_queue *tx_queue = queue;
 
 	if (unlikely(nb_pkts == 0))
 		return 0;
 
 	for (i = 0; i < nb_pkts; i++) {
-		tx_bytes += bufs[i]->pkt_len;
+		rte_eth_count_mbuf(&tx_queue->tx_stat, bufs[i]);
 		rte_pktmbuf_free(bufs[i]);
 	}
 
-	tx_queue->tx_stat.pkts += nb_pkts;
-	tx_queue->tx_stat.bytes += tx_bytes;
-
 	return i;
 }
 
@@ -465,8 +443,6 @@ eth_pcap_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct rte_mbuf *mbuf;
 	struct pmd_process_private *pp;
 	struct pcap_tx_queue *tx_queue = queue;
-	uint16_t num_tx = 0;
-	uint32_t tx_bytes = 0;
 	pcap_t *pcap;
 	unsigned char temp_data[RTE_ETH_PCAP_SNAPLEN];
 	size_t len;
@@ -497,15 +473,11 @@ eth_pcap_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			rte_pktmbuf_read(mbuf, 0, len, temp_data), len);
 		if (unlikely(ret != 0))
 			break;
-		num_tx++;
-		tx_bytes += len;
+
+		rte_eth_count_mbuf(&tx_queue->tx_stat, mbuf);
 		rte_pktmbuf_free(mbuf);
 	}
 
-	tx_queue->tx_stat.pkts += num_tx;
-	tx_queue->tx_stat.bytes += tx_bytes;
-	tx_queue->tx_stat.err_pkts += i - num_tx;
-
 	return i;
 }
 
@@ -746,41 +718,12 @@ static int
 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
 	unsigned int i;
-	unsigned long rx_packets_total = 0, rx_bytes_total = 0;
-	unsigned long rx_missed_total = 0;
-	unsigned long rx_nombuf_total = 0, rx_err_total = 0;
-	unsigned long tx_packets_total = 0, tx_bytes_total = 0;
-	unsigned long tx_packets_err_total = 0;
-	const struct pmd_internals *internal = dev->data->dev_private;
-
-	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
-			i < dev->data->nb_rx_queues; i++) {
-		stats->q_ipackets[i] = internal->rx_queue[i].rx_stat.pkts;
-		stats->q_ibytes[i] = internal->rx_queue[i].rx_stat.bytes;
-		rx_nombuf_total += internal->rx_queue[i].rx_stat.rx_nombuf;
-		rx_err_total += internal->rx_queue[i].rx_stat.err_pkts;
-		rx_packets_total += stats->q_ipackets[i];
-		rx_bytes_total += stats->q_ibytes[i];
-		rx_missed_total += queue_missed_stat_get(dev, i);
-	}
 
-	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
-			i < dev->data->nb_tx_queues; i++) {
-		stats->q_opackets[i] = internal->tx_queue[i].tx_stat.pkts;
-		stats->q_obytes[i] = internal->tx_queue[i].tx_stat.bytes;
-		tx_packets_total += stats->q_opackets[i];
-		tx_bytes_total += stats->q_obytes[i];
-		tx_packets_err_total += internal->tx_queue[i].tx_stat.err_pkts;
-	}
+	rte_eth_counters_stats_get(dev, offsetof(struct pcap_tx_queue, tx_stat),
+				   offsetof(struct pcap_rx_queue, rx_stat), stats);
 
-	stats->ipackets = rx_packets_total;
-	stats->ibytes = rx_bytes_total;
-	stats->imissed = rx_missed_total;
-	stats->ierrors = rx_err_total;
-	stats->rx_nombuf = rx_nombuf_total;
-	stats->opackets = tx_packets_total;
-	stats->obytes = tx_bytes_total;
-	stats->oerrors = tx_packets_err_total;
+	for (i = 0; i < dev->data->nb_rx_queues; i++)
+		stats->imissed += queue_missed_stat_get(dev, i);
 
 	return 0;
 }
@@ -789,21 +732,12 @@ static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
 	unsigned int i;
-	struct pmd_internals *internal = dev->data->dev_private;
 
-	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		internal->rx_queue[i].rx_stat.pkts = 0;
-		internal->rx_queue[i].rx_stat.bytes = 0;
-		internal->rx_queue[i].rx_stat.err_pkts = 0;
-		internal->rx_queue[i].rx_stat.rx_nombuf = 0;
-		queue_missed_stat_reset(dev, i);
-	}
+	rte_eth_counters_reset(dev, offsetof(struct pcap_tx_queue, tx_stat),
+			       offsetof(struct pcap_rx_queue, rx_stat));
 
-	for (i = 0; i < dev->data->nb_tx_queues; i++) {
-		internal->tx_queue[i].tx_stat.pkts = 0;
-		internal->tx_queue[i].tx_stat.bytes = 0;
-		internal->tx_queue[i].tx_stat.err_pkts = 0;
-	}
+	for (i = 0; i < dev->data->nb_rx_queues; i++)
+		queue_missed_stat_reset(dev, i);
 
 	return 0;
 }
@@ -929,13 +863,6 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 				pcap_pkt_count);
 			return -EINVAL;
 		}
-
-		/*
-		 * Reset the stats for this queue since eth_pcap_rx calls above
-		 * didn't result in the application receiving packets.
-		 */
-		pcap_q->rx_stat.pkts = 0;
-		pcap_q->rx_stat.bytes = 0;
 	}
 
 	return 0;
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v7 6/9] test/pmd_ring: initialize mbufs
  2024-05-17 17:35   ` [PATCH v7 0/9] Use weak atomic operations for SW PMD counters Stephen Hemminger
                       ` (4 preceding siblings ...)
  2024-05-17 17:35     ` [PATCH v7 5/9] net/pcap: " Stephen Hemminger
@ 2024-05-17 17:35     ` Stephen Hemminger
  2024-05-17 17:35     ` [PATCH v7 7/9] net/ring: use generic SW stats Stephen Hemminger
                       ` (2 subsequent siblings)
  8 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-17 17:35 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Bruce Richardson

Do not pass uninitialized data into the ring PMD.
The mbufs should be initialized first so that length is zero.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 app/test/test_pmd_ring.c | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/app/test/test_pmd_ring.c b/app/test/test_pmd_ring.c
index e83b9dd6b8..55455ece7f 100644
--- a/app/test/test_pmd_ring.c
+++ b/app/test/test_pmd_ring.c
@@ -19,6 +19,14 @@ static struct rte_mempool *mp;
 struct rte_ring *rxtx[NUM_RINGS];
 static int tx_porta, rx_portb, rxtx_portc, rxtx_portd, rxtx_porte;
 
+/* make a valid zero sized mbuf */
+static void
+test_mbuf_init(struct rte_mbuf *mbuf)
+{
+	memset(mbuf, 0, sizeof(*mbuf));
+	rte_pktmbuf_reset(mbuf);
+}
+
 static int
 test_ethdev_configure_port(int port)
 {
@@ -68,14 +76,16 @@ test_ethdev_configure_port(int port)
 static int
 test_send_basic_packets(void)
 {
-	struct rte_mbuf  bufs[RING_SIZE];
+	struct rte_mbuf bufs[RING_SIZE];
 	struct rte_mbuf *pbufs[RING_SIZE];
 	int i;
 
 	printf("Testing send and receive RING_SIZE/2 packets (tx_porta -> rx_portb)\n");
 
-	for (i = 0; i < RING_SIZE/2; i++)
+	for (i = 0; i < RING_SIZE / 2; i++) {
+		test_mbuf_init(&bufs[i]);
 		pbufs[i] = &bufs[i];
+	}
 
 	if (rte_eth_tx_burst(tx_porta, 0, pbufs, RING_SIZE/2) < RING_SIZE/2) {
 		printf("Failed to transmit packet burst port %d\n", tx_porta);
@@ -99,14 +109,16 @@ test_send_basic_packets(void)
 static int
 test_send_basic_packets_port(int port)
 {
-	struct rte_mbuf  bufs[RING_SIZE];
+	struct rte_mbuf bufs[RING_SIZE];
 	struct rte_mbuf *pbufs[RING_SIZE];
 	int i;
 
 	printf("Testing send and receive RING_SIZE/2 packets (cmdl_port0 -> cmdl_port0)\n");
 
-	for (i = 0; i < RING_SIZE/2; i++)
+	for (i = 0; i < RING_SIZE / 2; i++) {
+		test_mbuf_init(&bufs[i]);
 		pbufs[i] = &bufs[i];
+	}
 
 	if (rte_eth_tx_burst(port, 0, pbufs, RING_SIZE/2) < RING_SIZE/2) {
 		printf("Failed to transmit packet burst port %d\n", port);
@@ -134,10 +146,11 @@ test_get_stats(int port)
 	struct rte_eth_stats stats;
 	struct rte_mbuf buf, *pbuf = &buf;
 
+	test_mbuf_init(&buf);
+
 	printf("Testing ring PMD stats_get port %d\n", port);
 
 	/* check stats of RXTX port, should all be zero */
-
 	rte_eth_stats_get(port, &stats);
 	if (stats.ipackets != 0 || stats.opackets != 0 ||
 			stats.ibytes != 0 || stats.obytes != 0 ||
@@ -173,6 +186,8 @@ test_stats_reset(int port)
 	struct rte_eth_stats stats;
 	struct rte_mbuf buf, *pbuf = &buf;
 
+	test_mbuf_init(&buf);
+
 	printf("Testing ring PMD stats_reset port %d\n", port);
 
 	rte_eth_stats_reset(port);
@@ -228,6 +243,7 @@ test_pmd_ring_pair_create_attach(void)
 	int ret;
 
 	memset(&null_conf, 0, sizeof(struct rte_eth_conf));
+	test_mbuf_init(&buf);
 
 	if ((rte_eth_dev_configure(rxtx_portd, 1, 1, &null_conf) < 0)
 			|| (rte_eth_dev_configure(rxtx_porte, 1, 1,
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v7 7/9] net/ring: use generic SW stats
  2024-05-17 17:35   ` [PATCH v7 0/9] Use weak atomic operations for SW PMD counters Stephen Hemminger
                       ` (5 preceding siblings ...)
  2024-05-17 17:35     ` [PATCH v7 6/9] test/pmd_ring: initialize mbufs Stephen Hemminger
@ 2024-05-17 17:35     ` Stephen Hemminger
  2024-05-17 17:35     ` [PATCH v7 8/9] net/tap: " Stephen Hemminger
  2024-05-17 17:35     ` [PATCH v7 9/9] net/null: " Stephen Hemminger
  8 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-17 17:35 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Bruce Richardson

Use generic per-queue infrastructure.
This also fixes bug where ring code was not accounting
for bytes.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/ring/rte_eth_ring.c | 71 +++++++++++++--------------------
 1 file changed, 28 insertions(+), 43 deletions(-)

diff --git a/drivers/net/ring/rte_eth_ring.c b/drivers/net/ring/rte_eth_ring.c
index b16f5d55f2..85f14dd679 100644
--- a/drivers/net/ring/rte_eth_ring.c
+++ b/drivers/net/ring/rte_eth_ring.c
@@ -7,6 +7,7 @@
 #include "rte_eth_ring.h"
 #include <rte_mbuf.h>
 #include <ethdev_driver.h>
+#include <ethdev_swstats.h>
 #include <rte_malloc.h>
 #include <rte_memcpy.h>
 #include <rte_os_shim.h>
@@ -44,8 +45,8 @@ enum dev_action {
 
 struct ring_queue {
 	struct rte_ring *rng;
-	RTE_ATOMIC(uint64_t) rx_pkts;
-	RTE_ATOMIC(uint64_t) tx_pkts;
+
+	struct rte_eth_counters stats;
 };
 
 struct pmd_internals {
@@ -77,12 +78,13 @@ eth_ring_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 {
 	void **ptrs = (void *)&bufs[0];
 	struct ring_queue *r = q;
-	const uint16_t nb_rx = (uint16_t)rte_ring_dequeue_burst(r->rng,
-			ptrs, nb_bufs, NULL);
-	if (r->rng->flags & RING_F_SC_DEQ)
-		r->rx_pkts += nb_rx;
-	else
-		rte_atomic_fetch_add_explicit(&r->rx_pkts, nb_rx, rte_memory_order_relaxed);
+	uint16_t i, nb_rx;
+
+	nb_rx = (uint16_t)rte_ring_dequeue_burst(r->rng, ptrs, nb_bufs, NULL);
+
+	for (i = 0; i < nb_rx; i++)
+		rte_eth_count_mbuf(&r->stats, bufs[i]);
+
 	return nb_rx;
 }
 
@@ -90,13 +92,20 @@ static uint16_t
 eth_ring_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 {
 	void **ptrs = (void *)&bufs[0];
+	uint32_t *sizes;
 	struct ring_queue *r = q;
-	const uint16_t nb_tx = (uint16_t)rte_ring_enqueue_burst(r->rng,
-			ptrs, nb_bufs, NULL);
-	if (r->rng->flags & RING_F_SP_ENQ)
-		r->tx_pkts += nb_tx;
-	else
-		rte_atomic_fetch_add_explicit(&r->tx_pkts, nb_tx, rte_memory_order_relaxed);
+	uint16_t i, nb_tx;
+
+	sizes = alloca(sizeof(uint32_t) * nb_bufs);
+
+	for (i = 0; i < nb_bufs; i++)
+		sizes[i] = rte_pktmbuf_pkt_len(bufs[i]);
+
+	nb_tx = (uint16_t)rte_ring_enqueue_burst(r->rng, ptrs, nb_bufs, NULL);
+
+	for (i = 0; i < nb_tx; i++)
+		rte_eth_count_packet(&r->stats, sizes[i]);
+
 	return nb_tx;
 }
 
@@ -193,40 +202,16 @@ eth_dev_info(struct rte_eth_dev *dev,
 static int
 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-	unsigned int i;
-	unsigned long rx_total = 0, tx_total = 0;
-	const struct pmd_internals *internal = dev->data->dev_private;
-
-	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
-			i < dev->data->nb_rx_queues; i++) {
-		stats->q_ipackets[i] = internal->rx_ring_queues[i].rx_pkts;
-		rx_total += stats->q_ipackets[i];
-	}
-
-	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
-			i < dev->data->nb_tx_queues; i++) {
-		stats->q_opackets[i] = internal->tx_ring_queues[i].tx_pkts;
-		tx_total += stats->q_opackets[i];
-	}
-
-	stats->ipackets = rx_total;
-	stats->opackets = tx_total;
-
-	return 0;
+	return rte_eth_counters_stats_get(dev, offsetof(struct ring_queue, stats),
+					  offsetof(struct ring_queue, stats),
+					  stats);
 }
 
 static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
-	unsigned int i;
-	struct pmd_internals *internal = dev->data->dev_private;
-
-	for (i = 0; i < dev->data->nb_rx_queues; i++)
-		internal->rx_ring_queues[i].rx_pkts = 0;
-	for (i = 0; i < dev->data->nb_tx_queues; i++)
-		internal->tx_ring_queues[i].tx_pkts = 0;
-
-	return 0;
+	return rte_eth_counters_reset(dev, offsetof(struct ring_queue, stats),
+				      offsetof(struct ring_queue, stats));
 }
 
 static void
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v7 8/9] net/tap: use generic SW stats
  2024-05-17 17:35   ` [PATCH v7 0/9] Use weak atomic operations for SW PMD counters Stephen Hemminger
                       ` (6 preceding siblings ...)
  2024-05-17 17:35     ` [PATCH v7 7/9] net/ring: use generic SW stats Stephen Hemminger
@ 2024-05-17 17:35     ` Stephen Hemminger
  2024-05-17 17:35     ` [PATCH v7 9/9] net/null: " Stephen Hemminger
  8 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-17 17:35 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

Use new common sw statistics.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/tap/rte_eth_tap.c | 88 ++++++-----------------------------
 drivers/net/tap/rte_eth_tap.h | 15 ++----
 2 files changed, 18 insertions(+), 85 deletions(-)

diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index 69d9da695b..f87979da4f 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -432,7 +432,6 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct rx_queue *rxq = queue;
 	struct pmd_process_private *process_private;
 	uint16_t num_rx;
-	unsigned long num_rx_bytes = 0;
 	uint32_t trigger = tap_trigger;
 
 	if (trigger == rxq->trigger_seen)
@@ -455,7 +454,7 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		/* Packet couldn't fit in the provided mbuf */
 		if (unlikely(rxq->pi.flags & TUN_PKT_STRIP)) {
-			rxq->stats.ierrors++;
+			rte_eth_count_error(&rxq->stats);
 			continue;
 		}
 
@@ -467,7 +466,9 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			struct rte_mbuf *buf = rte_pktmbuf_alloc(rxq->mp);
 
 			if (unlikely(!buf)) {
-				rxq->stats.rx_nombuf++;
+				struct rte_eth_dev *dev = &rte_eth_devices[rxq->in_port];
+				++dev->data->rx_mbuf_alloc_failed;
+
 				/* No new buf has been allocated: do nothing */
 				if (!new_tail || !seg)
 					goto end;
@@ -509,11 +510,9 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		/* account for the receive frame */
 		bufs[num_rx++] = mbuf;
-		num_rx_bytes += mbuf->pkt_len;
+		rte_eth_count_mbuf(&rxq->stats, mbuf);
 	}
 end:
-	rxq->stats.ipackets += num_rx;
-	rxq->stats.ibytes += num_rx_bytes;
 
 	if (trigger && num_rx < nb_pkts)
 		rxq->trigger_seen = trigger;
@@ -523,8 +522,7 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 static inline int
 tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs,
-			struct rte_mbuf **pmbufs,
-			uint16_t *num_packets, unsigned long *num_tx_bytes)
+		struct rte_mbuf **pmbufs)
 {
 	struct pmd_process_private *process_private;
 	int i;
@@ -647,8 +645,7 @@ tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs,
 		if (n <= 0)
 			return -1;
 
-		(*num_packets)++;
-		(*num_tx_bytes) += rte_pktmbuf_pkt_len(mbuf);
+		rte_eth_count_mbuf(&txq->stats, mbuf);
 	}
 	return 0;
 }
@@ -660,8 +657,6 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
 	struct tx_queue *txq = queue;
 	uint16_t num_tx = 0;
-	uint16_t num_packets = 0;
-	unsigned long num_tx_bytes = 0;
 	uint32_t max_size;
 	int i;
 
@@ -693,7 +688,7 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			tso_segsz = mbuf_in->tso_segsz + hdrs_len;
 			if (unlikely(tso_segsz == hdrs_len) ||
 				tso_segsz > *txq->mtu) {
-				txq->stats.errs++;
+				rte_eth_count_error(&txq->stats);
 				break;
 			}
 			gso_ctx->gso_size = tso_segsz;
@@ -728,10 +723,10 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			num_mbufs = 1;
 		}
 
-		ret = tap_write_mbufs(txq, num_mbufs, mbuf,
-				&num_packets, &num_tx_bytes);
+		ret = tap_write_mbufs(txq, num_mbufs, mbuf);
 		if (ret == -1) {
-			txq->stats.errs++;
+			rte_eth_count_error(&txq->stats);
+
 			/* free tso mbufs */
 			if (num_tso_mbufs > 0)
 				rte_pktmbuf_free_bulk(mbuf, num_tso_mbufs);
@@ -749,10 +744,6 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		}
 	}
 
-	txq->stats.opackets += num_packets;
-	txq->stats.errs += nb_pkts - num_tx;
-	txq->stats.obytes += num_tx_bytes;
-
 	return num_tx;
 }
 
@@ -1055,64 +1046,15 @@ tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 static int
 tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats)
 {
-	unsigned int i, imax;
-	unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
-	unsigned long rx_bytes_total = 0, tx_bytes_total = 0;
-	unsigned long rx_nombuf = 0, ierrors = 0;
-	const struct pmd_internals *pmd = dev->data->dev_private;
-
-	/* rx queue statistics */
-	imax = (dev->data->nb_rx_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ?
-		dev->data->nb_rx_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS;
-	for (i = 0; i < imax; i++) {
-		tap_stats->q_ipackets[i] = pmd->rxq[i].stats.ipackets;
-		tap_stats->q_ibytes[i] = pmd->rxq[i].stats.ibytes;
-		rx_total += tap_stats->q_ipackets[i];
-		rx_bytes_total += tap_stats->q_ibytes[i];
-		rx_nombuf += pmd->rxq[i].stats.rx_nombuf;
-		ierrors += pmd->rxq[i].stats.ierrors;
-	}
-
-	/* tx queue statistics */
-	imax = (dev->data->nb_tx_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ?
-		dev->data->nb_tx_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS;
-
-	for (i = 0; i < imax; i++) {
-		tap_stats->q_opackets[i] = pmd->txq[i].stats.opackets;
-		tap_stats->q_obytes[i] = pmd->txq[i].stats.obytes;
-		tx_total += tap_stats->q_opackets[i];
-		tx_err_total += pmd->txq[i].stats.errs;
-		tx_bytes_total += tap_stats->q_obytes[i];
-	}
-
-	tap_stats->ipackets = rx_total;
-	tap_stats->ibytes = rx_bytes_total;
-	tap_stats->ierrors = ierrors;
-	tap_stats->rx_nombuf = rx_nombuf;
-	tap_stats->opackets = tx_total;
-	tap_stats->oerrors = tx_err_total;
-	tap_stats->obytes = tx_bytes_total;
-	return 0;
+	return rte_eth_counters_stats_get(dev, offsetof(struct tx_queue, stats),
+					  offsetof(struct rx_queue, stats), tap_stats);
 }
 
 static int
 tap_stats_reset(struct rte_eth_dev *dev)
 {
-	int i;
-	struct pmd_internals *pmd = dev->data->dev_private;
-
-	for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
-		pmd->rxq[i].stats.ipackets = 0;
-		pmd->rxq[i].stats.ibytes = 0;
-		pmd->rxq[i].stats.ierrors = 0;
-		pmd->rxq[i].stats.rx_nombuf = 0;
-
-		pmd->txq[i].stats.opackets = 0;
-		pmd->txq[i].stats.errs = 0;
-		pmd->txq[i].stats.obytes = 0;
-	}
-
-	return 0;
+	return rte_eth_counters_reset(dev, offsetof(struct tx_queue, stats),
+				      offsetof(struct rx_queue, stats));
 }
 
 static int
diff --git a/drivers/net/tap/rte_eth_tap.h b/drivers/net/tap/rte_eth_tap.h
index 5ac93f93e9..8cba9ea410 100644
--- a/drivers/net/tap/rte_eth_tap.h
+++ b/drivers/net/tap/rte_eth_tap.h
@@ -14,6 +14,7 @@
 #include <linux/if_tun.h>
 
 #include <ethdev_driver.h>
+#include <ethdev_swstats.h>
 #include <rte_ether.h>
 #include <rte_gso.h>
 #include "tap_log.h"
@@ -32,23 +33,13 @@ enum rte_tuntap_type {
 	ETH_TUNTAP_TYPE_MAX,
 };
 
-struct pkt_stats {
-	uint64_t opackets;              /* Number of output packets */
-	uint64_t ipackets;              /* Number of input packets */
-	uint64_t obytes;                /* Number of bytes on output */
-	uint64_t ibytes;                /* Number of bytes on input */
-	uint64_t errs;                  /* Number of TX error packets */
-	uint64_t ierrors;               /* Number of RX error packets */
-	uint64_t rx_nombuf;             /* Nb of RX mbuf alloc failures */
-};
-
 struct rx_queue {
 	struct rte_mempool *mp;         /* Mempool for RX packets */
 	uint32_t trigger_seen;          /* Last seen Rx trigger value */
 	uint16_t in_port;               /* Port ID */
 	uint16_t queue_id;		/* queue ID*/
-	struct pkt_stats stats;         /* Stats for this RX queue */
 	uint16_t nb_rx_desc;            /* max number of mbufs available */
+	struct rte_eth_counters stats;  /* Stats for this RX queue */
 	struct rte_eth_rxmode *rxmode;  /* RX features */
 	struct rte_mbuf *pool;          /* mbufs pool for this queue */
 	struct iovec (*iovecs)[];       /* descriptors for this queue */
@@ -59,7 +50,7 @@ struct tx_queue {
 	int type;                       /* Type field - TUN|TAP */
 	uint16_t *mtu;                  /* Pointer to MTU from dev_data */
 	uint16_t csum:1;                /* Enable checksum offloading */
-	struct pkt_stats stats;         /* Stats for this TX queue */
+	struct rte_eth_counters stats;	/* Stats for this TX queue */
 	struct rte_gso_ctx gso_ctx;     /* GSO context */
 	uint16_t out_port;              /* Port ID */
 	uint16_t queue_id;		/* queue ID*/
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v7 9/9] net/null: use generic SW stats
  2024-05-17 17:35   ` [PATCH v7 0/9] Use weak atomic operations for SW PMD counters Stephen Hemminger
                       ` (7 preceding siblings ...)
  2024-05-17 17:35     ` [PATCH v7 8/9] net/tap: " Stephen Hemminger
@ 2024-05-17 17:35     ` Stephen Hemminger
  8 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-17 17:35 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Tetsuya Mukawa

Use the new common code for statistics.
This also fixes the bug that this driver was not accounting
for bytes.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/null/rte_eth_null.c | 80 +++++++--------------------------
 1 file changed, 17 insertions(+), 63 deletions(-)

diff --git a/drivers/net/null/rte_eth_null.c b/drivers/net/null/rte_eth_null.c
index f4ed3b8a7f..7786982732 100644
--- a/drivers/net/null/rte_eth_null.c
+++ b/drivers/net/null/rte_eth_null.c
@@ -8,6 +8,7 @@
 #include <rte_mbuf.h>
 #include <ethdev_driver.h>
 #include <ethdev_vdev.h>
+#include <ethdev_swstats.h>
 #include <rte_malloc.h>
 #include <rte_memcpy.h>
 #include <bus_vdev_driver.h>
@@ -37,8 +38,8 @@ struct null_queue {
 	struct rte_mempool *mb_pool;
 	struct rte_mbuf *dummy_packet;
 
-	RTE_ATOMIC(uint64_t) rx_pkts;
-	RTE_ATOMIC(uint64_t) tx_pkts;
+	struct rte_eth_counters tx_stats;
+	struct rte_eth_counters rx_stats;
 };
 
 struct pmd_options {
@@ -99,11 +100,9 @@ eth_null_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 		bufs[i]->data_len = (uint16_t)packet_size;
 		bufs[i]->pkt_len = packet_size;
 		bufs[i]->port = h->internals->port_id;
+		rte_eth_count_mbuf(&h->rx_stats, bufs[i]);
 	}
 
-	/* NOTE: review for potential ordering optimization */
-	rte_atomic_fetch_add_explicit(&h->rx_pkts, i, rte_memory_order_seq_cst);
-
 	return i;
 }
 
@@ -127,11 +126,9 @@ eth_null_copy_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 		bufs[i]->data_len = (uint16_t)packet_size;
 		bufs[i]->pkt_len = packet_size;
 		bufs[i]->port = h->internals->port_id;
+		rte_eth_count_mbuf(&h->rx_stats, bufs[i]);
 	}
 
-	/* NOTE: review for potential ordering optimization */
-	rte_atomic_fetch_add_explicit(&h->rx_pkts, i, rte_memory_order_seq_cst);
-
 	return i;
 }
 
@@ -151,11 +148,10 @@ eth_null_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 	if ((q == NULL) || (bufs == NULL))
 		return 0;
 
-	for (i = 0; i < nb_bufs; i++)
+	for (i = 0; i < nb_bufs; i++) {
+		rte_eth_count_mbuf(&h->tx_stats, bufs[i]);
 		rte_pktmbuf_free(bufs[i]);
-
-	/* NOTE: review for potential ordering optimization */
-	rte_atomic_fetch_add_explicit(&h->tx_pkts, i, rte_memory_order_seq_cst);
+	}
 
 	return i;
 }
@@ -174,12 +170,10 @@ eth_null_copy_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 	for (i = 0; i < nb_bufs; i++) {
 		rte_memcpy(h->dummy_packet, rte_pktmbuf_mtod(bufs[i], void *),
 					packet_size);
+		rte_eth_count_mbuf(&h->tx_stats, bufs[i]);
 		rte_pktmbuf_free(bufs[i]);
 	}
 
-	/* NOTE: review for potential ordering optimization */
-	rte_atomic_fetch_add_explicit(&h->tx_pkts, i, rte_memory_order_seq_cst);
-
 	return i;
 }
 
@@ -322,60 +316,20 @@ eth_dev_info(struct rte_eth_dev *dev,
 }
 
 static int
-eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
+eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-	unsigned int i, num_stats;
-	unsigned long rx_total = 0, tx_total = 0;
-	const struct pmd_internals *internal;
-
-	if ((dev == NULL) || (igb_stats == NULL))
-		return -EINVAL;
-
-	internal = dev->data->dev_private;
-	num_stats = RTE_MIN((unsigned int)RTE_ETHDEV_QUEUE_STAT_CNTRS,
-			RTE_MIN(dev->data->nb_rx_queues,
-				RTE_DIM(internal->rx_null_queues)));
-	for (i = 0; i < num_stats; i++) {
-		/* NOTE: review for atomic access */
-		igb_stats->q_ipackets[i] =
-			internal->rx_null_queues[i].rx_pkts;
-		rx_total += igb_stats->q_ipackets[i];
-	}
-
-	num_stats = RTE_MIN((unsigned int)RTE_ETHDEV_QUEUE_STAT_CNTRS,
-			RTE_MIN(dev->data->nb_tx_queues,
-				RTE_DIM(internal->tx_null_queues)));
-	for (i = 0; i < num_stats; i++) {
-		/* NOTE: review for atomic access */
-		igb_stats->q_opackets[i] =
-			internal->tx_null_queues[i].tx_pkts;
-		tx_total += igb_stats->q_opackets[i];
-	}
-
-	igb_stats->ipackets = rx_total;
-	igb_stats->opackets = tx_total;
-
-	return 0;
+	return rte_eth_counters_stats_get(dev,
+					  offsetof(struct null_queue, tx_stats),
+					  offsetof(struct null_queue, rx_stats),
+					  stats);
 }
 
 static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
-	unsigned int i;
-	struct pmd_internals *internal;
-
-	if (dev == NULL)
-		return -EINVAL;
-
-	internal = dev->data->dev_private;
-	for (i = 0; i < RTE_DIM(internal->rx_null_queues); i++)
-		/* NOTE: review for atomic access */
-		internal->rx_null_queues[i].rx_pkts = 0;
-	for (i = 0; i < RTE_DIM(internal->tx_null_queues); i++)
-		/* NOTE: review for atomic access */
-		internal->tx_null_queues[i].tx_pkts = 0;
-
-	return 0;
+	return rte_eth_counters_reset(dev,
+				      offsetof(struct null_queue, tx_stats),
+				      offsetof(struct null_queue, rx_stats));
 }
 
 static void
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v6 1/9] eal: generic 64 bit counter
  2024-05-17 16:18               ` Stephen Hemminger
@ 2024-05-18 14:00                 ` Morten Brørup
  2024-05-19 15:13                   ` Stephen Hemminger
  0 siblings, 1 reply; 179+ messages in thread
From: Morten Brørup @ 2024-05-18 14:00 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Honnappa Nagarahalli, dev, nd, Richardson, Bruce

> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Friday, 17 May 2024 18.18
> 
> On Fri, 17 May 2024 08:44:42 +0200
> Morten Brørup <mb@smartsharesystems.com> wrote:
> 
> > > From: Honnappa Nagarahalli [mailto:Honnappa.Nagarahalli@arm.com]
> > > Sent: Friday, 17 May 2024 06.27
> > >
> > > + Bruce for feedback on x86 architecture
> > >
> > > > On May 16, 2024, at 10:30 PM, Stephen Hemminger
> <stephen@networkplumber.org>
> > > wrote:
> > > >
> > > > On Fri, 17 May 2024 02:45:12 +0000
> > > > Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:
> > > >
> > > >>> + * A counter is 64 bit value that is safe from split read/write
> > > >>> + * on 32 bit platforms. It assumes that only one cpu at a time
> > > >> If we are defining the counter in this manner, then
> implementation cannot
> > > be generic. I think architectures will have constraints if they have
> to ensure
> > > the 64b variables are not split.
> > > >>
> > > >> I think we at least need the counter to be aligned on 8B boundary
> to have
> > > generic code.
> > > >
> > > > The C standard has always guaranteed that read and write to
> unsigned log
> > > will not be split.
> > > As I understand, this is true only if the variable is an atomic
> variable. If
> > > not, there is no difference between atomic variables and non-atomic
> variables.
> > >
> > > > Therefore if arch is 64 bit native there is no need for atomics
> > > At least on ARM architecture, if the variable is not aligned on 8B
> boundary,
> > > the load or store are not atomic. I am sure it is the same on other
> > > architectures.
> 
> After reading this: Who's afraid of a big bad optimizing compiler?
>  https://lwn.net/Articles/793253/

Very interesting article!

> 
> Looks like you are right, and atomic or read/write once is required.

I don't like the performance tradeoff (for 64 bit architectures) in the v7 patch.
For single-tread updated counters, we MUST have a high performance counter_add(), not using atomic read-modify-write.

IMO calling counter_fetch() MUST be possible from another thread.
This requires that the fast path thread stores the counter atomically (or using volatile), to ensure that the counter is not only kept in a CPU register, but stored in memory visible by other threads.

For counter_reset(), we have multiple options:
0. Don't support counter resetting. Not really on option?
1. Only allow calling counter_reset() in the fast path thread updating the counter. This introduces no further requirements.
2. Allow calling counter_reset() from another thread, thereby zeroing the "counter" variable. This introduces a requirement for the "counter" to be thread-safe, so counter_add() must atomically read-modify-write the counter, which has a performance cost.
3. Allow calling counter_reset() from another thread, and introduce an "offset" variable for counter_fetch() and counter_reset() to provide thread-safe fetch/reset from other threads, using the consume-release pattern.

I don't like option 2.
I consider counters too important and frequently used in the fast path, to compromise on performance for counters.

For counters updated by multiple fast path threads, atomic_fetch_add_explicit() of the "counter" variable seems unavoidable.

> Perhaps introducing rte_read_once and rte_write_once is good idea?
> Several drivers are implementing it already.

The read_once/write_once are easier to use, but they lack the flexibility (regarding barriers and locking) provided by their atomic_explicit alternatives, which will impact performance in some use cases.

We should strive for the highest possible performance, which means that we shouldn't introduce functions or design patterns preventing this.
Please note: Security vs. performance is another matter - we certainly don't want to promote insecure code for the benefit of performance. But for "ease of coding" vs. performance, I prefer performance.

That said, I agree that introducing rte_read/write_once functions for use by drivers to access hardware makes sense, to eliminate copy-paste variants in drivers.
But how can we prevent such functions from being used for other purposes, where atomic_explicit should be used?

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v6 1/9] eal: generic 64 bit counter
  2024-05-18 14:00                 ` Morten Brørup
@ 2024-05-19 15:13                   ` Stephen Hemminger
  2024-05-19 17:10                     ` Morten Brørup
  0 siblings, 1 reply; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-19 15:13 UTC (permalink / raw)
  To: Morten Brørup; +Cc: Honnappa Nagarahalli, dev, nd, Richardson, Bruce

On Sat, 18 May 2024 16:00:55 +0200
Morten Brørup <mb@smartsharesystems.com> wrote:

> > From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> > Sent: Friday, 17 May 2024 18.18
> > 
> > On Fri, 17 May 2024 08:44:42 +0200
> > Morten Brørup <mb@smartsharesystems.com> wrote:
> >   
> > > > From: Honnappa Nagarahalli [mailto:Honnappa.Nagarahalli@arm.com]
> > > > Sent: Friday, 17 May 2024 06.27
> > > >
> > > > + Bruce for feedback on x86 architecture
> > > >  
> > > > > On May 16, 2024, at 10:30 PM, Stephen Hemminger  
> > <stephen@networkplumber.org>  
> > > > wrote:  
> > > > >
> > > > > On Fri, 17 May 2024 02:45:12 +0000
> > > > > Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:
> > > > >  
> > > > >>> + * A counter is 64 bit value that is safe from split read/write
> > > > >>> + * on 32 bit platforms. It assumes that only one cpu at a time  
> > > > >> If we are defining the counter in this manner, then  
> > implementation cannot  
> > > > be generic. I think architectures will have constraints if they have  
> > to ensure  
> > > > the 64b variables are not split.  
> > > > >>
> > > > >> I think we at least need the counter to be aligned on 8B boundary  
> > to have  
> > > > generic code.  
> > > > >
> > > > > The C standard has always guaranteed that read and write to  
> > unsigned log  
> > > > will not be split.
> > > > As I understand, this is true only if the variable is an atomic  
> > variable. If  
> > > > not, there is no difference between atomic variables and non-atomic  
> > variables.  
> > > >  
> > > > > Therefore if arch is 64 bit native there is no need for atomics  
> > > > At least on ARM architecture, if the variable is not aligned on 8B  
> > boundary,  
> > > > the load or store are not atomic. I am sure it is the same on other
> > > > architectures.  
> > 
> > After reading this: Who's afraid of a big bad optimizing compiler?
> >  https://lwn.net/Articles/793253/  
> 
> Very interesting article!
> 
> > 
> > Looks like you are right, and atomic or read/write once is required.  
> 
> I don't like the performance tradeoff (for 64 bit architectures) in the v7 patch.
> For single-tread updated counters, we MUST have a high performance counter_add(), not using atomic read-modify-write.

The fundamental issue is that for SW drivers, having a totally safe reset function requires
an atomic operation in the fast path for increment. Otherwise the following (highly unlikely) race
is possible:
	
	CPU A					CPU B
		load counter (value = X)
						store counter = 0
		store counter (value = X + 1)


> 
> IMO calling counter_fetch() MUST be possible from another thread.
> This requires that the fast path thread stores the counter atomically (or using volatile), to ensure that the counter is not only kept in a CPU register, but stored in memory visible by other threads.
> 
> For counter_reset(), we have multiple options:
> 0. Don't support counter resetting. Not really on option?

We could reject it in the SW drivers. But better not to.

> 1. Only allow calling counter_reset() in the fast path thread updating the counter. This introduces no further requirements.

Not a good restriction

> 2. Allow calling counter_reset() from another thread, thereby zeroing the "counter" variable. This introduces a requirement for the "counter" to be thread-safe, so counter_add() must atomically read-modify-write the counter, which has a performance cost.

> 3. Allow calling counter_reset() from another thread, and introduce an "offset" variable for counter_fetch() and counter_reset() to provide thread-safe fetch/reset from other threads, using the consume-release pattern.

Too confusing

> 
> I don't like option 2.
> I consider counters too important and frequently used in the fast path, to compromise on performance for counters.

Agree.

> 
> For counters updated by multiple fast path threads, atomic_fetch_add_explicit() of the "counter" variable seems unavoidable.

Not at all worried about overhead in slow (fetch) path.

> 
> > Perhaps introducing rte_read_once and rte_write_once is good idea?
> > Several drivers are implementing it already.  
> 
> The read_once/write_once are easier to use, but they lack the flexibility (regarding barriers and locking) provided by their atomic_explicit alternatives, which will impact performance in some use cases.

They solve the compiler reordering problem but do nothing about cpu ordering.
Also, there is no such increment.

> 
> We should strive for the highest possible performance, which means that we shouldn't introduce functions or design patterns preventing this.
> Please note: Security vs. performance is another matter - we certainly don't want to promote insecure code for the benefit of performance. But for "ease of coding" vs. performance, I prefer performance.
> 
> That said, I agree that introducing rte_read/write_once functions for use by drivers to access hardware makes sense, to eliminate copy-paste variants in drivers.
> But how can we prevent such functions from being used for other purposes, where atomic_explicit should be used?
> 

Looking at x86 result on godbolt shows that using atomic only adds a single locked operation.
Perhaps this can be ameliorated by doing bulk add at end of loop, like many drivers were already.



^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v6 1/9] eal: generic 64 bit counter
  2024-05-19 15:13                   ` Stephen Hemminger
@ 2024-05-19 17:10                     ` Morten Brørup
  2024-05-19 22:49                       ` Stephen Hemminger
  0 siblings, 1 reply; 179+ messages in thread
From: Morten Brørup @ 2024-05-19 17:10 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Honnappa Nagarahalli, dev, nd, Richardson, Bruce

> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Sunday, 19 May 2024 17.14
> 
> On Sat, 18 May 2024 16:00:55 +0200
> Morten Brørup <mb@smartsharesystems.com> wrote:
> 
> > > From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> > > Sent: Friday, 17 May 2024 18.18
> > >
> > > On Fri, 17 May 2024 08:44:42 +0200
> > > Morten Brørup <mb@smartsharesystems.com> wrote:
> > >
> > > > > From: Honnappa Nagarahalli [mailto:Honnappa.Nagarahalli@arm.com]
> > > > > Sent: Friday, 17 May 2024 06.27
> > > > >
> > > > > + Bruce for feedback on x86 architecture
> > > > >
> > > > > > On May 16, 2024, at 10:30 PM, Stephen Hemminger
> > > <stephen@networkplumber.org>
> > > > > wrote:
> > > > > >
> > > > > > On Fri, 17 May 2024 02:45:12 +0000
> > > > > > Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:
> > > > > >
> > > > > >>> + * A counter is 64 bit value that is safe from split
> read/write
> > > > > >>> + * on 32 bit platforms. It assumes that only one cpu at a
> time
> > > > > >> If we are defining the counter in this manner, then
> > > implementation cannot
> > > > > be generic. I think architectures will have constraints if they
> have
> > > to ensure
> > > > > the 64b variables are not split.
> > > > > >>
> > > > > >> I think we at least need the counter to be aligned on 8B
> boundary
> > > to have
> > > > > generic code.
> > > > > >
> > > > > > The C standard has always guaranteed that read and write to
> > > unsigned log
> > > > > will not be split.
> > > > > As I understand, this is true only if the variable is an atomic
> > > variable. If
> > > > > not, there is no difference between atomic variables and non-
> atomic
> > > variables.
> > > > >
> > > > > > Therefore if arch is 64 bit native there is no need for
> atomics
> > > > > At least on ARM architecture, if the variable is not aligned on
> 8B
> > > boundary,
> > > > > the load or store are not atomic. I am sure it is the same on
> other
> > > > > architectures.
> > >
> > > After reading this: Who's afraid of a big bad optimizing compiler?
> > >  https://lwn.net/Articles/793253/
> >
> > Very interesting article!
> >
> > >
> > > Looks like you are right, and atomic or read/write once is required.
> >
> > I don't like the performance tradeoff (for 64 bit architectures) in
> the v7 patch.
> > For single-tread updated counters, we MUST have a high performance
> counter_add(), not using atomic read-modify-write.
> 
> The fundamental issue is that for SW drivers, having a totally safe
> reset function requires
> an atomic operation in the fast path for increment. Otherwise the
> following (highly unlikely) race
> is possible:
> 
> 	CPU A					CPU B
> 		load counter (value = X)
> 						store counter = 0
> 		store counter (value = X + 1)
> 

Yes, this is why I suggest the "offset" method, option 3.
Depending on which CPU wins the race, reset() will set "offset" to either X or X + 1 using your example here.
"offset" is atomic, so reading it cannot race writing it. And thus:
if counter = X was visible at reset(), fetch() will return counter - offset = X - X = 0, and
if counter = X + 1 was visible at reset(), fetch() will return counter - offset = (X + 1) - (X + 1) = 0.

> 
> >
> > IMO calling counter_fetch() MUST be possible from another thread.
> > This requires that the fast path thread stores the counter atomically
> (or using volatile), to ensure that the counter is not only kept in a
> CPU register, but stored in memory visible by other threads.
> >
> > For counter_reset(), we have multiple options:
> > 0. Don't support counter resetting. Not really on option?
> 
> We could reject it in the SW drivers. But better not to.

Agree.

> 
> > 1. Only allow calling counter_reset() in the fast path thread updating
> the counter. This introduces no further requirements.
> 
> Not a good restriction

Not good, but the alternatives seem to be worse.

> 
> > 2. Allow calling counter_reset() from another thread, thereby zeroing
> the "counter" variable. This introduces a requirement for the "counter"
> to be thread-safe, so counter_add() must atomically read-modify-write
> the counter, which has a performance cost.

Bad for performance.

> 
> > 3. Allow calling counter_reset() from another thread, and introduce an
> "offset" variable for counter_fetch() and counter_reset() to provide
> thread-safe fetch/reset from other threads, using the consume-release
> pattern.
> 
> Too confusing

Using offsets for pseudo-reset is a common design pattern.
And the performance is excellent.

Perhaps the Linux kernel doesn't use this design pattern, but that is not a valid reason to disqualify it.

> 
> >
> > I don't like option 2.
> > I consider counters too important and frequently used in the fast
> path, to compromise on performance for counters.
> 
> Agree.
> 
> >
> > For counters updated by multiple fast path threads,
> atomic_fetch_add_explicit() of the "counter" variable seems unavoidable.
> 
> Not at all worried about overhead in slow (fetch) path.

Agree. Fetch() is slow path, so performance is "nice to have", but nothing more.

> 
> >
> > > Perhaps introducing rte_read_once and rte_write_once is good idea?
> > > Several drivers are implementing it already.
> >
> > The read_once/write_once are easier to use, but they lack the
> flexibility (regarding barriers and locking) provided by their
> atomic_explicit alternatives, which will impact performance in some use
> cases.
> 
> They solve the compiler reordering problem but do nothing about cpu
> ordering.
> Also, there is no such increment.

Agree. They are designed for hardware access, not multithreading.

> 
> >
> > We should strive for the highest possible performance, which means
> that we shouldn't introduce functions or design patterns preventing
> this.
> > Please note: Security vs. performance is another matter - we certainly
> don't want to promote insecure code for the benefit of performance. But
> for "ease of coding" vs. performance, I prefer performance.
> >
> > That said, I agree that introducing rte_read/write_once functions for
> use by drivers to access hardware makes sense, to eliminate copy-paste
> variants in drivers.
> > But how can we prevent such functions from being used for other
> purposes, where atomic_explicit should be used?
> >
> 
> Looking at x86 result on godbolt shows that using atomic only adds a
> single locked operation.
> Perhaps this can be ameliorated by doing bulk add at end of loop, like
> many drivers were already.

Absolutely; whenever possible, local counters should be maintained inside the loop, and added to the public counters at the end of a loop.

Please note that application counters might be spread all over memory.
E.g. per-flow in a flow structure, per QoS class in a QoS class structure, per subscriber in a subscriber structure, etc. And a burst of packets might touch multiple of these. My point is: Atomic read-modify-write of counters will cause serious stalling, waiting for memory access.


^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v6 1/9] eal: generic 64 bit counter
  2024-05-19 17:10                     ` Morten Brørup
@ 2024-05-19 22:49                       ` Stephen Hemminger
  2024-05-20  7:57                         ` Morten Brørup
  0 siblings, 1 reply; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-19 22:49 UTC (permalink / raw)
  To: Morten Brørup; +Cc: Honnappa Nagarahalli, dev, nd, Richardson, Bruce

On Sun, 19 May 2024 19:10:30 +0200
Morten Brørup <mb@smartsharesystems.com> wrote:

> Absolutely; whenever possible, local counters should be maintained inside the loop, and added to the public counters at the end of a loop.
> 
> Please note that application counters might be spread all over memory.
> E.g. per-flow in a flow structure, per QoS class in a QoS class structure, per subscriber in a subscriber structure, etc. And a burst of packets might touch multiple of these. My point is: Atomic read-modify-write of counters will cause serious stalling, waiting for memory access

If an application needs to keep up at DPDK possible speeds, then it needs to worry about its
cache access patterns. Last time I checked handling 10G 64 byte packets at line rate without loss
means a maximum of 2 cache misses. Very hard to do with any non trivial application.

Also, SW QoS above 1G is very hard to do with modern CPU's. Especially with multiple flows.
It maybe possible with something like FQ Codel which only keeps small amount of state.

^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v6 1/9] eal: generic 64 bit counter
  2024-05-19 22:49                       ` Stephen Hemminger
@ 2024-05-20  7:57                         ` Morten Brørup
  0 siblings, 0 replies; 179+ messages in thread
From: Morten Brørup @ 2024-05-20  7:57 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Honnappa Nagarahalli, dev, nd, Richardson, Bruce

> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Monday, 20 May 2024 00.49
> 
> On Sun, 19 May 2024 19:10:30 +0200
> Morten Brørup <mb@smartsharesystems.com> wrote:
> 
> > Absolutely; whenever possible, local counters should be maintained
> inside the loop, and added to the public counters at the end of a loop.
> >
> > Please note that application counters might be spread all over memory.
> > E.g. per-flow in a flow structure, per QoS class in a QoS class
> structure, per subscriber in a subscriber structure, etc. And a burst of
> packets might touch multiple of these. My point is: Atomic read-modify-
> write of counters will cause serious stalling, waiting for memory access
> 
> If an application needs to keep up at DPDK possible speeds, then it
> needs to worry about its
> cache access patterns. Last time I checked handling 10G 64 byte packets
> at line rate without loss
> means a maximum of 2 cache misses. Very hard to do with any non trivial
> application.

Yes, very hard.
Which is why I insist that counters must have the absolutely highest possible performance in the fast path.
Non-trivial applications are likely to maintain many instances of application specific counters.

I consider this patch in the series as the EAL library generic 64 bit counters, and for application use too. So I am reviewing with a much broader perspective than just SW drivers.
The SW drivers' use of these counters is not only an improvement of those drivers, it's also an excellent reference use case showing how to use this new EAL 64 bit counters library.

> 
> Also, SW QoS above 1G is very hard to do with modern CPU's. Especially
> with multiple flows.
> It maybe possible with something like FQ Codel which only keeps small
> amount of state.

Yep. Designing software for high performance is all about using optimal algorithms! :-)


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v8 0/8] Common statistics routines for SW based PMD's
  2024-05-10  5:01 ` [RFC 0/3] generic sw counters Stephen Hemminger
                     ` (9 preceding siblings ...)
  2024-05-17 17:35   ` [PATCH v7 0/9] Use weak atomic operations for SW PMD counters Stephen Hemminger
@ 2024-05-21 17:00   ` Stephen Hemminger
  2024-05-21 17:00     ` [PATCH v8 1/8] eal: generic 64 bit counter Stephen Hemminger
                       ` (7 more replies)
  2024-05-21 20:16   ` [PATCH v9 0/8] Common statistics for SW PMD's Stephen Hemminger
  2024-05-22 16:12   ` [PATCH v10 0/8] Common statistics for software PMD's Stephen Hemminger
  12 siblings, 8 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-21 17:00 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

Adds common code for collecting basic statistics used by many
SW based PMD's such as af_packet, af_xdp, null, tap and ring.

This version uses two counters (current and offset) to implement
SMP safe reset on 64 bit platforms; this avoids having to do locked
operation in the fast path.  On 32 bit platforms, the compiler
will split access to 64 bit variables by default; therefore
it has to use std atomic.

By using common code, it also fixes missing counts in several drivers.
And fixes several bugs.

v8 - use counter and offset (based on mailing list discussion) to avoid
     having to locked operation on x86 64.

Stephen Hemminger (8):
  eal: generic 64 bit counter
  ethdev: add common counters for statistics
  net/af_packet: use generic SW stats
  net/af_xdp: use generic SW stats
  net/pcap: use generic SW stats
  net/ring: use generic SW stats
  net/tap: use generic SW stats
  net/null: use generic SW stats

 drivers/net/af_packet/rte_eth_af_packet.c |  78 +++--------
 drivers/net/af_xdp/rte_eth_af_xdp.c       |  85 ++++--------
 drivers/net/null/rte_eth_null.c           |  83 +++---------
 drivers/net/pcap/pcap_ethdev.c            | 100 ++++-----------
 drivers/net/ring/rte_eth_ring.c           |  63 +++------
 drivers/net/tap/rte_eth_tap.c             |  75 ++---------
 drivers/net/tap/rte_eth_tap.h             |  15 +--
 lib/eal/include/meson.build               |   1 +
 lib/eal/include/rte_counter.h             | 150 ++++++++++++++++++++++
 lib/ethdev/ethdev_swstats.c               | 101 +++++++++++++++
 lib/ethdev/ethdev_swstats.h               | 111 ++++++++++++++++
 lib/ethdev/meson.build                    |   2 +
 lib/ethdev/version.map                    |   3 +
 13 files changed, 486 insertions(+), 381 deletions(-)
 create mode 100644 lib/eal/include/rte_counter.h
 create mode 100644 lib/ethdev/ethdev_swstats.c
 create mode 100644 lib/ethdev/ethdev_swstats.h

-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v8 1/8] eal: generic 64 bit counter
  2024-05-21 17:00   ` [PATCH v8 0/8] Common statistics routines for SW based PMD's Stephen Hemminger
@ 2024-05-21 17:00     ` Stephen Hemminger
  2024-05-21 17:00     ` [PATCH v8 2/8] ethdev: add common counters for statistics Stephen Hemminger
                       ` (6 subsequent siblings)
  7 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-21 17:00 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Morten Brørup, Tyler Retzlaff

This header implements 64 bit counters using atomic
operations but with a weak memory ordering so that
they are safe against load/store splits on 32 bit platforms.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
---
 lib/eal/include/meson.build   |   1 +
 lib/eal/include/rte_counter.h | 150 ++++++++++++++++++++++++++++++++++
 2 files changed, 151 insertions(+)
 create mode 100644 lib/eal/include/rte_counter.h

diff --git a/lib/eal/include/meson.build b/lib/eal/include/meson.build
index e94b056d46..c070dd0079 100644
--- a/lib/eal/include/meson.build
+++ b/lib/eal/include/meson.build
@@ -12,6 +12,7 @@ headers += files(
         'rte_class.h',
         'rte_common.h',
         'rte_compat.h',
+        'rte_counter.h',
         'rte_debug.h',
         'rte_dev.h',
         'rte_devargs.h',
diff --git a/lib/eal/include/rte_counter.h b/lib/eal/include/rte_counter.h
new file mode 100644
index 0000000000..f1c4c0e0e8
--- /dev/null
+++ b/lib/eal/include/rte_counter.h
@@ -0,0 +1,150 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+#ifndef _RTE_COUNTER_H_
+#define _RTE_COUNTER_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <rte_compat.h>
+#include <rte_stdatomic.h>
+
+
+/**
+ * @file
+ * RTE Counter
+ *
+ * A counter is 64 bit value that is safe from split read/write.
+ * It assumes that only one cpu at a time  will update the counter,
+ * and another CPU may want to read it.
+ *
+ * This is a weaker subset of full atomic variables.
+ *
+ * The counters are subject to the restrictions of atomic variables
+ * in packed structures or unaligned.
+ */
+
+#ifdef RTE_ARCH_64
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * On native 64 bit platform, counter is implemented as basic
+ * 64 bit unsigned integer that only increases.
+ */
+typedef struct {
+	uint64_t current;
+	uint64_t offset;
+} rte_counter64_t;
+
+/**
+ * @internal
+ * Macro to implement read once (compiler barrier) using stdatomic.
+ * This is compiler barrier only.
+ */
+#define __rte_read_once(var)						\
+	rte_atomic_load_explicit((__rte_atomic typeof(&(var)))&(var),	\
+		rte_memory_order_consume)
+
+/**
+ * @internal
+ * Macro to implement write once (compiler barrier) using stdatomic.
+ * This is compiler barrier only.
+ */
+#define __rte_write_once(var, val)					    \
+	rte_atomic_store_explicit((__rte_atomic typeof(&(var)))&(var), val, \
+		rte_memory_order_release)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Add value to counter.
+ * Assumes this operation is only done by one thread on the object.
+ *
+ * @param counter
+ *    A pointer to the counter.
+ * @param val
+ *    The value to add to the counter.
+ */
+static inline void
+rte_counter64_add(rte_counter64_t *counter, uint32_t val)
+{
+	counter->current += val;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Read a counter.
+ * This operation can be done by any thread.
+ *
+ * @param counter
+ *    A pointer to the counter.
+ * @return
+ *  The current value of the counter.
+ */
+__rte_experimental
+static inline uint64_t
+rte_counter64_read(const rte_counter64_t *counter)
+{
+	return __rte_read_once(counter->current) - __rte_read_once(counter->offset);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Reset a counter to zero.
+ * This operation can be done by any thread.
+ *
+ * @param counter
+ *    A pointer to the counter.
+ */
+__rte_experimental
+static inline void
+rte_counter64_reset(rte_counter64_t *counter)
+{
+	__rte_write_once(counter->offset, __rte_read_once(counter->current));
+}
+
+#else
+
+/* On 32 bit platform, need to use atomic to avoid load/store tearing */
+typedef RTE_ATOMIC(uint64_t) rte_counter64_t;
+
+__rte_experimental
+static inline void
+rte_counter64_add(rte_counter64_t *counter, uint32_t val)
+{
+	rte_atomic_fetch_add_explicit(counter, val, rte_memory_order_relaxed);
+}
+
+__rte_experimental
+static inline uint64_t
+rte_counter64_read(const rte_counter64_t *counter)
+{
+	return rte_atomic_load_explicit(counter, rte_memory_order_consume);
+}
+
+__rte_experimental
+static inline void
+rte_counter64_reset(rte_counter64_t *counter)
+{
+	rte_atomic_store_explicit(counter, 0, rte_memory_order_release);
+}
+
+#endif /* RTE_ARCH_64 */
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_COUNTER_H_ */
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v8 2/8] ethdev: add common counters for statistics
  2024-05-21 17:00   ` [PATCH v8 0/8] Common statistics routines for SW based PMD's Stephen Hemminger
  2024-05-21 17:00     ` [PATCH v8 1/8] eal: generic 64 bit counter Stephen Hemminger
@ 2024-05-21 17:00     ` Stephen Hemminger
  2024-05-21 17:00     ` [PATCH v8 3/8] net/af_packet: use generic SW stats Stephen Hemminger
                       ` (5 subsequent siblings)
  7 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-21 17:00 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Thomas Monjalon, Ferruh Yigit, Andrew Rybchenko

Introduce common helper routines for keeping track of per-queue
statistics in SW PMD's. The code in several drivers had
copy/pasted the same code for this, but had common issues
with 64 bit counters on 32 bit platforms.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 lib/ethdev/ethdev_swstats.c | 101 ++++++++++++++++++++++++++++++++
 lib/ethdev/ethdev_swstats.h | 111 ++++++++++++++++++++++++++++++++++++
 lib/ethdev/meson.build      |   2 +
 lib/ethdev/version.map      |   3 +
 4 files changed, 217 insertions(+)
 create mode 100644 lib/ethdev/ethdev_swstats.c
 create mode 100644 lib/ethdev/ethdev_swstats.h

diff --git a/lib/ethdev/ethdev_swstats.c b/lib/ethdev/ethdev_swstats.c
new file mode 100644
index 0000000000..f7975bdea7
--- /dev/null
+++ b/lib/ethdev/ethdev_swstats.c
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+
+#include <rte_config.h>
+#include <rte_common.h>
+#include <rte_atomic.h>
+
+#include "rte_ethdev.h"
+#include "ethdev_driver.h"
+#include "ethdev_swstats.h"
+
+int
+rte_eth_counters_stats_get(const struct rte_eth_dev *dev,
+			   size_t tx_offset, size_t rx_offset,
+			   struct rte_eth_stats *stats)
+{
+	uint64_t packets, bytes, errors;
+	unsigned int i;
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		const void *txq = dev->data->tx_queues[i];
+		const struct rte_eth_counters *counters;
+
+		if (txq == NULL)
+			continue;
+
+		counters = (const struct rte_eth_counters *)((const char *)txq + tx_offset);
+		packets = rte_counter64_read(&counters->packets);
+		bytes = rte_counter64_read(&counters->bytes);
+		errors = rte_counter64_read(&counters->errors);
+
+		stats->opackets += packets;
+		stats->obytes += bytes;
+		stats->oerrors += errors;
+
+		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+			stats->q_opackets[i] = packets;
+			stats->q_obytes[i] = bytes;
+		}
+	}
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		const void *rxq = dev->data->rx_queues[i];
+		const struct rte_eth_counters *counters;
+
+		if (rxq == NULL)
+			continue;
+
+		counters = (const struct rte_eth_counters *)((const char *)rxq + rx_offset);
+		packets = rte_counter64_read(&counters->packets);
+		bytes = rte_counter64_read(&counters->bytes);
+		errors = rte_counter64_read(&counters->errors);
+
+		stats->ipackets += packets;
+		stats->ibytes += bytes;
+		stats->ierrors += errors;
+
+		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+			stats->q_ipackets[i] = packets;
+			stats->q_ibytes[i] = bytes;
+		}
+	}
+
+	stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
+	return 0;
+}
+
+int
+rte_eth_counters_reset(struct rte_eth_dev *dev, size_t tx_offset, size_t rx_offset)
+{
+	struct rte_eth_counters *counters;
+	unsigned int i;
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		void *txq  = dev->data->tx_queues[i];
+
+		if (txq == NULL)
+			continue;
+
+		counters = (struct rte_eth_counters *)((char *)txq + tx_offset);
+		rte_counter64_reset(&counters->packets);
+		rte_counter64_reset(&counters->bytes);
+		rte_counter64_reset(&counters->errors);
+	}
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		void *rxq  = dev->data->rx_queues[i];
+
+		if (rxq == NULL)
+			continue;
+
+		counters = (struct rte_eth_counters *)((char *)rxq + rx_offset);
+		rte_counter64_reset(&counters->packets);
+		rte_counter64_reset(&counters->bytes);
+		rte_counter64_reset(&counters->errors);
+	}
+
+	return 0;
+}
diff --git a/lib/ethdev/ethdev_swstats.h b/lib/ethdev/ethdev_swstats.h
new file mode 100644
index 0000000000..37b8e43eb0
--- /dev/null
+++ b/lib/ethdev/ethdev_swstats.h
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+#ifndef _RTE_ETHDEV_SWSTATS_H_
+#define _RTE_ETHDEV_SWSTATS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ *
+ * Internal statistics counters for software based devices.
+ * Hardware PMD's should use the hardware counters instead.
+ *
+ * This provides a library for PMD's to keep track of packets and bytes.
+ * It is assumed that this will be used per queue and queues are not
+ * shared by lcores.
+ */
+
+#include <rte_counter.h>
+
+/**
+ * A structure to be embedded in the device driver per-queue data.
+ */
+struct rte_eth_counters {
+	rte_counter64_t	packets;	/**< Total number of packets. */
+	rte_counter64_t	bytes;		/**< Total number of bytes. */
+	rte_counter64_t	errors;		/**< Total number of packets with errors. */
+};
+
+/**
+ * @internal
+ * Increment counters for a single packet.
+ *
+ * @param counters
+ *    Pointer to queue structure containing counters.
+ * @param packets
+ *    Number of packets to count
+ * @param bytes
+ *    Total size of all packet in bytes.
+ */
+__rte_internal
+static inline void
+rte_eth_count_packets(struct rte_eth_counters *counters,
+		      uint16_t packets, uint32_t bytes)
+{
+	rte_counter64_add(&counters->packets, packets);
+	rte_counter64_add(&counters->bytes, bytes);
+}
+
+/**
+ * @internal
+ * Increment error counter.
+ *
+ * @param counters
+ *    Pointer to queue structure containing counters.
+ */
+__rte_internal
+static inline void
+rte_eth_count_error(struct rte_eth_counters *counters)
+{
+	rte_counter64_add(&counters->errors, 1);
+}
+
+/**
+ * @internal
+ * Retrieve the general statistics for all queues.
+ * @see rte_eth_stats_get.
+ *
+ * @param dev
+ *    Pointer to the Ethernet device structure.
+ * @param tx_offset
+ *    Offset from the tx_queue structure where stats are located.
+ * @param rx_offset
+ *    Offset from the rx_queue structure where stats are located.
+ * @param stats
+ *   A pointer to a structure of type *rte_eth_stats* to be filled
+ * @return
+ *   Zero if successful. Non-zero otherwise.
+ */
+__rte_internal
+int rte_eth_counters_stats_get(const struct rte_eth_dev *dev,
+			       size_t tx_offset, size_t rx_offset,
+			       struct rte_eth_stats *stats);
+
+/**
+ * @internal
+ * Reset the statistics for all queues.
+ * @see rte_eth_stats_reset.
+ *
+ * @param dev
+ *    Pointer to the Ethernet device structure.
+ * @param tx_offset
+ *    Offset from the tx_queue structure where stats are located.
+ * @param rx_offset
+ *    Offset from the rx_queue structure where stats are located.
+ * @return
+ *   Zero if successful. Non-zero otherwise.
+ */
+__rte_internal
+int rte_eth_counters_reset(struct rte_eth_dev *dev,
+			   size_t tx_offset, size_t rx_offset);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ETHDEV_SWSTATS_H_ */
diff --git a/lib/ethdev/meson.build b/lib/ethdev/meson.build
index f1d2586591..7ce29a46d4 100644
--- a/lib/ethdev/meson.build
+++ b/lib/ethdev/meson.build
@@ -3,6 +3,7 @@
 
 sources = files(
         'ethdev_driver.c',
+        'ethdev_swstats.c',
         'ethdev_private.c',
         'ethdev_profile.c',
         'ethdev_trace_points.c',
@@ -42,6 +43,7 @@ driver_sdk_headers += files(
         'ethdev_driver.h',
         'ethdev_pci.h',
         'ethdev_vdev.h',
+        'ethdev_swstats.h',
 )
 
 if is_linux
diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
index 79f6f5293b..fc595be278 100644
--- a/lib/ethdev/version.map
+++ b/lib/ethdev/version.map
@@ -358,4 +358,7 @@ INTERNAL {
 	rte_eth_switch_domain_alloc;
 	rte_eth_switch_domain_free;
 	rte_flow_fp_default_ops;
+
+	rte_eth_counters_reset;
+	rte_eth_counters_stats_get;
 };
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v8 3/8] net/af_packet: use generic SW stats
  2024-05-21 17:00   ` [PATCH v8 0/8] Common statistics routines for SW based PMD's Stephen Hemminger
  2024-05-21 17:00     ` [PATCH v8 1/8] eal: generic 64 bit counter Stephen Hemminger
  2024-05-21 17:00     ` [PATCH v8 2/8] ethdev: add common counters for statistics Stephen Hemminger
@ 2024-05-21 17:00     ` Stephen Hemminger
  2024-05-21 17:00     ` [PATCH v8 4/8] net/af_xdp: " Stephen Hemminger
                       ` (4 subsequent siblings)
  7 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-21 17:00 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, John W. Linville

Use the new generic SW stats.
Add a note about how errors and kernel full should be handled.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/af_packet/rte_eth_af_packet.c | 78 +++++------------------
 1 file changed, 17 insertions(+), 61 deletions(-)

diff --git a/drivers/net/af_packet/rte_eth_af_packet.c b/drivers/net/af_packet/rte_eth_af_packet.c
index 397a32db58..64fa519812 100644
--- a/drivers/net/af_packet/rte_eth_af_packet.c
+++ b/drivers/net/af_packet/rte_eth_af_packet.c
@@ -10,6 +10,7 @@
 #include <rte_mbuf.h>
 #include <ethdev_driver.h>
 #include <ethdev_vdev.h>
+#include <ethdev_swstats.h>
 #include <rte_malloc.h>
 #include <rte_kvargs.h>
 #include <bus_vdev_driver.h>
@@ -51,8 +52,7 @@ struct pkt_rx_queue {
 	uint16_t in_port;
 	uint8_t vlan_strip;
 
-	volatile unsigned long rx_pkts;
-	volatile unsigned long rx_bytes;
+	struct rte_eth_counters stats;
 };
 
 struct pkt_tx_queue {
@@ -64,11 +64,10 @@ struct pkt_tx_queue {
 	unsigned int framecount;
 	unsigned int framenum;
 
-	volatile unsigned long tx_pkts;
-	volatile unsigned long err_pkts;
-	volatile unsigned long tx_bytes;
+	struct rte_eth_counters stats;
 };
 
+
 struct pmd_internals {
 	unsigned nb_queues;
 
@@ -168,9 +167,10 @@ eth_af_packet_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		num_rx_bytes += mbuf->pkt_len;
 	}
 	pkt_q->framenum = framenum;
-	pkt_q->rx_pkts += num_rx;
-	pkt_q->rx_bytes += num_rx_bytes;
-	return num_rx;
+
+	rte_eth_count_packets(&pkt_q->stats, num_rx, num_rx_bytes);
+
+	return i;
 }
 
 /*
@@ -294,19 +294,16 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	if (sendto(pkt_q->sockfd, NULL, 0, MSG_DONTWAIT, NULL, 0) == -1 &&
 			errno != ENOBUFS && errno != EAGAIN) {
 		/*
-		 * In case of a ENOBUFS/EAGAIN error all of the enqueued
-		 * packets will be considered successful even though only some
-		 * are sent.
+		 * FIXME: if sendto fails kernel is busy should return 0
+		 * and not free the mbufs. Other errors should free the
+		 * buts and increment the tx error count.
 		 */
-
 		num_tx = 0;
 		num_tx_bytes = 0;
 	}
 
 	pkt_q->framenum = framenum;
-	pkt_q->tx_pkts += num_tx;
-	pkt_q->err_pkts += i - num_tx;
-	pkt_q->tx_bytes += num_tx_bytes;
+	rte_eth_count_packets(&pkt_q->stats, num_tx, num_tx_bytes);
 	return i;
 }
 
@@ -386,58 +383,17 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 }
 
 static int
-eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
+eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-	unsigned i, imax;
-	unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
-	unsigned long rx_bytes_total = 0, tx_bytes_total = 0;
-	const struct pmd_internals *internal = dev->data->dev_private;
-
-	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
-	        internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
-	for (i = 0; i < imax; i++) {
-		igb_stats->q_ipackets[i] = internal->rx_queue[i].rx_pkts;
-		igb_stats->q_ibytes[i] = internal->rx_queue[i].rx_bytes;
-		rx_total += igb_stats->q_ipackets[i];
-		rx_bytes_total += igb_stats->q_ibytes[i];
-	}
-
-	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
-	        internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
-	for (i = 0; i < imax; i++) {
-		igb_stats->q_opackets[i] = internal->tx_queue[i].tx_pkts;
-		igb_stats->q_obytes[i] = internal->tx_queue[i].tx_bytes;
-		tx_total += igb_stats->q_opackets[i];
-		tx_err_total += internal->tx_queue[i].err_pkts;
-		tx_bytes_total += igb_stats->q_obytes[i];
-	}
-
-	igb_stats->ipackets = rx_total;
-	igb_stats->ibytes = rx_bytes_total;
-	igb_stats->opackets = tx_total;
-	igb_stats->oerrors = tx_err_total;
-	igb_stats->obytes = tx_bytes_total;
-	return 0;
+	return rte_eth_counters_stats_get(dev, offsetof(struct pkt_tx_queue, stats),
+					  offsetof(struct pkt_rx_queue, stats), stats);
 }
 
 static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
-	unsigned i;
-	struct pmd_internals *internal = dev->data->dev_private;
-
-	for (i = 0; i < internal->nb_queues; i++) {
-		internal->rx_queue[i].rx_pkts = 0;
-		internal->rx_queue[i].rx_bytes = 0;
-	}
-
-	for (i = 0; i < internal->nb_queues; i++) {
-		internal->tx_queue[i].tx_pkts = 0;
-		internal->tx_queue[i].err_pkts = 0;
-		internal->tx_queue[i].tx_bytes = 0;
-	}
-
-	return 0;
+	return rte_eth_counters_reset(dev, offsetof(struct pkt_tx_queue, stats),
+				      offsetof(struct pkt_rx_queue, stats));
 }
 
 static int
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v8 4/8] net/af_xdp: use generic SW stats
  2024-05-21 17:00   ` [PATCH v8 0/8] Common statistics routines for SW based PMD's Stephen Hemminger
                       ` (2 preceding siblings ...)
  2024-05-21 17:00     ` [PATCH v8 3/8] net/af_packet: use generic SW stats Stephen Hemminger
@ 2024-05-21 17:00     ` Stephen Hemminger
  2024-05-21 17:00     ` [PATCH v8 5/8] net/pcap: " Stephen Hemminger
                       ` (3 subsequent siblings)
  7 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-21 17:00 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Ciara Loftus

Use common code for all SW stats.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/af_xdp/rte_eth_af_xdp.c | 85 ++++++++---------------------
 1 file changed, 22 insertions(+), 63 deletions(-)

diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 6ba455bb9b..c563621798 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -20,6 +20,7 @@
 #include <rte_ethdev.h>
 #include <ethdev_driver.h>
 #include <ethdev_vdev.h>
+#include <ethdev_swstats.h>
 #include <rte_kvargs.h>
 #include <bus_vdev_driver.h>
 #include <rte_string_fns.h>
@@ -120,19 +121,13 @@ struct xsk_umem_info {
 	uint32_t max_xsks;
 };
 
-struct rx_stats {
-	uint64_t rx_pkts;
-	uint64_t rx_bytes;
-	uint64_t rx_dropped;
-};
-
 struct pkt_rx_queue {
 	struct xsk_ring_cons rx;
 	struct xsk_umem_info *umem;
 	struct xsk_socket *xsk;
 	struct rte_mempool *mb_pool;
 
-	struct rx_stats stats;
+	struct rte_eth_counters stats;
 
 	struct xsk_ring_prod fq;
 	struct xsk_ring_cons cq;
@@ -143,17 +138,11 @@ struct pkt_rx_queue {
 	int busy_budget;
 };
 
-struct tx_stats {
-	uint64_t tx_pkts;
-	uint64_t tx_bytes;
-	uint64_t tx_dropped;
-};
-
 struct pkt_tx_queue {
 	struct xsk_ring_prod tx;
 	struct xsk_umem_info *umem;
 
-	struct tx_stats stats;
+	struct rte_eth_counters stats;
 
 	struct pkt_rx_queue *pair;
 	int xsk_queue_idx;
@@ -369,9 +358,7 @@ af_xdp_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	xsk_ring_cons__release(rx, nb_pkts);
 	(void)reserve_fill_queue(umem, nb_pkts, fq_bufs, fq);
 
-	/* statistics */
-	rxq->stats.rx_pkts += nb_pkts;
-	rxq->stats.rx_bytes += rx_bytes;
+	rte_eth_count_packets(&rxq->stats, nb_pkts, rx_bytes);
 
 	return nb_pkts;
 }
@@ -429,10 +416,7 @@ af_xdp_rx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	}
 
 	xsk_ring_cons__release(rx, nb_pkts);
-
-	/* statistics */
-	rxq->stats.rx_pkts += nb_pkts;
-	rxq->stats.rx_bytes += rx_bytes;
+	rte_eth_count_packets(&rxq->stats, nb_pkts, rx_bytes);
 
 	return nb_pkts;
 }
@@ -558,6 +542,7 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 					umem->mb_pool->header_size;
 			offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
 			desc->addr = addr | offset;
+			tx_bytes += mbuf->pkt_len;
 			count++;
 		} else {
 			struct rte_mbuf *local_mbuf =
@@ -585,20 +570,17 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			desc->addr = addr | offset;
 			rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
 					desc->len);
+			tx_bytes += mbuf->pkt_len;
 			rte_pktmbuf_free(mbuf);
 			count++;
 		}
-
-		tx_bytes += mbuf->pkt_len;
 	}
 
 out:
 	xsk_ring_prod__submit(&txq->tx, count);
 	kick_tx(txq, cq);
 
-	txq->stats.tx_pkts += count;
-	txq->stats.tx_bytes += tx_bytes;
-	txq->stats.tx_dropped += nb_pkts - count;
+	rte_eth_count_packets(&txq->stats, count, tx_bytes);
 
 	return count;
 }
@@ -648,8 +630,7 @@ af_xdp_tx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 	kick_tx(txq, cq);
 
-	txq->stats.tx_pkts += nb_pkts;
-	txq->stats.tx_bytes += tx_bytes;
+	rte_eth_count_packets(&txq->stats, nb_pkts, tx_bytes);
 
 	return nb_pkts;
 }
@@ -847,39 +828,26 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 static int
 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-	struct pmd_internals *internals = dev->data->dev_private;
 	struct pmd_process_private *process_private = dev->process_private;
-	struct xdp_statistics xdp_stats;
-	struct pkt_rx_queue *rxq;
-	struct pkt_tx_queue *txq;
-	socklen_t optlen;
-	int i, ret, fd;
+	unsigned int i;
 
-	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		optlen = sizeof(struct xdp_statistics);
-		rxq = &internals->rx_queues[i];
-		txq = rxq->pair;
-		stats->q_ipackets[i] = rxq->stats.rx_pkts;
-		stats->q_ibytes[i] = rxq->stats.rx_bytes;
+	rte_eth_counters_stats_get(dev, offsetof(struct pkt_tx_queue, stats),
+				   offsetof(struct pkt_rx_queue, stats), stats);
 
-		stats->q_opackets[i] = txq->stats.tx_pkts;
-		stats->q_obytes[i] = txq->stats.tx_bytes;
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		struct xdp_statistics xdp_stats;
+		socklen_t optlen = sizeof(xdp_stats);
+		int fd;
 
-		stats->ipackets += stats->q_ipackets[i];
-		stats->ibytes += stats->q_ibytes[i];
-		stats->imissed += rxq->stats.rx_dropped;
-		stats->oerrors += txq->stats.tx_dropped;
 		fd = process_private->rxq_xsk_fds[i];
-		ret = fd >= 0 ? getsockopt(fd, SOL_XDP, XDP_STATISTICS,
-					   &xdp_stats, &optlen) : -1;
-		if (ret != 0) {
+		if (fd < 0)
+			continue;
+		if (getsockopt(fd, SOL_XDP, XDP_STATISTICS,
+			       &xdp_stats, &optlen)  < 0) {
 			AF_XDP_LOG(ERR, "getsockopt() failed for XDP_STATISTICS.\n");
 			return -1;
 		}
 		stats->imissed += xdp_stats.rx_dropped;
-
-		stats->opackets += stats->q_opackets[i];
-		stats->obytes += stats->q_obytes[i];
 	}
 
 	return 0;
@@ -888,17 +856,8 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
-	struct pmd_internals *internals = dev->data->dev_private;
-	int i;
-
-	for (i = 0; i < internals->queue_cnt; i++) {
-		memset(&internals->rx_queues[i].stats, 0,
-					sizeof(struct rx_stats));
-		memset(&internals->tx_queues[i].stats, 0,
-					sizeof(struct tx_stats));
-	}
-
-	return 0;
+	return rte_eth_counters_reset(dev, offsetof(struct pkt_tx_queue, stats),
+				      offsetof(struct pkt_rx_queue, stats));
 }
 
 #ifdef RTE_NET_AF_XDP_LIBBPF_XDP_ATTACH
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v8 5/8] net/pcap: use generic SW stats
  2024-05-21 17:00   ` [PATCH v8 0/8] Common statistics routines for SW based PMD's Stephen Hemminger
                       ` (3 preceding siblings ...)
  2024-05-21 17:00     ` [PATCH v8 4/8] net/af_xdp: " Stephen Hemminger
@ 2024-05-21 17:00     ` Stephen Hemminger
  2024-05-21 17:00     ` [PATCH v8 6/8] net/ring: " Stephen Hemminger
                       ` (2 subsequent siblings)
  7 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-21 17:00 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

Use common statistics for SW drivers.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/pcap/pcap_ethdev.c | 100 ++++++++-------------------------
 1 file changed, 22 insertions(+), 78 deletions(-)

diff --git a/drivers/net/pcap/pcap_ethdev.c b/drivers/net/pcap/pcap_ethdev.c
index bfec085045..4689359527 100644
--- a/drivers/net/pcap/pcap_ethdev.c
+++ b/drivers/net/pcap/pcap_ethdev.c
@@ -11,6 +11,7 @@
 
 #include <rte_cycles.h>
 #include <ethdev_driver.h>
+#include <ethdev_swstats.h>
 #include <ethdev_vdev.h>
 #include <rte_kvargs.h>
 #include <rte_malloc.h>
@@ -48,13 +49,6 @@ static uint8_t iface_idx;
 static uint64_t timestamp_rx_dynflag;
 static int timestamp_dynfield_offset = -1;
 
-struct queue_stat {
-	volatile unsigned long pkts;
-	volatile unsigned long bytes;
-	volatile unsigned long err_pkts;
-	volatile unsigned long rx_nombuf;
-};
-
 struct queue_missed_stat {
 	/* last value retrieved from pcap */
 	unsigned int pcap;
@@ -68,7 +62,7 @@ struct pcap_rx_queue {
 	uint16_t port_id;
 	uint16_t queue_id;
 	struct rte_mempool *mb_pool;
-	struct queue_stat rx_stat;
+	struct rte_eth_counters rx_stat;
 	struct queue_missed_stat missed_stat;
 	char name[PATH_MAX];
 	char type[ETH_PCAP_ARG_MAXLEN];
@@ -80,7 +74,7 @@ struct pcap_rx_queue {
 struct pcap_tx_queue {
 	uint16_t port_id;
 	uint16_t queue_id;
-	struct queue_stat tx_stat;
+	struct rte_eth_counters tx_stat;
 	char name[PATH_MAX];
 	char type[ETH_PCAP_ARG_MAXLEN];
 };
@@ -258,14 +252,13 @@ eth_pcap_rx_infinite(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		bufs[i]->data_len = pcap_buf->data_len;
 		bufs[i]->pkt_len = pcap_buf->pkt_len;
 		bufs[i]->port = pcap_q->port_id;
-		rx_bytes += pcap_buf->data_len;
+		rx_bytes += pcap_buf->pkt_len;
 
 		/* Enqueue packet back on ring to allow infinite rx. */
 		rte_ring_enqueue(pcap_q->pkts, pcap_buf);
 	}
 
-	pcap_q->rx_stat.pkts += i;
-	pcap_q->rx_stat.bytes += rx_bytes;
+	rte_eth_count_packets(&pcap_q->rx_stat, i, rx_bytes);
 
 	return i;
 }
@@ -300,7 +293,9 @@ eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		mbuf = rte_pktmbuf_alloc(pcap_q->mb_pool);
 		if (unlikely(mbuf == NULL)) {
-			pcap_q->rx_stat.rx_nombuf++;
+			struct rte_eth_dev *dev = &rte_eth_devices[pcap_q->port_id];
+
+			++dev->data->rx_mbuf_alloc_failed;
 			break;
 		}
 
@@ -315,7 +310,7 @@ eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 						       mbuf,
 						       packet,
 						       header.caplen) == -1)) {
-				pcap_q->rx_stat.err_pkts++;
+				rte_eth_count_error(&pcap_q->rx_stat);
 				rte_pktmbuf_free(mbuf);
 				break;
 			}
@@ -332,9 +327,8 @@ eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		num_rx++;
 		rx_bytes += header.caplen;
 	}
-	pcap_q->rx_stat.pkts += num_rx;
-	pcap_q->rx_stat.bytes += rx_bytes;
 
+	rte_eth_count_packets(&pcap_q->rx_stat, num_rx, rx_bytes);
 	return num_rx;
 }
 
@@ -423,9 +417,8 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	 * we flush the pcap dumper within each burst.
 	 */
 	pcap_dump_flush(dumper);
-	dumper_q->tx_stat.pkts += num_tx;
-	dumper_q->tx_stat.bytes += tx_bytes;
-	dumper_q->tx_stat.err_pkts += nb_pkts - num_tx;
+
+	rte_eth_count_packets(&dumper_q->tx_stat, num_tx, tx_bytes);
 
 	return nb_pkts;
 }
@@ -448,9 +441,7 @@ eth_tx_drop(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		rte_pktmbuf_free(bufs[i]);
 	}
 
-	tx_queue->tx_stat.pkts += nb_pkts;
-	tx_queue->tx_stat.bytes += tx_bytes;
-
+	rte_eth_count_packets(&tx_queue->tx_stat, nb_pkts, tx_bytes);
 	return i;
 }
 
@@ -502,9 +493,7 @@ eth_pcap_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		rte_pktmbuf_free(mbuf);
 	}
 
-	tx_queue->tx_stat.pkts += num_tx;
-	tx_queue->tx_stat.bytes += tx_bytes;
-	tx_queue->tx_stat.err_pkts += i - num_tx;
+	rte_eth_count_packets(&tx_queue->tx_stat, num_tx, tx_bytes);
 
 	return i;
 }
@@ -746,41 +735,12 @@ static int
 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
 	unsigned int i;
-	unsigned long rx_packets_total = 0, rx_bytes_total = 0;
-	unsigned long rx_missed_total = 0;
-	unsigned long rx_nombuf_total = 0, rx_err_total = 0;
-	unsigned long tx_packets_total = 0, tx_bytes_total = 0;
-	unsigned long tx_packets_err_total = 0;
-	const struct pmd_internals *internal = dev->data->dev_private;
-
-	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
-			i < dev->data->nb_rx_queues; i++) {
-		stats->q_ipackets[i] = internal->rx_queue[i].rx_stat.pkts;
-		stats->q_ibytes[i] = internal->rx_queue[i].rx_stat.bytes;
-		rx_nombuf_total += internal->rx_queue[i].rx_stat.rx_nombuf;
-		rx_err_total += internal->rx_queue[i].rx_stat.err_pkts;
-		rx_packets_total += stats->q_ipackets[i];
-		rx_bytes_total += stats->q_ibytes[i];
-		rx_missed_total += queue_missed_stat_get(dev, i);
-	}
 
-	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
-			i < dev->data->nb_tx_queues; i++) {
-		stats->q_opackets[i] = internal->tx_queue[i].tx_stat.pkts;
-		stats->q_obytes[i] = internal->tx_queue[i].tx_stat.bytes;
-		tx_packets_total += stats->q_opackets[i];
-		tx_bytes_total += stats->q_obytes[i];
-		tx_packets_err_total += internal->tx_queue[i].tx_stat.err_pkts;
-	}
+	rte_eth_counters_stats_get(dev, offsetof(struct pcap_tx_queue, tx_stat),
+				   offsetof(struct pcap_rx_queue, rx_stat), stats);
 
-	stats->ipackets = rx_packets_total;
-	stats->ibytes = rx_bytes_total;
-	stats->imissed = rx_missed_total;
-	stats->ierrors = rx_err_total;
-	stats->rx_nombuf = rx_nombuf_total;
-	stats->opackets = tx_packets_total;
-	stats->obytes = tx_bytes_total;
-	stats->oerrors = tx_packets_err_total;
+	for (i = 0; i < dev->data->nb_rx_queues; i++)
+		stats->imissed += queue_missed_stat_get(dev, i);
 
 	return 0;
 }
@@ -789,21 +749,12 @@ static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
 	unsigned int i;
-	struct pmd_internals *internal = dev->data->dev_private;
 
-	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		internal->rx_queue[i].rx_stat.pkts = 0;
-		internal->rx_queue[i].rx_stat.bytes = 0;
-		internal->rx_queue[i].rx_stat.err_pkts = 0;
-		internal->rx_queue[i].rx_stat.rx_nombuf = 0;
-		queue_missed_stat_reset(dev, i);
-	}
+	rte_eth_counters_reset(dev, offsetof(struct pcap_tx_queue, tx_stat),
+			       offsetof(struct pcap_rx_queue, rx_stat));
 
-	for (i = 0; i < dev->data->nb_tx_queues; i++) {
-		internal->tx_queue[i].tx_stat.pkts = 0;
-		internal->tx_queue[i].tx_stat.bytes = 0;
-		internal->tx_queue[i].tx_stat.err_pkts = 0;
-	}
+	for (i = 0; i < dev->data->nb_rx_queues; i++)
+		queue_missed_stat_reset(dev, i);
 
 	return 0;
 }
@@ -929,13 +880,6 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 				pcap_pkt_count);
 			return -EINVAL;
 		}
-
-		/*
-		 * Reset the stats for this queue since eth_pcap_rx calls above
-		 * didn't result in the application receiving packets.
-		 */
-		pcap_q->rx_stat.pkts = 0;
-		pcap_q->rx_stat.bytes = 0;
 	}
 
 	return 0;
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v8 6/8] net/ring: use generic SW stats
  2024-05-21 17:00   ` [PATCH v8 0/8] Common statistics routines for SW based PMD's Stephen Hemminger
                       ` (4 preceding siblings ...)
  2024-05-21 17:00     ` [PATCH v8 5/8] net/pcap: " Stephen Hemminger
@ 2024-05-21 17:00     ` Stephen Hemminger
  2024-05-21 17:00     ` [PATCH v8 7/8] net/tap: " Stephen Hemminger
  2024-05-21 17:00     ` [PATCH v8 8/8] net/null: " Stephen Hemminger
  7 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-21 17:00 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Bruce Richardson

Use generic per-queue infrastructure.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/ring/rte_eth_ring.c | 63 +++++++++++----------------------
 1 file changed, 20 insertions(+), 43 deletions(-)

diff --git a/drivers/net/ring/rte_eth_ring.c b/drivers/net/ring/rte_eth_ring.c
index b16f5d55f2..36053e4038 100644
--- a/drivers/net/ring/rte_eth_ring.c
+++ b/drivers/net/ring/rte_eth_ring.c
@@ -7,6 +7,7 @@
 #include "rte_eth_ring.h"
 #include <rte_mbuf.h>
 #include <ethdev_driver.h>
+#include <ethdev_swstats.h>
 #include <rte_malloc.h>
 #include <rte_memcpy.h>
 #include <rte_os_shim.h>
@@ -44,8 +45,8 @@ enum dev_action {
 
 struct ring_queue {
 	struct rte_ring *rng;
-	RTE_ATOMIC(uint64_t) rx_pkts;
-	RTE_ATOMIC(uint64_t) tx_pkts;
+
+	struct rte_eth_counters stats;
 };
 
 struct pmd_internals {
@@ -77,12 +78,12 @@ eth_ring_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 {
 	void **ptrs = (void *)&bufs[0];
 	struct ring_queue *r = q;
-	const uint16_t nb_rx = (uint16_t)rte_ring_dequeue_burst(r->rng,
-			ptrs, nb_bufs, NULL);
-	if (r->rng->flags & RING_F_SC_DEQ)
-		r->rx_pkts += nb_rx;
-	else
-		rte_atomic_fetch_add_explicit(&r->rx_pkts, nb_rx, rte_memory_order_relaxed);
+	uint16_t nb_rx;
+
+	nb_rx = (uint16_t)rte_ring_dequeue_burst(r->rng, ptrs, nb_bufs, NULL);
+
+	rte_counter64_add(&r->stats.packets, nb_rx);
+
 	return nb_rx;
 }
 
@@ -91,12 +92,12 @@ eth_ring_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 {
 	void **ptrs = (void *)&bufs[0];
 	struct ring_queue *r = q;
-	const uint16_t nb_tx = (uint16_t)rte_ring_enqueue_burst(r->rng,
-			ptrs, nb_bufs, NULL);
-	if (r->rng->flags & RING_F_SP_ENQ)
-		r->tx_pkts += nb_tx;
-	else
-		rte_atomic_fetch_add_explicit(&r->tx_pkts, nb_tx, rte_memory_order_relaxed);
+	uint16_t nb_tx;
+
+	nb_tx = (uint16_t)rte_ring_enqueue_burst(r->rng, ptrs, nb_bufs, NULL);
+
+	rte_counter64_add(&r->stats.packets, nb_tx);
+
 	return nb_tx;
 }
 
@@ -193,40 +194,16 @@ eth_dev_info(struct rte_eth_dev *dev,
 static int
 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-	unsigned int i;
-	unsigned long rx_total = 0, tx_total = 0;
-	const struct pmd_internals *internal = dev->data->dev_private;
-
-	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
-			i < dev->data->nb_rx_queues; i++) {
-		stats->q_ipackets[i] = internal->rx_ring_queues[i].rx_pkts;
-		rx_total += stats->q_ipackets[i];
-	}
-
-	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
-			i < dev->data->nb_tx_queues; i++) {
-		stats->q_opackets[i] = internal->tx_ring_queues[i].tx_pkts;
-		tx_total += stats->q_opackets[i];
-	}
-
-	stats->ipackets = rx_total;
-	stats->opackets = tx_total;
-
-	return 0;
+	return rte_eth_counters_stats_get(dev, offsetof(struct ring_queue, stats),
+					  offsetof(struct ring_queue, stats),
+					  stats);
 }
 
 static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
-	unsigned int i;
-	struct pmd_internals *internal = dev->data->dev_private;
-
-	for (i = 0; i < dev->data->nb_rx_queues; i++)
-		internal->rx_ring_queues[i].rx_pkts = 0;
-	for (i = 0; i < dev->data->nb_tx_queues; i++)
-		internal->tx_ring_queues[i].tx_pkts = 0;
-
-	return 0;
+	return rte_eth_counters_reset(dev, offsetof(struct ring_queue, stats),
+				      offsetof(struct ring_queue, stats));
 }
 
 static void
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v8 7/8] net/tap: use generic SW stats
  2024-05-21 17:00   ` [PATCH v8 0/8] Common statistics routines for SW based PMD's Stephen Hemminger
                       ` (5 preceding siblings ...)
  2024-05-21 17:00     ` [PATCH v8 6/8] net/ring: " Stephen Hemminger
@ 2024-05-21 17:00     ` Stephen Hemminger
  2024-05-21 17:00     ` [PATCH v8 8/8] net/null: " Stephen Hemminger
  7 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-21 17:00 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

Use new common sw statistics.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/tap/rte_eth_tap.c | 75 ++++++-----------------------------
 drivers/net/tap/rte_eth_tap.h | 15 ++-----
 2 files changed, 16 insertions(+), 74 deletions(-)

diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index 69d9da695b..9cc923fd0c 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -455,7 +455,7 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		/* Packet couldn't fit in the provided mbuf */
 		if (unlikely(rxq->pi.flags & TUN_PKT_STRIP)) {
-			rxq->stats.ierrors++;
+			rte_eth_count_error(&rxq->stats);
 			continue;
 		}
 
@@ -467,7 +467,9 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			struct rte_mbuf *buf = rte_pktmbuf_alloc(rxq->mp);
 
 			if (unlikely(!buf)) {
-				rxq->stats.rx_nombuf++;
+				struct rte_eth_dev *dev = &rte_eth_devices[rxq->in_port];
+				++dev->data->rx_mbuf_alloc_failed;
+
 				/* No new buf has been allocated: do nothing */
 				if (!new_tail || !seg)
 					goto end;
@@ -512,8 +514,7 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		num_rx_bytes += mbuf->pkt_len;
 	}
 end:
-	rxq->stats.ipackets += num_rx;
-	rxq->stats.ibytes += num_rx_bytes;
+	rte_eth_count_packets(&rxq->stats, num_rx, num_rx_bytes);
 
 	if (trigger && num_rx < nb_pkts)
 		rxq->trigger_seen = trigger;
@@ -693,7 +694,7 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			tso_segsz = mbuf_in->tso_segsz + hdrs_len;
 			if (unlikely(tso_segsz == hdrs_len) ||
 				tso_segsz > *txq->mtu) {
-				txq->stats.errs++;
+				rte_eth_count_error(&txq->stats);
 				break;
 			}
 			gso_ctx->gso_size = tso_segsz;
@@ -731,7 +732,8 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		ret = tap_write_mbufs(txq, num_mbufs, mbuf,
 				&num_packets, &num_tx_bytes);
 		if (ret == -1) {
-			txq->stats.errs++;
+			rte_eth_count_error(&txq->stats);
+
 			/* free tso mbufs */
 			if (num_tso_mbufs > 0)
 				rte_pktmbuf_free_bulk(mbuf, num_tso_mbufs);
@@ -749,9 +751,7 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		}
 	}
 
-	txq->stats.opackets += num_packets;
-	txq->stats.errs += nb_pkts - num_tx;
-	txq->stats.obytes += num_tx_bytes;
+	rte_eth_count_packets(&txq->stats, num_packets, num_tx_bytes);
 
 	return num_tx;
 }
@@ -1055,64 +1055,15 @@ tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 static int
 tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats)
 {
-	unsigned int i, imax;
-	unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
-	unsigned long rx_bytes_total = 0, tx_bytes_total = 0;
-	unsigned long rx_nombuf = 0, ierrors = 0;
-	const struct pmd_internals *pmd = dev->data->dev_private;
-
-	/* rx queue statistics */
-	imax = (dev->data->nb_rx_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ?
-		dev->data->nb_rx_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS;
-	for (i = 0; i < imax; i++) {
-		tap_stats->q_ipackets[i] = pmd->rxq[i].stats.ipackets;
-		tap_stats->q_ibytes[i] = pmd->rxq[i].stats.ibytes;
-		rx_total += tap_stats->q_ipackets[i];
-		rx_bytes_total += tap_stats->q_ibytes[i];
-		rx_nombuf += pmd->rxq[i].stats.rx_nombuf;
-		ierrors += pmd->rxq[i].stats.ierrors;
-	}
-
-	/* tx queue statistics */
-	imax = (dev->data->nb_tx_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ?
-		dev->data->nb_tx_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS;
-
-	for (i = 0; i < imax; i++) {
-		tap_stats->q_opackets[i] = pmd->txq[i].stats.opackets;
-		tap_stats->q_obytes[i] = pmd->txq[i].stats.obytes;
-		tx_total += tap_stats->q_opackets[i];
-		tx_err_total += pmd->txq[i].stats.errs;
-		tx_bytes_total += tap_stats->q_obytes[i];
-	}
-
-	tap_stats->ipackets = rx_total;
-	tap_stats->ibytes = rx_bytes_total;
-	tap_stats->ierrors = ierrors;
-	tap_stats->rx_nombuf = rx_nombuf;
-	tap_stats->opackets = tx_total;
-	tap_stats->oerrors = tx_err_total;
-	tap_stats->obytes = tx_bytes_total;
-	return 0;
+	return rte_eth_counters_stats_get(dev, offsetof(struct tx_queue, stats),
+					  offsetof(struct rx_queue, stats), tap_stats);
 }
 
 static int
 tap_stats_reset(struct rte_eth_dev *dev)
 {
-	int i;
-	struct pmd_internals *pmd = dev->data->dev_private;
-
-	for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
-		pmd->rxq[i].stats.ipackets = 0;
-		pmd->rxq[i].stats.ibytes = 0;
-		pmd->rxq[i].stats.ierrors = 0;
-		pmd->rxq[i].stats.rx_nombuf = 0;
-
-		pmd->txq[i].stats.opackets = 0;
-		pmd->txq[i].stats.errs = 0;
-		pmd->txq[i].stats.obytes = 0;
-	}
-
-	return 0;
+	return rte_eth_counters_reset(dev, offsetof(struct tx_queue, stats),
+				      offsetof(struct rx_queue, stats));
 }
 
 static int
diff --git a/drivers/net/tap/rte_eth_tap.h b/drivers/net/tap/rte_eth_tap.h
index 5ac93f93e9..8cba9ea410 100644
--- a/drivers/net/tap/rte_eth_tap.h
+++ b/drivers/net/tap/rte_eth_tap.h
@@ -14,6 +14,7 @@
 #include <linux/if_tun.h>
 
 #include <ethdev_driver.h>
+#include <ethdev_swstats.h>
 #include <rte_ether.h>
 #include <rte_gso.h>
 #include "tap_log.h"
@@ -32,23 +33,13 @@ enum rte_tuntap_type {
 	ETH_TUNTAP_TYPE_MAX,
 };
 
-struct pkt_stats {
-	uint64_t opackets;              /* Number of output packets */
-	uint64_t ipackets;              /* Number of input packets */
-	uint64_t obytes;                /* Number of bytes on output */
-	uint64_t ibytes;                /* Number of bytes on input */
-	uint64_t errs;                  /* Number of TX error packets */
-	uint64_t ierrors;               /* Number of RX error packets */
-	uint64_t rx_nombuf;             /* Nb of RX mbuf alloc failures */
-};
-
 struct rx_queue {
 	struct rte_mempool *mp;         /* Mempool for RX packets */
 	uint32_t trigger_seen;          /* Last seen Rx trigger value */
 	uint16_t in_port;               /* Port ID */
 	uint16_t queue_id;		/* queue ID*/
-	struct pkt_stats stats;         /* Stats for this RX queue */
 	uint16_t nb_rx_desc;            /* max number of mbufs available */
+	struct rte_eth_counters stats;  /* Stats for this RX queue */
 	struct rte_eth_rxmode *rxmode;  /* RX features */
 	struct rte_mbuf *pool;          /* mbufs pool for this queue */
 	struct iovec (*iovecs)[];       /* descriptors for this queue */
@@ -59,7 +50,7 @@ struct tx_queue {
 	int type;                       /* Type field - TUN|TAP */
 	uint16_t *mtu;                  /* Pointer to MTU from dev_data */
 	uint16_t csum:1;                /* Enable checksum offloading */
-	struct pkt_stats stats;         /* Stats for this TX queue */
+	struct rte_eth_counters stats;	/* Stats for this TX queue */
 	struct rte_gso_ctx gso_ctx;     /* GSO context */
 	uint16_t out_port;              /* Port ID */
 	uint16_t queue_id;		/* queue ID*/
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v8 8/8] net/null: use generic SW stats
  2024-05-21 17:00   ` [PATCH v8 0/8] Common statistics routines for SW based PMD's Stephen Hemminger
                       ` (6 preceding siblings ...)
  2024-05-21 17:00     ` [PATCH v8 7/8] net/tap: " Stephen Hemminger
@ 2024-05-21 17:00     ` Stephen Hemminger
  7 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-21 17:00 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Tetsuya Mukawa

Use the new common code for statistics.
This also fixes the bug that this driver was not accounting
for bytes.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/null/rte_eth_null.c | 83 +++++++++------------------------
 1 file changed, 21 insertions(+), 62 deletions(-)

diff --git a/drivers/net/null/rte_eth_null.c b/drivers/net/null/rte_eth_null.c
index f4ed3b8a7f..83add9c819 100644
--- a/drivers/net/null/rte_eth_null.c
+++ b/drivers/net/null/rte_eth_null.c
@@ -8,6 +8,7 @@
 #include <rte_mbuf.h>
 #include <ethdev_driver.h>
 #include <ethdev_vdev.h>
+#include <ethdev_swstats.h>
 #include <rte_malloc.h>
 #include <rte_memcpy.h>
 #include <bus_vdev_driver.h>
@@ -37,8 +38,8 @@ struct null_queue {
 	struct rte_mempool *mb_pool;
 	struct rte_mbuf *dummy_packet;
 
-	RTE_ATOMIC(uint64_t) rx_pkts;
-	RTE_ATOMIC(uint64_t) tx_pkts;
+	struct rte_eth_counters tx_stats;
+	struct rte_eth_counters rx_stats;
 };
 
 struct pmd_options {
@@ -101,9 +102,7 @@ eth_null_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 		bufs[i]->port = h->internals->port_id;
 	}
 
-	/* NOTE: review for potential ordering optimization */
-	rte_atomic_fetch_add_explicit(&h->rx_pkts, i, rte_memory_order_seq_cst);
-
+	rte_eth_count_packets(&h->rx_stats, i, i * packet_size);
 	return i;
 }
 
@@ -129,8 +128,7 @@ eth_null_copy_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 		bufs[i]->port = h->internals->port_id;
 	}
 
-	/* NOTE: review for potential ordering optimization */
-	rte_atomic_fetch_add_explicit(&h->rx_pkts, i, rte_memory_order_seq_cst);
+	rte_eth_count_packets(&h->rx_stats, i, i * packet_size);
 
 	return i;
 }
@@ -147,16 +145,17 @@ eth_null_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 {
 	int i;
 	struct null_queue *h = q;
+	uint32_t tx_bytes = 0;
 
 	if ((q == NULL) || (bufs == NULL))
 		return 0;
 
-	for (i = 0; i < nb_bufs; i++)
+	for (i = 0; i < nb_bufs; i++) {
+		tx_bytes += rte_pktmbuf_pkt_len(bufs[i]);
 		rte_pktmbuf_free(bufs[i]);
+	}
 
-	/* NOTE: review for potential ordering optimization */
-	rte_atomic_fetch_add_explicit(&h->tx_pkts, i, rte_memory_order_seq_cst);
-
+	rte_eth_count_packets(&h->tx_stats, i, tx_bytes);
 	return i;
 }
 
@@ -166,20 +165,20 @@ eth_null_copy_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 	int i;
 	struct null_queue *h = q;
 	unsigned int packet_size;
+	uint32_t tx_bytes = 0;
 
 	if ((q == NULL) || (bufs == NULL))
 		return 0;
 
 	packet_size = h->internals->packet_size;
 	for (i = 0; i < nb_bufs; i++) {
+		tx_bytes += rte_pktmbuf_pkt_len(bufs[i]);
 		rte_memcpy(h->dummy_packet, rte_pktmbuf_mtod(bufs[i], void *),
 					packet_size);
 		rte_pktmbuf_free(bufs[i]);
 	}
 
-	/* NOTE: review for potential ordering optimization */
-	rte_atomic_fetch_add_explicit(&h->tx_pkts, i, rte_memory_order_seq_cst);
-
+	rte_eth_count_packets(&h->tx_stats, i, tx_bytes);
 	return i;
 }
 
@@ -322,60 +321,20 @@ eth_dev_info(struct rte_eth_dev *dev,
 }
 
 static int
-eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
+eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-	unsigned int i, num_stats;
-	unsigned long rx_total = 0, tx_total = 0;
-	const struct pmd_internals *internal;
-
-	if ((dev == NULL) || (igb_stats == NULL))
-		return -EINVAL;
-
-	internal = dev->data->dev_private;
-	num_stats = RTE_MIN((unsigned int)RTE_ETHDEV_QUEUE_STAT_CNTRS,
-			RTE_MIN(dev->data->nb_rx_queues,
-				RTE_DIM(internal->rx_null_queues)));
-	for (i = 0; i < num_stats; i++) {
-		/* NOTE: review for atomic access */
-		igb_stats->q_ipackets[i] =
-			internal->rx_null_queues[i].rx_pkts;
-		rx_total += igb_stats->q_ipackets[i];
-	}
-
-	num_stats = RTE_MIN((unsigned int)RTE_ETHDEV_QUEUE_STAT_CNTRS,
-			RTE_MIN(dev->data->nb_tx_queues,
-				RTE_DIM(internal->tx_null_queues)));
-	for (i = 0; i < num_stats; i++) {
-		/* NOTE: review for atomic access */
-		igb_stats->q_opackets[i] =
-			internal->tx_null_queues[i].tx_pkts;
-		tx_total += igb_stats->q_opackets[i];
-	}
-
-	igb_stats->ipackets = rx_total;
-	igb_stats->opackets = tx_total;
-
-	return 0;
+	return rte_eth_counters_stats_get(dev,
+					  offsetof(struct null_queue, tx_stats),
+					  offsetof(struct null_queue, rx_stats),
+					  stats);
 }
 
 static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
-	unsigned int i;
-	struct pmd_internals *internal;
-
-	if (dev == NULL)
-		return -EINVAL;
-
-	internal = dev->data->dev_private;
-	for (i = 0; i < RTE_DIM(internal->rx_null_queues); i++)
-		/* NOTE: review for atomic access */
-		internal->rx_null_queues[i].rx_pkts = 0;
-	for (i = 0; i < RTE_DIM(internal->tx_null_queues); i++)
-		/* NOTE: review for atomic access */
-		internal->tx_null_queues[i].tx_pkts = 0;
-
-	return 0;
+	return rte_eth_counters_reset(dev,
+				      offsetof(struct null_queue, tx_stats),
+				      offsetof(struct null_queue, rx_stats));
 }
 
 static void
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v9 0/8] Common statistics for SW PMD's
  2024-05-10  5:01 ` [RFC 0/3] generic sw counters Stephen Hemminger
                     ` (10 preceding siblings ...)
  2024-05-21 17:00   ` [PATCH v8 0/8] Common statistics routines for SW based PMD's Stephen Hemminger
@ 2024-05-21 20:16   ` Stephen Hemminger
  2024-05-21 20:16     ` [PATCH v9 1/8] eal: generic 64 bit counter Stephen Hemminger
                       ` (7 more replies)
  2024-05-22 16:12   ` [PATCH v10 0/8] Common statistics for software PMD's Stephen Hemminger
  12 siblings, 8 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-21 20:16 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

Adds common code for collecting basic statistics used by many
SW based PMD's such as af_packet, af_xdp, null, tap and ring.

This version uses two counters (current and offset) to implement
SMP safe reset on 64 bit platforms; this avoids having to do locked
operation in the fast path.  On 32 bit platforms, the compiler
will split access to 64 bit variables by default; therefore
it has to use std atomic.

By using common code, it also fixes missing counts in several drivers.
And fixes several bugs.

v9 - change includes in rte_counter.h to fix build on some
     versions

Stephen Hemminger (8):
  eal: generic 64 bit counter
  ethdev: add common counters for statistics
  net/af_packet: use generic SW stats
  net/af_xdp: use generic SW stats
  net/pcap: use generic SW stats
  net/ring: use generic SW stats
  net/tap: use generic SW stats
  net/null: use generic SW stats

 drivers/net/af_packet/rte_eth_af_packet.c |  78 +++--------
 drivers/net/af_xdp/rte_eth_af_xdp.c       |  85 ++++--------
 drivers/net/null/rte_eth_null.c           |  83 +++---------
 drivers/net/pcap/pcap_ethdev.c            | 100 ++++-----------
 drivers/net/ring/rte_eth_ring.c           |  63 +++------
 drivers/net/tap/rte_eth_tap.c             |  75 ++---------
 drivers/net/tap/rte_eth_tap.h             |  15 +--
 lib/eal/include/meson.build               |   1 +
 lib/eal/include/rte_counter.h             | 150 ++++++++++++++++++++++
 lib/ethdev/ethdev_swstats.c               | 101 +++++++++++++++
 lib/ethdev/ethdev_swstats.h               | 111 ++++++++++++++++
 lib/ethdev/meson.build                    |   2 +
 lib/ethdev/version.map                    |   3 +
 13 files changed, 486 insertions(+), 381 deletions(-)
 create mode 100644 lib/eal/include/rte_counter.h
 create mode 100644 lib/ethdev/ethdev_swstats.c
 create mode 100644 lib/ethdev/ethdev_swstats.h

-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v9 1/8] eal: generic 64 bit counter
  2024-05-21 20:16   ` [PATCH v9 0/8] Common statistics for SW PMD's Stephen Hemminger
@ 2024-05-21 20:16     ` Stephen Hemminger
  2024-05-22  8:31       ` Morten Brørup
  2024-05-21 20:16     ` [PATCH v9 2/8] ethdev: add common counters for statistics Stephen Hemminger
                       ` (6 subsequent siblings)
  7 siblings, 1 reply; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-21 20:16 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Morten Brørup, Tyler Retzlaff

This header implements 64 bit counters using atomic
operations but with a weak memory ordering so that
they are safe against load/store splits on 32 bit platforms.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
---
 lib/eal/include/meson.build   |   1 +
 lib/eal/include/rte_counter.h | 150 ++++++++++++++++++++++++++++++++++
 2 files changed, 151 insertions(+)
 create mode 100644 lib/eal/include/rte_counter.h

diff --git a/lib/eal/include/meson.build b/lib/eal/include/meson.build
index e94b056d46..c070dd0079 100644
--- a/lib/eal/include/meson.build
+++ b/lib/eal/include/meson.build
@@ -12,6 +12,7 @@ headers += files(
         'rte_class.h',
         'rte_common.h',
         'rte_compat.h',
+        'rte_counter.h',
         'rte_debug.h',
         'rte_dev.h',
         'rte_devargs.h',
diff --git a/lib/eal/include/rte_counter.h b/lib/eal/include/rte_counter.h
new file mode 100644
index 0000000000..f0c2b71a6c
--- /dev/null
+++ b/lib/eal/include/rte_counter.h
@@ -0,0 +1,150 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+#ifndef _RTE_COUNTER_H_
+#define _RTE_COUNTER_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <rte_compat.h>
+#include <rte_common.h>
+#include <rte_stdatomic.h>
+
+/**
+ * @file
+ * RTE Counter
+ *
+ * A counter is 64 bit value that is safe from split read/write.
+ * It assumes that only one cpu at a time  will update the counter,
+ * and another CPU may want to read it.
+ *
+ * This is a weaker subset of full atomic variables.
+ *
+ * The counters are subject to the restrictions of atomic variables
+ * in packed structures or unaligned.
+ */
+
+#ifdef RTE_ARCH_64
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * On native 64 bit platform, counter is implemented as basic
+ * 64 bit unsigned integer that only increases.
+ */
+typedef struct {
+	uint64_t current;
+	uint64_t offset;
+} rte_counter64_t;
+
+/**
+ * @internal
+ * Macro to implement read once (compiler barrier) using stdatomic.
+ * This is compiler barrier only.
+ */
+#define __rte_read_once(var)						\
+	rte_atomic_load_explicit((__rte_atomic typeof(&(var)))&(var),	\
+		rte_memory_order_consume)
+
+/**
+ * @internal
+ * Macro to implement write once (compiler barrier) using stdatomic.
+ * This is compiler barrier only.
+ */
+#define __rte_write_once(var, val)					    \
+	rte_atomic_store_explicit((__rte_atomic typeof(&(var)))&(var), val, \
+		rte_memory_order_release)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Add value to counter.
+ * Assumes this operation is only done by one thread on the object.
+ *
+ * @param counter
+ *    A pointer to the counter.
+ * @param val
+ *    The value to add to the counter.
+ */
+static inline void
+rte_counter64_add(rte_counter64_t *counter, uint32_t val)
+{
+	counter->current += val;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Read a counter.
+ * This operation can be done by any thread.
+ *
+ * @param counter
+ *    A pointer to the counter.
+ * @return
+ *  The current value of the counter.
+ */
+__rte_experimental
+static inline uint64_t
+rte_counter64_read(const rte_counter64_t *counter)
+{
+	return __rte_read_once(counter->current) - __rte_read_once(counter->offset);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Reset a counter to zero.
+ * This operation can be done by any thread.
+ *
+ * @param counter
+ *    A pointer to the counter.
+ */
+__rte_experimental
+static inline void
+rte_counter64_reset(rte_counter64_t *counter)
+{
+	__rte_write_once(counter->offset, __rte_read_once(counter->current));
+}
+
+#else
+
+/* On 32 bit platform, need to use atomic to avoid load/store tearing */
+typedef RTE_ATOMIC(uint64_t) rte_counter64_t;
+
+__rte_experimental
+static inline void
+rte_counter64_add(rte_counter64_t *counter, uint32_t val)
+{
+	rte_atomic_fetch_add_explicit(counter, val, rte_memory_order_relaxed);
+}
+
+__rte_experimental
+static inline uint64_t
+rte_counter64_read(const rte_counter64_t *counter)
+{
+	return rte_atomic_load_explicit(counter, rte_memory_order_consume);
+}
+
+__rte_experimental
+static inline void
+rte_counter64_reset(rte_counter64_t *counter)
+{
+	rte_atomic_store_explicit(counter, 0, rte_memory_order_release);
+}
+
+#endif /* RTE_ARCH_64 */
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_COUNTER_H_ */
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v9 2/8] ethdev: add common counters for statistics
  2024-05-21 20:16   ` [PATCH v9 0/8] Common statistics for SW PMD's Stephen Hemminger
  2024-05-21 20:16     ` [PATCH v9 1/8] eal: generic 64 bit counter Stephen Hemminger
@ 2024-05-21 20:16     ` Stephen Hemminger
  2024-05-21 20:16     ` [PATCH v9 3/8] net/af_packet: use generic SW stats Stephen Hemminger
                       ` (5 subsequent siblings)
  7 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-21 20:16 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Thomas Monjalon, Ferruh Yigit, Andrew Rybchenko

Introduce common helper routines for keeping track of per-queue
statistics in SW PMD's. The code in several drivers had
copy/pasted the same code for this, but had common issues
with 64 bit counters on 32 bit platforms.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 lib/ethdev/ethdev_swstats.c | 101 ++++++++++++++++++++++++++++++++
 lib/ethdev/ethdev_swstats.h | 111 ++++++++++++++++++++++++++++++++++++
 lib/ethdev/meson.build      |   2 +
 lib/ethdev/version.map      |   3 +
 4 files changed, 217 insertions(+)
 create mode 100644 lib/ethdev/ethdev_swstats.c
 create mode 100644 lib/ethdev/ethdev_swstats.h

diff --git a/lib/ethdev/ethdev_swstats.c b/lib/ethdev/ethdev_swstats.c
new file mode 100644
index 0000000000..f7975bdea7
--- /dev/null
+++ b/lib/ethdev/ethdev_swstats.c
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+
+#include <rte_config.h>
+#include <rte_common.h>
+#include <rte_atomic.h>
+
+#include "rte_ethdev.h"
+#include "ethdev_driver.h"
+#include "ethdev_swstats.h"
+
+int
+rte_eth_counters_stats_get(const struct rte_eth_dev *dev,
+			   size_t tx_offset, size_t rx_offset,
+			   struct rte_eth_stats *stats)
+{
+	uint64_t packets, bytes, errors;
+	unsigned int i;
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		const void *txq = dev->data->tx_queues[i];
+		const struct rte_eth_counters *counters;
+
+		if (txq == NULL)
+			continue;
+
+		counters = (const struct rte_eth_counters *)((const char *)txq + tx_offset);
+		packets = rte_counter64_read(&counters->packets);
+		bytes = rte_counter64_read(&counters->bytes);
+		errors = rte_counter64_read(&counters->errors);
+
+		stats->opackets += packets;
+		stats->obytes += bytes;
+		stats->oerrors += errors;
+
+		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+			stats->q_opackets[i] = packets;
+			stats->q_obytes[i] = bytes;
+		}
+	}
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		const void *rxq = dev->data->rx_queues[i];
+		const struct rte_eth_counters *counters;
+
+		if (rxq == NULL)
+			continue;
+
+		counters = (const struct rte_eth_counters *)((const char *)rxq + rx_offset);
+		packets = rte_counter64_read(&counters->packets);
+		bytes = rte_counter64_read(&counters->bytes);
+		errors = rte_counter64_read(&counters->errors);
+
+		stats->ipackets += packets;
+		stats->ibytes += bytes;
+		stats->ierrors += errors;
+
+		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+			stats->q_ipackets[i] = packets;
+			stats->q_ibytes[i] = bytes;
+		}
+	}
+
+	stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
+	return 0;
+}
+
+int
+rte_eth_counters_reset(struct rte_eth_dev *dev, size_t tx_offset, size_t rx_offset)
+{
+	struct rte_eth_counters *counters;
+	unsigned int i;
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		void *txq  = dev->data->tx_queues[i];
+
+		if (txq == NULL)
+			continue;
+
+		counters = (struct rte_eth_counters *)((char *)txq + tx_offset);
+		rte_counter64_reset(&counters->packets);
+		rte_counter64_reset(&counters->bytes);
+		rte_counter64_reset(&counters->errors);
+	}
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		void *rxq  = dev->data->rx_queues[i];
+
+		if (rxq == NULL)
+			continue;
+
+		counters = (struct rte_eth_counters *)((char *)rxq + rx_offset);
+		rte_counter64_reset(&counters->packets);
+		rte_counter64_reset(&counters->bytes);
+		rte_counter64_reset(&counters->errors);
+	}
+
+	return 0;
+}
diff --git a/lib/ethdev/ethdev_swstats.h b/lib/ethdev/ethdev_swstats.h
new file mode 100644
index 0000000000..37b8e43eb0
--- /dev/null
+++ b/lib/ethdev/ethdev_swstats.h
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+#ifndef _RTE_ETHDEV_SWSTATS_H_
+#define _RTE_ETHDEV_SWSTATS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ *
+ * Internal statistics counters for software based devices.
+ * Hardware PMD's should use the hardware counters instead.
+ *
+ * This provides a library for PMD's to keep track of packets and bytes.
+ * It is assumed that this will be used per queue and queues are not
+ * shared by lcores.
+ */
+
+#include <rte_counter.h>
+
+/**
+ * A structure to be embedded in the device driver per-queue data.
+ */
+struct rte_eth_counters {
+	rte_counter64_t	packets;	/**< Total number of packets. */
+	rte_counter64_t	bytes;		/**< Total number of bytes. */
+	rte_counter64_t	errors;		/**< Total number of packets with errors. */
+};
+
+/**
+ * @internal
+ * Increment counters for a single packet.
+ *
+ * @param counters
+ *    Pointer to queue structure containing counters.
+ * @param packets
+ *    Number of packets to count
+ * @param bytes
+ *    Total size of all packet in bytes.
+ */
+__rte_internal
+static inline void
+rte_eth_count_packets(struct rte_eth_counters *counters,
+		      uint16_t packets, uint32_t bytes)
+{
+	rte_counter64_add(&counters->packets, packets);
+	rte_counter64_add(&counters->bytes, bytes);
+}
+
+/**
+ * @internal
+ * Increment error counter.
+ *
+ * @param counters
+ *    Pointer to queue structure containing counters.
+ */
+__rte_internal
+static inline void
+rte_eth_count_error(struct rte_eth_counters *counters)
+{
+	rte_counter64_add(&counters->errors, 1);
+}
+
+/**
+ * @internal
+ * Retrieve the general statistics for all queues.
+ * @see rte_eth_stats_get.
+ *
+ * @param dev
+ *    Pointer to the Ethernet device structure.
+ * @param tx_offset
+ *    Offset from the tx_queue structure where stats are located.
+ * @param rx_offset
+ *    Offset from the rx_queue structure where stats are located.
+ * @param stats
+ *   A pointer to a structure of type *rte_eth_stats* to be filled
+ * @return
+ *   Zero if successful. Non-zero otherwise.
+ */
+__rte_internal
+int rte_eth_counters_stats_get(const struct rte_eth_dev *dev,
+			       size_t tx_offset, size_t rx_offset,
+			       struct rte_eth_stats *stats);
+
+/**
+ * @internal
+ * Reset the statistics for all queues.
+ * @see rte_eth_stats_reset.
+ *
+ * @param dev
+ *    Pointer to the Ethernet device structure.
+ * @param tx_offset
+ *    Offset from the tx_queue structure where stats are located.
+ * @param rx_offset
+ *    Offset from the rx_queue structure where stats are located.
+ * @return
+ *   Zero if successful. Non-zero otherwise.
+ */
+__rte_internal
+int rte_eth_counters_reset(struct rte_eth_dev *dev,
+			   size_t tx_offset, size_t rx_offset);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ETHDEV_SWSTATS_H_ */
diff --git a/lib/ethdev/meson.build b/lib/ethdev/meson.build
index f1d2586591..7ce29a46d4 100644
--- a/lib/ethdev/meson.build
+++ b/lib/ethdev/meson.build
@@ -3,6 +3,7 @@
 
 sources = files(
         'ethdev_driver.c',
+        'ethdev_swstats.c',
         'ethdev_private.c',
         'ethdev_profile.c',
         'ethdev_trace_points.c',
@@ -42,6 +43,7 @@ driver_sdk_headers += files(
         'ethdev_driver.h',
         'ethdev_pci.h',
         'ethdev_vdev.h',
+        'ethdev_swstats.h',
 )
 
 if is_linux
diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
index 79f6f5293b..fc595be278 100644
--- a/lib/ethdev/version.map
+++ b/lib/ethdev/version.map
@@ -358,4 +358,7 @@ INTERNAL {
 	rte_eth_switch_domain_alloc;
 	rte_eth_switch_domain_free;
 	rte_flow_fp_default_ops;
+
+	rte_eth_counters_reset;
+	rte_eth_counters_stats_get;
 };
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v9 3/8] net/af_packet: use generic SW stats
  2024-05-21 20:16   ` [PATCH v9 0/8] Common statistics for SW PMD's Stephen Hemminger
  2024-05-21 20:16     ` [PATCH v9 1/8] eal: generic 64 bit counter Stephen Hemminger
  2024-05-21 20:16     ` [PATCH v9 2/8] ethdev: add common counters for statistics Stephen Hemminger
@ 2024-05-21 20:16     ` Stephen Hemminger
  2024-05-21 20:16     ` [PATCH v9 4/8] net/af_xdp: " Stephen Hemminger
                       ` (4 subsequent siblings)
  7 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-21 20:16 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, John W. Linville

Use the new generic SW stats.
Add a note about how errors and kernel full should be handled.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/af_packet/rte_eth_af_packet.c | 78 +++++------------------
 1 file changed, 17 insertions(+), 61 deletions(-)

diff --git a/drivers/net/af_packet/rte_eth_af_packet.c b/drivers/net/af_packet/rte_eth_af_packet.c
index 397a32db58..64fa519812 100644
--- a/drivers/net/af_packet/rte_eth_af_packet.c
+++ b/drivers/net/af_packet/rte_eth_af_packet.c
@@ -10,6 +10,7 @@
 #include <rte_mbuf.h>
 #include <ethdev_driver.h>
 #include <ethdev_vdev.h>
+#include <ethdev_swstats.h>
 #include <rte_malloc.h>
 #include <rte_kvargs.h>
 #include <bus_vdev_driver.h>
@@ -51,8 +52,7 @@ struct pkt_rx_queue {
 	uint16_t in_port;
 	uint8_t vlan_strip;
 
-	volatile unsigned long rx_pkts;
-	volatile unsigned long rx_bytes;
+	struct rte_eth_counters stats;
 };
 
 struct pkt_tx_queue {
@@ -64,11 +64,10 @@ struct pkt_tx_queue {
 	unsigned int framecount;
 	unsigned int framenum;
 
-	volatile unsigned long tx_pkts;
-	volatile unsigned long err_pkts;
-	volatile unsigned long tx_bytes;
+	struct rte_eth_counters stats;
 };
 
+
 struct pmd_internals {
 	unsigned nb_queues;
 
@@ -168,9 +167,10 @@ eth_af_packet_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		num_rx_bytes += mbuf->pkt_len;
 	}
 	pkt_q->framenum = framenum;
-	pkt_q->rx_pkts += num_rx;
-	pkt_q->rx_bytes += num_rx_bytes;
-	return num_rx;
+
+	rte_eth_count_packets(&pkt_q->stats, num_rx, num_rx_bytes);
+
+	return i;
 }
 
 /*
@@ -294,19 +294,16 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	if (sendto(pkt_q->sockfd, NULL, 0, MSG_DONTWAIT, NULL, 0) == -1 &&
 			errno != ENOBUFS && errno != EAGAIN) {
 		/*
-		 * In case of a ENOBUFS/EAGAIN error all of the enqueued
-		 * packets will be considered successful even though only some
-		 * are sent.
+		 * FIXME: if sendto fails kernel is busy should return 0
+		 * and not free the mbufs. Other errors should free the
+		 * buts and increment the tx error count.
 		 */
-
 		num_tx = 0;
 		num_tx_bytes = 0;
 	}
 
 	pkt_q->framenum = framenum;
-	pkt_q->tx_pkts += num_tx;
-	pkt_q->err_pkts += i - num_tx;
-	pkt_q->tx_bytes += num_tx_bytes;
+	rte_eth_count_packets(&pkt_q->stats, num_tx, num_tx_bytes);
 	return i;
 }
 
@@ -386,58 +383,17 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 }
 
 static int
-eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
+eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-	unsigned i, imax;
-	unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
-	unsigned long rx_bytes_total = 0, tx_bytes_total = 0;
-	const struct pmd_internals *internal = dev->data->dev_private;
-
-	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
-	        internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
-	for (i = 0; i < imax; i++) {
-		igb_stats->q_ipackets[i] = internal->rx_queue[i].rx_pkts;
-		igb_stats->q_ibytes[i] = internal->rx_queue[i].rx_bytes;
-		rx_total += igb_stats->q_ipackets[i];
-		rx_bytes_total += igb_stats->q_ibytes[i];
-	}
-
-	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
-	        internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
-	for (i = 0; i < imax; i++) {
-		igb_stats->q_opackets[i] = internal->tx_queue[i].tx_pkts;
-		igb_stats->q_obytes[i] = internal->tx_queue[i].tx_bytes;
-		tx_total += igb_stats->q_opackets[i];
-		tx_err_total += internal->tx_queue[i].err_pkts;
-		tx_bytes_total += igb_stats->q_obytes[i];
-	}
-
-	igb_stats->ipackets = rx_total;
-	igb_stats->ibytes = rx_bytes_total;
-	igb_stats->opackets = tx_total;
-	igb_stats->oerrors = tx_err_total;
-	igb_stats->obytes = tx_bytes_total;
-	return 0;
+	return rte_eth_counters_stats_get(dev, offsetof(struct pkt_tx_queue, stats),
+					  offsetof(struct pkt_rx_queue, stats), stats);
 }
 
 static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
-	unsigned i;
-	struct pmd_internals *internal = dev->data->dev_private;
-
-	for (i = 0; i < internal->nb_queues; i++) {
-		internal->rx_queue[i].rx_pkts = 0;
-		internal->rx_queue[i].rx_bytes = 0;
-	}
-
-	for (i = 0; i < internal->nb_queues; i++) {
-		internal->tx_queue[i].tx_pkts = 0;
-		internal->tx_queue[i].err_pkts = 0;
-		internal->tx_queue[i].tx_bytes = 0;
-	}
-
-	return 0;
+	return rte_eth_counters_reset(dev, offsetof(struct pkt_tx_queue, stats),
+				      offsetof(struct pkt_rx_queue, stats));
 }
 
 static int
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v9 4/8] net/af_xdp: use generic SW stats
  2024-05-21 20:16   ` [PATCH v9 0/8] Common statistics for SW PMD's Stephen Hemminger
                       ` (2 preceding siblings ...)
  2024-05-21 20:16     ` [PATCH v9 3/8] net/af_packet: use generic SW stats Stephen Hemminger
@ 2024-05-21 20:16     ` Stephen Hemminger
  2024-05-21 20:16     ` [PATCH v9 5/8] net/pcap: " Stephen Hemminger
                       ` (3 subsequent siblings)
  7 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-21 20:16 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Ciara Loftus

Use common code for all SW stats.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/af_xdp/rte_eth_af_xdp.c | 85 ++++++++---------------------
 1 file changed, 22 insertions(+), 63 deletions(-)

diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 6ba455bb9b..c563621798 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -20,6 +20,7 @@
 #include <rte_ethdev.h>
 #include <ethdev_driver.h>
 #include <ethdev_vdev.h>
+#include <ethdev_swstats.h>
 #include <rte_kvargs.h>
 #include <bus_vdev_driver.h>
 #include <rte_string_fns.h>
@@ -120,19 +121,13 @@ struct xsk_umem_info {
 	uint32_t max_xsks;
 };
 
-struct rx_stats {
-	uint64_t rx_pkts;
-	uint64_t rx_bytes;
-	uint64_t rx_dropped;
-};
-
 struct pkt_rx_queue {
 	struct xsk_ring_cons rx;
 	struct xsk_umem_info *umem;
 	struct xsk_socket *xsk;
 	struct rte_mempool *mb_pool;
 
-	struct rx_stats stats;
+	struct rte_eth_counters stats;
 
 	struct xsk_ring_prod fq;
 	struct xsk_ring_cons cq;
@@ -143,17 +138,11 @@ struct pkt_rx_queue {
 	int busy_budget;
 };
 
-struct tx_stats {
-	uint64_t tx_pkts;
-	uint64_t tx_bytes;
-	uint64_t tx_dropped;
-};
-
 struct pkt_tx_queue {
 	struct xsk_ring_prod tx;
 	struct xsk_umem_info *umem;
 
-	struct tx_stats stats;
+	struct rte_eth_counters stats;
 
 	struct pkt_rx_queue *pair;
 	int xsk_queue_idx;
@@ -369,9 +358,7 @@ af_xdp_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	xsk_ring_cons__release(rx, nb_pkts);
 	(void)reserve_fill_queue(umem, nb_pkts, fq_bufs, fq);
 
-	/* statistics */
-	rxq->stats.rx_pkts += nb_pkts;
-	rxq->stats.rx_bytes += rx_bytes;
+	rte_eth_count_packets(&rxq->stats, nb_pkts, rx_bytes);
 
 	return nb_pkts;
 }
@@ -429,10 +416,7 @@ af_xdp_rx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	}
 
 	xsk_ring_cons__release(rx, nb_pkts);
-
-	/* statistics */
-	rxq->stats.rx_pkts += nb_pkts;
-	rxq->stats.rx_bytes += rx_bytes;
+	rte_eth_count_packets(&rxq->stats, nb_pkts, rx_bytes);
 
 	return nb_pkts;
 }
@@ -558,6 +542,7 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 					umem->mb_pool->header_size;
 			offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
 			desc->addr = addr | offset;
+			tx_bytes += mbuf->pkt_len;
 			count++;
 		} else {
 			struct rte_mbuf *local_mbuf =
@@ -585,20 +570,17 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			desc->addr = addr | offset;
 			rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
 					desc->len);
+			tx_bytes += mbuf->pkt_len;
 			rte_pktmbuf_free(mbuf);
 			count++;
 		}
-
-		tx_bytes += mbuf->pkt_len;
 	}
 
 out:
 	xsk_ring_prod__submit(&txq->tx, count);
 	kick_tx(txq, cq);
 
-	txq->stats.tx_pkts += count;
-	txq->stats.tx_bytes += tx_bytes;
-	txq->stats.tx_dropped += nb_pkts - count;
+	rte_eth_count_packets(&txq->stats, count, tx_bytes);
 
 	return count;
 }
@@ -648,8 +630,7 @@ af_xdp_tx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 	kick_tx(txq, cq);
 
-	txq->stats.tx_pkts += nb_pkts;
-	txq->stats.tx_bytes += tx_bytes;
+	rte_eth_count_packets(&txq->stats, nb_pkts, tx_bytes);
 
 	return nb_pkts;
 }
@@ -847,39 +828,26 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 static int
 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-	struct pmd_internals *internals = dev->data->dev_private;
 	struct pmd_process_private *process_private = dev->process_private;
-	struct xdp_statistics xdp_stats;
-	struct pkt_rx_queue *rxq;
-	struct pkt_tx_queue *txq;
-	socklen_t optlen;
-	int i, ret, fd;
+	unsigned int i;
 
-	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		optlen = sizeof(struct xdp_statistics);
-		rxq = &internals->rx_queues[i];
-		txq = rxq->pair;
-		stats->q_ipackets[i] = rxq->stats.rx_pkts;
-		stats->q_ibytes[i] = rxq->stats.rx_bytes;
+	rte_eth_counters_stats_get(dev, offsetof(struct pkt_tx_queue, stats),
+				   offsetof(struct pkt_rx_queue, stats), stats);
 
-		stats->q_opackets[i] = txq->stats.tx_pkts;
-		stats->q_obytes[i] = txq->stats.tx_bytes;
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		struct xdp_statistics xdp_stats;
+		socklen_t optlen = sizeof(xdp_stats);
+		int fd;
 
-		stats->ipackets += stats->q_ipackets[i];
-		stats->ibytes += stats->q_ibytes[i];
-		stats->imissed += rxq->stats.rx_dropped;
-		stats->oerrors += txq->stats.tx_dropped;
 		fd = process_private->rxq_xsk_fds[i];
-		ret = fd >= 0 ? getsockopt(fd, SOL_XDP, XDP_STATISTICS,
-					   &xdp_stats, &optlen) : -1;
-		if (ret != 0) {
+		if (fd < 0)
+			continue;
+		if (getsockopt(fd, SOL_XDP, XDP_STATISTICS,
+			       &xdp_stats, &optlen)  < 0) {
 			AF_XDP_LOG(ERR, "getsockopt() failed for XDP_STATISTICS.\n");
 			return -1;
 		}
 		stats->imissed += xdp_stats.rx_dropped;
-
-		stats->opackets += stats->q_opackets[i];
-		stats->obytes += stats->q_obytes[i];
 	}
 
 	return 0;
@@ -888,17 +856,8 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
-	struct pmd_internals *internals = dev->data->dev_private;
-	int i;
-
-	for (i = 0; i < internals->queue_cnt; i++) {
-		memset(&internals->rx_queues[i].stats, 0,
-					sizeof(struct rx_stats));
-		memset(&internals->tx_queues[i].stats, 0,
-					sizeof(struct tx_stats));
-	}
-
-	return 0;
+	return rte_eth_counters_reset(dev, offsetof(struct pkt_tx_queue, stats),
+				      offsetof(struct pkt_rx_queue, stats));
 }
 
 #ifdef RTE_NET_AF_XDP_LIBBPF_XDP_ATTACH
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v9 5/8] net/pcap: use generic SW stats
  2024-05-21 20:16   ` [PATCH v9 0/8] Common statistics for SW PMD's Stephen Hemminger
                       ` (3 preceding siblings ...)
  2024-05-21 20:16     ` [PATCH v9 4/8] net/af_xdp: " Stephen Hemminger
@ 2024-05-21 20:16     ` Stephen Hemminger
  2024-05-21 20:16     ` [PATCH v9 6/8] net/ring: " Stephen Hemminger
                       ` (2 subsequent siblings)
  7 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-21 20:16 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

Use common statistics for SW drivers.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/pcap/pcap_ethdev.c | 100 ++++++++-------------------------
 1 file changed, 22 insertions(+), 78 deletions(-)

diff --git a/drivers/net/pcap/pcap_ethdev.c b/drivers/net/pcap/pcap_ethdev.c
index bfec085045..4689359527 100644
--- a/drivers/net/pcap/pcap_ethdev.c
+++ b/drivers/net/pcap/pcap_ethdev.c
@@ -11,6 +11,7 @@
 
 #include <rte_cycles.h>
 #include <ethdev_driver.h>
+#include <ethdev_swstats.h>
 #include <ethdev_vdev.h>
 #include <rte_kvargs.h>
 #include <rte_malloc.h>
@@ -48,13 +49,6 @@ static uint8_t iface_idx;
 static uint64_t timestamp_rx_dynflag;
 static int timestamp_dynfield_offset = -1;
 
-struct queue_stat {
-	volatile unsigned long pkts;
-	volatile unsigned long bytes;
-	volatile unsigned long err_pkts;
-	volatile unsigned long rx_nombuf;
-};
-
 struct queue_missed_stat {
 	/* last value retrieved from pcap */
 	unsigned int pcap;
@@ -68,7 +62,7 @@ struct pcap_rx_queue {
 	uint16_t port_id;
 	uint16_t queue_id;
 	struct rte_mempool *mb_pool;
-	struct queue_stat rx_stat;
+	struct rte_eth_counters rx_stat;
 	struct queue_missed_stat missed_stat;
 	char name[PATH_MAX];
 	char type[ETH_PCAP_ARG_MAXLEN];
@@ -80,7 +74,7 @@ struct pcap_rx_queue {
 struct pcap_tx_queue {
 	uint16_t port_id;
 	uint16_t queue_id;
-	struct queue_stat tx_stat;
+	struct rte_eth_counters tx_stat;
 	char name[PATH_MAX];
 	char type[ETH_PCAP_ARG_MAXLEN];
 };
@@ -258,14 +252,13 @@ eth_pcap_rx_infinite(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		bufs[i]->data_len = pcap_buf->data_len;
 		bufs[i]->pkt_len = pcap_buf->pkt_len;
 		bufs[i]->port = pcap_q->port_id;
-		rx_bytes += pcap_buf->data_len;
+		rx_bytes += pcap_buf->pkt_len;
 
 		/* Enqueue packet back on ring to allow infinite rx. */
 		rte_ring_enqueue(pcap_q->pkts, pcap_buf);
 	}
 
-	pcap_q->rx_stat.pkts += i;
-	pcap_q->rx_stat.bytes += rx_bytes;
+	rte_eth_count_packets(&pcap_q->rx_stat, i, rx_bytes);
 
 	return i;
 }
@@ -300,7 +293,9 @@ eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		mbuf = rte_pktmbuf_alloc(pcap_q->mb_pool);
 		if (unlikely(mbuf == NULL)) {
-			pcap_q->rx_stat.rx_nombuf++;
+			struct rte_eth_dev *dev = &rte_eth_devices[pcap_q->port_id];
+
+			++dev->data->rx_mbuf_alloc_failed;
 			break;
 		}
 
@@ -315,7 +310,7 @@ eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 						       mbuf,
 						       packet,
 						       header.caplen) == -1)) {
-				pcap_q->rx_stat.err_pkts++;
+				rte_eth_count_error(&pcap_q->rx_stat);
 				rte_pktmbuf_free(mbuf);
 				break;
 			}
@@ -332,9 +327,8 @@ eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		num_rx++;
 		rx_bytes += header.caplen;
 	}
-	pcap_q->rx_stat.pkts += num_rx;
-	pcap_q->rx_stat.bytes += rx_bytes;
 
+	rte_eth_count_packets(&pcap_q->rx_stat, num_rx, rx_bytes);
 	return num_rx;
 }
 
@@ -423,9 +417,8 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	 * we flush the pcap dumper within each burst.
 	 */
 	pcap_dump_flush(dumper);
-	dumper_q->tx_stat.pkts += num_tx;
-	dumper_q->tx_stat.bytes += tx_bytes;
-	dumper_q->tx_stat.err_pkts += nb_pkts - num_tx;
+
+	rte_eth_count_packets(&dumper_q->tx_stat, num_tx, tx_bytes);
 
 	return nb_pkts;
 }
@@ -448,9 +441,7 @@ eth_tx_drop(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		rte_pktmbuf_free(bufs[i]);
 	}
 
-	tx_queue->tx_stat.pkts += nb_pkts;
-	tx_queue->tx_stat.bytes += tx_bytes;
-
+	rte_eth_count_packets(&tx_queue->tx_stat, nb_pkts, tx_bytes);
 	return i;
 }
 
@@ -502,9 +493,7 @@ eth_pcap_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		rte_pktmbuf_free(mbuf);
 	}
 
-	tx_queue->tx_stat.pkts += num_tx;
-	tx_queue->tx_stat.bytes += tx_bytes;
-	tx_queue->tx_stat.err_pkts += i - num_tx;
+	rte_eth_count_packets(&tx_queue->tx_stat, num_tx, tx_bytes);
 
 	return i;
 }
@@ -746,41 +735,12 @@ static int
 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
 	unsigned int i;
-	unsigned long rx_packets_total = 0, rx_bytes_total = 0;
-	unsigned long rx_missed_total = 0;
-	unsigned long rx_nombuf_total = 0, rx_err_total = 0;
-	unsigned long tx_packets_total = 0, tx_bytes_total = 0;
-	unsigned long tx_packets_err_total = 0;
-	const struct pmd_internals *internal = dev->data->dev_private;
-
-	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
-			i < dev->data->nb_rx_queues; i++) {
-		stats->q_ipackets[i] = internal->rx_queue[i].rx_stat.pkts;
-		stats->q_ibytes[i] = internal->rx_queue[i].rx_stat.bytes;
-		rx_nombuf_total += internal->rx_queue[i].rx_stat.rx_nombuf;
-		rx_err_total += internal->rx_queue[i].rx_stat.err_pkts;
-		rx_packets_total += stats->q_ipackets[i];
-		rx_bytes_total += stats->q_ibytes[i];
-		rx_missed_total += queue_missed_stat_get(dev, i);
-	}
 
-	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
-			i < dev->data->nb_tx_queues; i++) {
-		stats->q_opackets[i] = internal->tx_queue[i].tx_stat.pkts;
-		stats->q_obytes[i] = internal->tx_queue[i].tx_stat.bytes;
-		tx_packets_total += stats->q_opackets[i];
-		tx_bytes_total += stats->q_obytes[i];
-		tx_packets_err_total += internal->tx_queue[i].tx_stat.err_pkts;
-	}
+	rte_eth_counters_stats_get(dev, offsetof(struct pcap_tx_queue, tx_stat),
+				   offsetof(struct pcap_rx_queue, rx_stat), stats);
 
-	stats->ipackets = rx_packets_total;
-	stats->ibytes = rx_bytes_total;
-	stats->imissed = rx_missed_total;
-	stats->ierrors = rx_err_total;
-	stats->rx_nombuf = rx_nombuf_total;
-	stats->opackets = tx_packets_total;
-	stats->obytes = tx_bytes_total;
-	stats->oerrors = tx_packets_err_total;
+	for (i = 0; i < dev->data->nb_rx_queues; i++)
+		stats->imissed += queue_missed_stat_get(dev, i);
 
 	return 0;
 }
@@ -789,21 +749,12 @@ static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
 	unsigned int i;
-	struct pmd_internals *internal = dev->data->dev_private;
 
-	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		internal->rx_queue[i].rx_stat.pkts = 0;
-		internal->rx_queue[i].rx_stat.bytes = 0;
-		internal->rx_queue[i].rx_stat.err_pkts = 0;
-		internal->rx_queue[i].rx_stat.rx_nombuf = 0;
-		queue_missed_stat_reset(dev, i);
-	}
+	rte_eth_counters_reset(dev, offsetof(struct pcap_tx_queue, tx_stat),
+			       offsetof(struct pcap_rx_queue, rx_stat));
 
-	for (i = 0; i < dev->data->nb_tx_queues; i++) {
-		internal->tx_queue[i].tx_stat.pkts = 0;
-		internal->tx_queue[i].tx_stat.bytes = 0;
-		internal->tx_queue[i].tx_stat.err_pkts = 0;
-	}
+	for (i = 0; i < dev->data->nb_rx_queues; i++)
+		queue_missed_stat_reset(dev, i);
 
 	return 0;
 }
@@ -929,13 +880,6 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 				pcap_pkt_count);
 			return -EINVAL;
 		}
-
-		/*
-		 * Reset the stats for this queue since eth_pcap_rx calls above
-		 * didn't result in the application receiving packets.
-		 */
-		pcap_q->rx_stat.pkts = 0;
-		pcap_q->rx_stat.bytes = 0;
 	}
 
 	return 0;
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v9 6/8] net/ring: use generic SW stats
  2024-05-21 20:16   ` [PATCH v9 0/8] Common statistics for SW PMD's Stephen Hemminger
                       ` (4 preceding siblings ...)
  2024-05-21 20:16     ` [PATCH v9 5/8] net/pcap: " Stephen Hemminger
@ 2024-05-21 20:16     ` Stephen Hemminger
  2024-05-21 20:16     ` [PATCH v9 7/8] net/tap: " Stephen Hemminger
  2024-05-21 20:16     ` [PATCH v9 8/8] net/null: " Stephen Hemminger
  7 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-21 20:16 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Bruce Richardson

Use generic per-queue infrastructure.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/ring/rte_eth_ring.c | 63 +++++++++++----------------------
 1 file changed, 20 insertions(+), 43 deletions(-)

diff --git a/drivers/net/ring/rte_eth_ring.c b/drivers/net/ring/rte_eth_ring.c
index b16f5d55f2..36053e4038 100644
--- a/drivers/net/ring/rte_eth_ring.c
+++ b/drivers/net/ring/rte_eth_ring.c
@@ -7,6 +7,7 @@
 #include "rte_eth_ring.h"
 #include <rte_mbuf.h>
 #include <ethdev_driver.h>
+#include <ethdev_swstats.h>
 #include <rte_malloc.h>
 #include <rte_memcpy.h>
 #include <rte_os_shim.h>
@@ -44,8 +45,8 @@ enum dev_action {
 
 struct ring_queue {
 	struct rte_ring *rng;
-	RTE_ATOMIC(uint64_t) rx_pkts;
-	RTE_ATOMIC(uint64_t) tx_pkts;
+
+	struct rte_eth_counters stats;
 };
 
 struct pmd_internals {
@@ -77,12 +78,12 @@ eth_ring_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 {
 	void **ptrs = (void *)&bufs[0];
 	struct ring_queue *r = q;
-	const uint16_t nb_rx = (uint16_t)rte_ring_dequeue_burst(r->rng,
-			ptrs, nb_bufs, NULL);
-	if (r->rng->flags & RING_F_SC_DEQ)
-		r->rx_pkts += nb_rx;
-	else
-		rte_atomic_fetch_add_explicit(&r->rx_pkts, nb_rx, rte_memory_order_relaxed);
+	uint16_t nb_rx;
+
+	nb_rx = (uint16_t)rte_ring_dequeue_burst(r->rng, ptrs, nb_bufs, NULL);
+
+	rte_counter64_add(&r->stats.packets, nb_rx);
+
 	return nb_rx;
 }
 
@@ -91,12 +92,12 @@ eth_ring_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 {
 	void **ptrs = (void *)&bufs[0];
 	struct ring_queue *r = q;
-	const uint16_t nb_tx = (uint16_t)rte_ring_enqueue_burst(r->rng,
-			ptrs, nb_bufs, NULL);
-	if (r->rng->flags & RING_F_SP_ENQ)
-		r->tx_pkts += nb_tx;
-	else
-		rte_atomic_fetch_add_explicit(&r->tx_pkts, nb_tx, rte_memory_order_relaxed);
+	uint16_t nb_tx;
+
+	nb_tx = (uint16_t)rte_ring_enqueue_burst(r->rng, ptrs, nb_bufs, NULL);
+
+	rte_counter64_add(&r->stats.packets, nb_tx);
+
 	return nb_tx;
 }
 
@@ -193,40 +194,16 @@ eth_dev_info(struct rte_eth_dev *dev,
 static int
 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-	unsigned int i;
-	unsigned long rx_total = 0, tx_total = 0;
-	const struct pmd_internals *internal = dev->data->dev_private;
-
-	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
-			i < dev->data->nb_rx_queues; i++) {
-		stats->q_ipackets[i] = internal->rx_ring_queues[i].rx_pkts;
-		rx_total += stats->q_ipackets[i];
-	}
-
-	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
-			i < dev->data->nb_tx_queues; i++) {
-		stats->q_opackets[i] = internal->tx_ring_queues[i].tx_pkts;
-		tx_total += stats->q_opackets[i];
-	}
-
-	stats->ipackets = rx_total;
-	stats->opackets = tx_total;
-
-	return 0;
+	return rte_eth_counters_stats_get(dev, offsetof(struct ring_queue, stats),
+					  offsetof(struct ring_queue, stats),
+					  stats);
 }
 
 static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
-	unsigned int i;
-	struct pmd_internals *internal = dev->data->dev_private;
-
-	for (i = 0; i < dev->data->nb_rx_queues; i++)
-		internal->rx_ring_queues[i].rx_pkts = 0;
-	for (i = 0; i < dev->data->nb_tx_queues; i++)
-		internal->tx_ring_queues[i].tx_pkts = 0;
-
-	return 0;
+	return rte_eth_counters_reset(dev, offsetof(struct ring_queue, stats),
+				      offsetof(struct ring_queue, stats));
 }
 
 static void
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v9 7/8] net/tap: use generic SW stats
  2024-05-21 20:16   ` [PATCH v9 0/8] Common statistics for SW PMD's Stephen Hemminger
                       ` (5 preceding siblings ...)
  2024-05-21 20:16     ` [PATCH v9 6/8] net/ring: " Stephen Hemminger
@ 2024-05-21 20:16     ` Stephen Hemminger
  2024-05-21 20:16     ` [PATCH v9 8/8] net/null: " Stephen Hemminger
  7 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-21 20:16 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

Use new common sw statistics.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/tap/rte_eth_tap.c | 75 ++++++-----------------------------
 drivers/net/tap/rte_eth_tap.h | 15 ++-----
 2 files changed, 16 insertions(+), 74 deletions(-)

diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index 69d9da695b..9cc923fd0c 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -455,7 +455,7 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		/* Packet couldn't fit in the provided mbuf */
 		if (unlikely(rxq->pi.flags & TUN_PKT_STRIP)) {
-			rxq->stats.ierrors++;
+			rte_eth_count_error(&rxq->stats);
 			continue;
 		}
 
@@ -467,7 +467,9 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			struct rte_mbuf *buf = rte_pktmbuf_alloc(rxq->mp);
 
 			if (unlikely(!buf)) {
-				rxq->stats.rx_nombuf++;
+				struct rte_eth_dev *dev = &rte_eth_devices[rxq->in_port];
+				++dev->data->rx_mbuf_alloc_failed;
+
 				/* No new buf has been allocated: do nothing */
 				if (!new_tail || !seg)
 					goto end;
@@ -512,8 +514,7 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		num_rx_bytes += mbuf->pkt_len;
 	}
 end:
-	rxq->stats.ipackets += num_rx;
-	rxq->stats.ibytes += num_rx_bytes;
+	rte_eth_count_packets(&rxq->stats, num_rx, num_rx_bytes);
 
 	if (trigger && num_rx < nb_pkts)
 		rxq->trigger_seen = trigger;
@@ -693,7 +694,7 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			tso_segsz = mbuf_in->tso_segsz + hdrs_len;
 			if (unlikely(tso_segsz == hdrs_len) ||
 				tso_segsz > *txq->mtu) {
-				txq->stats.errs++;
+				rte_eth_count_error(&txq->stats);
 				break;
 			}
 			gso_ctx->gso_size = tso_segsz;
@@ -731,7 +732,8 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		ret = tap_write_mbufs(txq, num_mbufs, mbuf,
 				&num_packets, &num_tx_bytes);
 		if (ret == -1) {
-			txq->stats.errs++;
+			rte_eth_count_error(&txq->stats);
+
 			/* free tso mbufs */
 			if (num_tso_mbufs > 0)
 				rte_pktmbuf_free_bulk(mbuf, num_tso_mbufs);
@@ -749,9 +751,7 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		}
 	}
 
-	txq->stats.opackets += num_packets;
-	txq->stats.errs += nb_pkts - num_tx;
-	txq->stats.obytes += num_tx_bytes;
+	rte_eth_count_packets(&txq->stats, num_packets, num_tx_bytes);
 
 	return num_tx;
 }
@@ -1055,64 +1055,15 @@ tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 static int
 tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats)
 {
-	unsigned int i, imax;
-	unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
-	unsigned long rx_bytes_total = 0, tx_bytes_total = 0;
-	unsigned long rx_nombuf = 0, ierrors = 0;
-	const struct pmd_internals *pmd = dev->data->dev_private;
-
-	/* rx queue statistics */
-	imax = (dev->data->nb_rx_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ?
-		dev->data->nb_rx_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS;
-	for (i = 0; i < imax; i++) {
-		tap_stats->q_ipackets[i] = pmd->rxq[i].stats.ipackets;
-		tap_stats->q_ibytes[i] = pmd->rxq[i].stats.ibytes;
-		rx_total += tap_stats->q_ipackets[i];
-		rx_bytes_total += tap_stats->q_ibytes[i];
-		rx_nombuf += pmd->rxq[i].stats.rx_nombuf;
-		ierrors += pmd->rxq[i].stats.ierrors;
-	}
-
-	/* tx queue statistics */
-	imax = (dev->data->nb_tx_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ?
-		dev->data->nb_tx_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS;
-
-	for (i = 0; i < imax; i++) {
-		tap_stats->q_opackets[i] = pmd->txq[i].stats.opackets;
-		tap_stats->q_obytes[i] = pmd->txq[i].stats.obytes;
-		tx_total += tap_stats->q_opackets[i];
-		tx_err_total += pmd->txq[i].stats.errs;
-		tx_bytes_total += tap_stats->q_obytes[i];
-	}
-
-	tap_stats->ipackets = rx_total;
-	tap_stats->ibytes = rx_bytes_total;
-	tap_stats->ierrors = ierrors;
-	tap_stats->rx_nombuf = rx_nombuf;
-	tap_stats->opackets = tx_total;
-	tap_stats->oerrors = tx_err_total;
-	tap_stats->obytes = tx_bytes_total;
-	return 0;
+	return rte_eth_counters_stats_get(dev, offsetof(struct tx_queue, stats),
+					  offsetof(struct rx_queue, stats), tap_stats);
 }
 
 static int
 tap_stats_reset(struct rte_eth_dev *dev)
 {
-	int i;
-	struct pmd_internals *pmd = dev->data->dev_private;
-
-	for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
-		pmd->rxq[i].stats.ipackets = 0;
-		pmd->rxq[i].stats.ibytes = 0;
-		pmd->rxq[i].stats.ierrors = 0;
-		pmd->rxq[i].stats.rx_nombuf = 0;
-
-		pmd->txq[i].stats.opackets = 0;
-		pmd->txq[i].stats.errs = 0;
-		pmd->txq[i].stats.obytes = 0;
-	}
-
-	return 0;
+	return rte_eth_counters_reset(dev, offsetof(struct tx_queue, stats),
+				      offsetof(struct rx_queue, stats));
 }
 
 static int
diff --git a/drivers/net/tap/rte_eth_tap.h b/drivers/net/tap/rte_eth_tap.h
index 5ac93f93e9..8cba9ea410 100644
--- a/drivers/net/tap/rte_eth_tap.h
+++ b/drivers/net/tap/rte_eth_tap.h
@@ -14,6 +14,7 @@
 #include <linux/if_tun.h>
 
 #include <ethdev_driver.h>
+#include <ethdev_swstats.h>
 #include <rte_ether.h>
 #include <rte_gso.h>
 #include "tap_log.h"
@@ -32,23 +33,13 @@ enum rte_tuntap_type {
 	ETH_TUNTAP_TYPE_MAX,
 };
 
-struct pkt_stats {
-	uint64_t opackets;              /* Number of output packets */
-	uint64_t ipackets;              /* Number of input packets */
-	uint64_t obytes;                /* Number of bytes on output */
-	uint64_t ibytes;                /* Number of bytes on input */
-	uint64_t errs;                  /* Number of TX error packets */
-	uint64_t ierrors;               /* Number of RX error packets */
-	uint64_t rx_nombuf;             /* Nb of RX mbuf alloc failures */
-};
-
 struct rx_queue {
 	struct rte_mempool *mp;         /* Mempool for RX packets */
 	uint32_t trigger_seen;          /* Last seen Rx trigger value */
 	uint16_t in_port;               /* Port ID */
 	uint16_t queue_id;		/* queue ID*/
-	struct pkt_stats stats;         /* Stats for this RX queue */
 	uint16_t nb_rx_desc;            /* max number of mbufs available */
+	struct rte_eth_counters stats;  /* Stats for this RX queue */
 	struct rte_eth_rxmode *rxmode;  /* RX features */
 	struct rte_mbuf *pool;          /* mbufs pool for this queue */
 	struct iovec (*iovecs)[];       /* descriptors for this queue */
@@ -59,7 +50,7 @@ struct tx_queue {
 	int type;                       /* Type field - TUN|TAP */
 	uint16_t *mtu;                  /* Pointer to MTU from dev_data */
 	uint16_t csum:1;                /* Enable checksum offloading */
-	struct pkt_stats stats;         /* Stats for this TX queue */
+	struct rte_eth_counters stats;	/* Stats for this TX queue */
 	struct rte_gso_ctx gso_ctx;     /* GSO context */
 	uint16_t out_port;              /* Port ID */
 	uint16_t queue_id;		/* queue ID*/
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v9 8/8] net/null: use generic SW stats
  2024-05-21 20:16   ` [PATCH v9 0/8] Common statistics for SW PMD's Stephen Hemminger
                       ` (6 preceding siblings ...)
  2024-05-21 20:16     ` [PATCH v9 7/8] net/tap: " Stephen Hemminger
@ 2024-05-21 20:16     ` Stephen Hemminger
  7 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-21 20:16 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Tetsuya Mukawa

Use the new common code for statistics.
This also fixes the bug that this driver was not accounting
for bytes.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/null/rte_eth_null.c | 83 +++++++++------------------------
 1 file changed, 21 insertions(+), 62 deletions(-)

diff --git a/drivers/net/null/rte_eth_null.c b/drivers/net/null/rte_eth_null.c
index f4ed3b8a7f..83add9c819 100644
--- a/drivers/net/null/rte_eth_null.c
+++ b/drivers/net/null/rte_eth_null.c
@@ -8,6 +8,7 @@
 #include <rte_mbuf.h>
 #include <ethdev_driver.h>
 #include <ethdev_vdev.h>
+#include <ethdev_swstats.h>
 #include <rte_malloc.h>
 #include <rte_memcpy.h>
 #include <bus_vdev_driver.h>
@@ -37,8 +38,8 @@ struct null_queue {
 	struct rte_mempool *mb_pool;
 	struct rte_mbuf *dummy_packet;
 
-	RTE_ATOMIC(uint64_t) rx_pkts;
-	RTE_ATOMIC(uint64_t) tx_pkts;
+	struct rte_eth_counters tx_stats;
+	struct rte_eth_counters rx_stats;
 };
 
 struct pmd_options {
@@ -101,9 +102,7 @@ eth_null_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 		bufs[i]->port = h->internals->port_id;
 	}
 
-	/* NOTE: review for potential ordering optimization */
-	rte_atomic_fetch_add_explicit(&h->rx_pkts, i, rte_memory_order_seq_cst);
-
+	rte_eth_count_packets(&h->rx_stats, i, i * packet_size);
 	return i;
 }
 
@@ -129,8 +128,7 @@ eth_null_copy_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 		bufs[i]->port = h->internals->port_id;
 	}
 
-	/* NOTE: review for potential ordering optimization */
-	rte_atomic_fetch_add_explicit(&h->rx_pkts, i, rte_memory_order_seq_cst);
+	rte_eth_count_packets(&h->rx_stats, i, i * packet_size);
 
 	return i;
 }
@@ -147,16 +145,17 @@ eth_null_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 {
 	int i;
 	struct null_queue *h = q;
+	uint32_t tx_bytes = 0;
 
 	if ((q == NULL) || (bufs == NULL))
 		return 0;
 
-	for (i = 0; i < nb_bufs; i++)
+	for (i = 0; i < nb_bufs; i++) {
+		tx_bytes += rte_pktmbuf_pkt_len(bufs[i]);
 		rte_pktmbuf_free(bufs[i]);
+	}
 
-	/* NOTE: review for potential ordering optimization */
-	rte_atomic_fetch_add_explicit(&h->tx_pkts, i, rte_memory_order_seq_cst);
-
+	rte_eth_count_packets(&h->tx_stats, i, tx_bytes);
 	return i;
 }
 
@@ -166,20 +165,20 @@ eth_null_copy_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 	int i;
 	struct null_queue *h = q;
 	unsigned int packet_size;
+	uint32_t tx_bytes = 0;
 
 	if ((q == NULL) || (bufs == NULL))
 		return 0;
 
 	packet_size = h->internals->packet_size;
 	for (i = 0; i < nb_bufs; i++) {
+		tx_bytes += rte_pktmbuf_pkt_len(bufs[i]);
 		rte_memcpy(h->dummy_packet, rte_pktmbuf_mtod(bufs[i], void *),
 					packet_size);
 		rte_pktmbuf_free(bufs[i]);
 	}
 
-	/* NOTE: review for potential ordering optimization */
-	rte_atomic_fetch_add_explicit(&h->tx_pkts, i, rte_memory_order_seq_cst);
-
+	rte_eth_count_packets(&h->tx_stats, i, tx_bytes);
 	return i;
 }
 
@@ -322,60 +321,20 @@ eth_dev_info(struct rte_eth_dev *dev,
 }
 
 static int
-eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
+eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-	unsigned int i, num_stats;
-	unsigned long rx_total = 0, tx_total = 0;
-	const struct pmd_internals *internal;
-
-	if ((dev == NULL) || (igb_stats == NULL))
-		return -EINVAL;
-
-	internal = dev->data->dev_private;
-	num_stats = RTE_MIN((unsigned int)RTE_ETHDEV_QUEUE_STAT_CNTRS,
-			RTE_MIN(dev->data->nb_rx_queues,
-				RTE_DIM(internal->rx_null_queues)));
-	for (i = 0; i < num_stats; i++) {
-		/* NOTE: review for atomic access */
-		igb_stats->q_ipackets[i] =
-			internal->rx_null_queues[i].rx_pkts;
-		rx_total += igb_stats->q_ipackets[i];
-	}
-
-	num_stats = RTE_MIN((unsigned int)RTE_ETHDEV_QUEUE_STAT_CNTRS,
-			RTE_MIN(dev->data->nb_tx_queues,
-				RTE_DIM(internal->tx_null_queues)));
-	for (i = 0; i < num_stats; i++) {
-		/* NOTE: review for atomic access */
-		igb_stats->q_opackets[i] =
-			internal->tx_null_queues[i].tx_pkts;
-		tx_total += igb_stats->q_opackets[i];
-	}
-
-	igb_stats->ipackets = rx_total;
-	igb_stats->opackets = tx_total;
-
-	return 0;
+	return rte_eth_counters_stats_get(dev,
+					  offsetof(struct null_queue, tx_stats),
+					  offsetof(struct null_queue, rx_stats),
+					  stats);
 }
 
 static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
-	unsigned int i;
-	struct pmd_internals *internal;
-
-	if (dev == NULL)
-		return -EINVAL;
-
-	internal = dev->data->dev_private;
-	for (i = 0; i < RTE_DIM(internal->rx_null_queues); i++)
-		/* NOTE: review for atomic access */
-		internal->rx_null_queues[i].rx_pkts = 0;
-	for (i = 0; i < RTE_DIM(internal->tx_null_queues); i++)
-		/* NOTE: review for atomic access */
-		internal->tx_null_queues[i].tx_pkts = 0;
-
-	return 0;
+	return rte_eth_counters_reset(dev,
+				      offsetof(struct null_queue, tx_stats),
+				      offsetof(struct null_queue, rx_stats));
 }
 
 static void
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v9 1/8] eal: generic 64 bit counter
  2024-05-21 20:16     ` [PATCH v9 1/8] eal: generic 64 bit counter Stephen Hemminger
@ 2024-05-22  8:31       ` Morten Brørup
  2024-05-22 15:33         ` Stephen Hemminger
  2024-05-22 15:37         ` Stephen Hemminger
  0 siblings, 2 replies; 179+ messages in thread
From: Morten Brørup @ 2024-05-22  8:31 UTC (permalink / raw)
  To: Stephen Hemminger, dev; +Cc: Tyler Retzlaff

> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Tuesday, 21 May 2024 22.17
> 
> This header implements 64 bit counters using atomic
> operations but with a weak memory ordering so that
> they are safe against load/store splits on 32 bit platforms.
> 
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> Acked-by: Morten Brørup <mb@smartsharesystems.com>
> ---
>  lib/eal/include/meson.build   |   1 +
>  lib/eal/include/rte_counter.h | 150 ++++++++++++++++++++++++++++++++++
>  2 files changed, 151 insertions(+)
>  create mode 100644 lib/eal/include/rte_counter.h
> 
> diff --git a/lib/eal/include/meson.build b/lib/eal/include/meson.build
> index e94b056d46..c070dd0079 100644
> --- a/lib/eal/include/meson.build
> +++ b/lib/eal/include/meson.build
> @@ -12,6 +12,7 @@ headers += files(
>          'rte_class.h',
>          'rte_common.h',
>          'rte_compat.h',
> +        'rte_counter.h',
>          'rte_debug.h',
>          'rte_dev.h',
>          'rte_devargs.h',
> diff --git a/lib/eal/include/rte_counter.h b/lib/eal/include/rte_counter.h
> new file mode 100644
> index 0000000000..f0c2b71a6c
> --- /dev/null
> +++ b/lib/eal/include/rte_counter.h
> @@ -0,0 +1,150 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright (c) Stephen Hemminger <stephen@networkplumber.org>
> + */
> +
> +#ifndef _RTE_COUNTER_H_
> +#define _RTE_COUNTER_H_
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#include <stdint.h>
> +#include <rte_compat.h>
> +#include <rte_common.h>
> +#include <rte_stdatomic.h>
> +
> +/**
> + * @file
> + * RTE Counter
> + *
> + * A counter is 64 bit value that is safe from split read/write.
> + * It assumes that only one cpu at a time  will update the counter,
> + * and another CPU may want to read it.
> + *
> + * This is a weaker subset of full atomic variables.
> + *
> + * The counters are subject to the restrictions of atomic variables
> + * in packed structures or unaligned.
> + */
> +
> +#ifdef RTE_ARCH_64
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice.
> + *
> + * On native 64 bit platform, counter is implemented as basic
> + * 64 bit unsigned integer that only increases.
> + */
> +typedef struct {
> +	uint64_t current;
> +	uint64_t offset;
> +} rte_counter64_t;

As discussed in the other thread [1], I strongly prefer having "current" and "offset" separate, for performance reasons.
Keeping each offset close together with its counter will require more cache lines than necessary, because the offsets take up space in the hot part of a fast path data structure. E.g. the size_bins[] counters could fit into one cache line instead of two.

[1]: https://inbox.dpdk.org/dev/98CBD80474FA8B44BF855DF32C47DC35E9F422@smartserver.smartshare.dk/

The disadvantages of a slightly larger API is insignificant, compared to the performance cost of not separating them.

> +
> +/**
> + * @internal
> + * Macro to implement read once (compiler barrier) using stdatomic.
> + * This is compiler barrier only.
> + */
> +#define __rte_read_once(var)						\
> +	rte_atomic_load_explicit((__rte_atomic typeof(&(var)))&(var),	\
> +		rte_memory_order_consume)
> +
> +/**
> + * @internal
> + * Macro to implement write once (compiler barrier) using stdatomic.
> + * This is compiler barrier only.
> + */
> +#define __rte_write_once(var, val)					    \
> +	rte_atomic_store_explicit((__rte_atomic typeof(&(var)))&(var), val, \
> +		rte_memory_order_release)

These macros certainly make the code using them shorter.
But IMHO, they don't improve the readability of the code using them; quite the opposite. Reviewing code using atomics is hard, so I prefer having the memory order directly shown in the code, not hidden behind a macro.

> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice.
> + *
> + * Add value to counter.
> + * Assumes this operation is only done by one thread on the object.
> + *
> + * @param counter
> + *    A pointer to the counter.
> + * @param val
> + *    The value to add to the counter.
> + */
> +static inline void
> +rte_counter64_add(rte_counter64_t *counter, uint32_t val)
> +{
> +	counter->current += val;
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice.
> + *
> + * Read a counter.
> + * This operation can be done by any thread.
> + *
> + * @param counter
> + *    A pointer to the counter.
> + * @return
> + *  The current value of the counter.
> + */
> +__rte_experimental
> +static inline uint64_t
> +rte_counter64_read(const rte_counter64_t *counter)
> +{
> +	return __rte_read_once(counter->current) - __rte_read_once(counter-
> >offset);

I'm not sure that "current" needs to be read using rte_memory_order_consume here; I think rte_memory_order_consume for "offset" and rte_memory_order_relaxed (or perhaps just volatile) for "counter" suffices. But let's settle on the high level design before we start micro-optimizing. :-)

> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice.
> + *
> + * Reset a counter to zero.
> + * This operation can be done by any thread.
> + *
> + * @param counter
> + *    A pointer to the counter.
> + */
> +__rte_experimental
> +static inline void
> +rte_counter64_reset(rte_counter64_t *counter)
> +{
> +	__rte_write_once(counter->offset, __rte_read_once(counter->current));
> +}
> +
> +#else
> +
> +/* On 32 bit platform, need to use atomic to avoid load/store tearing */
> +typedef RTE_ATOMIC(uint64_t) rte_counter64_t;

As shown by Godbolt experiments discussed in a previous thread [2], non-tearing 64 bit counters can be implemented without using atomic instructions on all 32 bit architectures supported by DPDK. So we should use the counter/offset design pattern for RTE_ARCH_32 too.

[2]: https://inbox.dpdk.org/dev/98CBD80474FA8B44BF855DF32C47DC35E9F433@smartserver.smartshare.dk/

> +
> +__rte_experimental
> +static inline void
> +rte_counter64_add(rte_counter64_t *counter, uint32_t val)
> +{
> +	rte_atomic_fetch_add_explicit(counter, val, rte_memory_order_relaxed);
> +}
> +
> +__rte_experimental
> +static inline uint64_t
> +rte_counter64_read(const rte_counter64_t *counter)
> +{
> +	return rte_atomic_load_explicit(counter, rte_memory_order_consume);
> +}
> +
> +__rte_experimental
> +static inline void
> +rte_counter64_reset(rte_counter64_t *counter)
> +{
> +	rte_atomic_store_explicit(counter, 0, rte_memory_order_release);
> +}
> +
> +#endif /* RTE_ARCH_64 */
> +
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* _RTE_COUNTER_H_ */
> --
> 2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v9 1/8] eal: generic 64 bit counter
  2024-05-22  8:31       ` Morten Brørup
@ 2024-05-22 15:33         ` Stephen Hemminger
  2024-05-22 18:09           ` Morten Brørup
  2024-05-22 15:37         ` Stephen Hemminger
  1 sibling, 1 reply; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-22 15:33 UTC (permalink / raw)
  To: Morten Brørup; +Cc: dev, Tyler Retzlaff

On Wed, 22 May 2024 10:31:39 +0200
Morten Brørup <mb@smartsharesystems.com> wrote:

> > +/**
> > + * @warning
> > + * @b EXPERIMENTAL: this API may change without prior notice.
> > + *
> > + * On native 64 bit platform, counter is implemented as basic
> > + * 64 bit unsigned integer that only increases.
> > + */
> > +typedef struct {
> > +	uint64_t current;
> > +	uint64_t offset;
> > +} rte_counter64_t;  
> 
> As discussed in the other thread [1], I strongly prefer having "current" and "offset" separate, for performance reasons.
> Keeping each offset close together with its counter will require more cache lines than necessary, because the offsets take up space in the hot part of a fast path data structure. E.g. the size_bins[] counters could fit into one cache line instead of two.
> 
> [1]: https://inbox.dpdk.org/dev/98CBD80474FA8B44BF855DF32C47DC35E9F422@smartserver.smartshare.dk/

There are no size_bins in the current version of the patch.
And the number of counters in ethdev part are small so it is less of a concern.
The code is easier to maintain if the counter object is self contained.

> > + * @internal
> > + * Macro to implement write once (compiler barrier) using stdatomic.
> > + * This is compiler barrier only.
> > + */
> > +#define __rte_write_once(var, val)					    \
> > +	rte_atomic_store_explicit((__rte_atomic typeof(&(var)))&(var), val, \
> > +		rte_memory_order_release)  
> 
> These macros certainly make the code using them shorter.
> But IMHO, they don't improve the readability of the code using them; quite the opposite. Reviewing code using atomics is hard, so I prefer having the memory order directly shown in the code, not hidden behind a macro.

Agree, was going to drop them in next version.

> > +__rte_experimental
> > +static inline uint64_t
> > +rte_counter64_read(const rte_counter64_t *counter)
> > +{
> > +	return __rte_read_once(counter->current) - __rte_read_once(counter-  
> > >offset);  
> 
> I'm not sure that "current" needs to be read using rte_memory_order_consume here; I think rte_memory_order_consume for "offset" and rte_memory_order_relaxed (or perhaps just volatile) for "counter" suffices. But let's settle on the high level design before we start micro-optimizing. :-)

memory order consume is compiler barrier only. Was trying to choose what was best here.

> > +
> > +/* On 32 bit platform, need to use atomic to avoid load/store tearing */
> > +typedef RTE_ATOMIC(uint64_t) rte_counter64_t;  
> 
> As shown by Godbolt experiments discussed in a previous thread [2], non-tearing 64 bit counters can be implemented without using atomic instructions on all 32 bit architectures supported by DPDK. So we should use the counter/offset design pattern for RTE_ARCH_32 too.
> 
> [2]: https://inbox.dpdk.org/dev/98CBD80474FA8B44BF855DF32C47DC35E9F433@smartserver.smartshare.dk/

Bruce found some 32 bit versions of x86 have the problem.
Godbolt doesn't seem to list 32 bit x86 compiler for Gcc.
If you try MSVC in 32 bit mode it will split the loads.
I see no problem on ARM which is the only other 32 bit we care about.



^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v9 1/8] eal: generic 64 bit counter
  2024-05-22  8:31       ` Morten Brørup
  2024-05-22 15:33         ` Stephen Hemminger
@ 2024-05-22 15:37         ` Stephen Hemminger
  2024-05-22 17:57           ` Morten Brørup
  1 sibling, 1 reply; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-22 15:37 UTC (permalink / raw)
  To: Morten Brørup; +Cc: dev, Tyler Retzlaff

On Wed, 22 May 2024 10:31:39 +0200
Morten Brørup <mb@smartsharesystems.com> wrote:

> > +/* On 32 bit platform, need to use atomic to avoid load/store tearing */
> > +typedef RTE_ATOMIC(uint64_t) rte_counter64_t;  
> 
> As shown by Godbolt experiments discussed in a previous thread [2], non-tearing 64 bit counters can be implemented without using atomic instructions on all 32 bit architectures supported by DPDK. So we should use the counter/offset design pattern for RTE_ARCH_32 too.
> 
> [2]: https://inbox.dpdk.org/dev/98CBD80474FA8B44BF855DF32C47DC35E9F433@smartserver.smartshare.dk/


This code built with -O3 and -m32 on godbolt shows split problem.

#include <stdint.h>

typedef uint64_t rte_counter64_t;

void
rte_counter64_add(rte_counter64_t *counter, uint32_t val)
{
	*counter += val;
}
…	*counter = val;
}

rte_counter64_add:
        push    ebx
        mov     eax, DWORD PTR [esp+8]
        xor     ebx, ebx
        mov     ecx, DWORD PTR [esp+12]
        add     DWORD PTR [eax], ecx
        adc     DWORD PTR [eax+4], ebx
        pop     ebx
        ret

rte_counter64_read:
        mov     eax, DWORD PTR [esp+4]
        mov     edx, DWORD PTR [eax+4]
        mov     eax, DWORD PTR [eax]
        ret
rte_counter64_set:
        movq    xmm0, QWORD PTR [esp+8]
        mov     eax, DWORD PTR [esp+4]
        movq    QWORD PTR [eax], xmm0
        ret

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v10 0/8] Common statistics for software PMD's
  2024-05-10  5:01 ` [RFC 0/3] generic sw counters Stephen Hemminger
                     ` (11 preceding siblings ...)
  2024-05-21 20:16   ` [PATCH v9 0/8] Common statistics for SW PMD's Stephen Hemminger
@ 2024-05-22 16:12   ` Stephen Hemminger
  2024-05-22 16:12     ` [PATCH v10 1/8] eal: generic 64 bit counter Stephen Hemminger
                       ` (7 more replies)
  12 siblings, 8 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-22 16:12 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

Adds common code for collecting basic statistics used by many
SW based PMD's such as af_packet, af_xdp, null, tap and ring.

This version uses two counters (current and offset) to implement
SMP safe reset on 64 bit platforms; this avoids having to do locked
operation in the fast path.  On 32 bit platforms, the compiler
will split access to 64 bit variables by default; therefore
it has to use std atomic.

By using common code, it also fixes missing counts in several drivers.
And fixes several bugs.

v10 - can use fast 64 bit counter on all architecture except I686
    - get rid of rte_read/write_once macros
    - adjust #include based on iwyu

Stephen Hemminger (8):
  eal: generic 64 bit counter
  ethdev: add common counters for statistics
  net/af_packet: use generic SW stats
  net/af_xdp: use generic SW stats
  net/pcap: use generic SW stats
  net/ring: use generic SW stats
  net/tap: use generic SW stats
  net/null: use generic SW stats

 drivers/net/af_packet/rte_eth_af_packet.c |  78 +++---------
 drivers/net/af_xdp/rte_eth_af_xdp.c       |  85 ++++---------
 drivers/net/null/rte_eth_null.c           |  83 ++++---------
 drivers/net/pcap/pcap_ethdev.c            | 100 ++++-----------
 drivers/net/ring/rte_eth_ring.c           |  63 +++-------
 drivers/net/tap/rte_eth_tap.c             |  75 ++----------
 drivers/net/tap/rte_eth_tap.h             |  15 +--
 lib/eal/include/meson.build               |   1 +
 lib/eal/include/rte_counter.h             | 141 ++++++++++++++++++++++
 lib/ethdev/ethdev_swstats.c               | 102 ++++++++++++++++
 lib/ethdev/ethdev_swstats.h               | 117 ++++++++++++++++++
 lib/ethdev/meson.build                    |   2 +
 lib/ethdev/version.map                    |   3 +
 13 files changed, 484 insertions(+), 381 deletions(-)
 create mode 100644 lib/eal/include/rte_counter.h
 create mode 100644 lib/ethdev/ethdev_swstats.c
 create mode 100644 lib/ethdev/ethdev_swstats.h

-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v10 1/8] eal: generic 64 bit counter
  2024-05-22 16:12   ` [PATCH v10 0/8] Common statistics for software PMD's Stephen Hemminger
@ 2024-05-22 16:12     ` Stephen Hemminger
  2024-05-22 16:12     ` [PATCH v10 2/8] ethdev: add common counters for statistics Stephen Hemminger
                       ` (6 subsequent siblings)
  7 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-22 16:12 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Morten Brørup, Tyler Retzlaff

This header implements 64 bit counters that can be reset.
The counter increment in the fast path requires no locked
operations. Reading and resetting the counter is handled
by keeping track of a zero offset.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
---
 lib/eal/include/meson.build   |   1 +
 lib/eal/include/rte_counter.h | 141 ++++++++++++++++++++++++++++++++++
 2 files changed, 142 insertions(+)
 create mode 100644 lib/eal/include/rte_counter.h

diff --git a/lib/eal/include/meson.build b/lib/eal/include/meson.build
index e94b056d46..c070dd0079 100644
--- a/lib/eal/include/meson.build
+++ b/lib/eal/include/meson.build
@@ -12,6 +12,7 @@ headers += files(
         'rte_class.h',
         'rte_common.h',
         'rte_compat.h',
+        'rte_counter.h',
         'rte_debug.h',
         'rte_dev.h',
         'rte_devargs.h',
diff --git a/lib/eal/include/rte_counter.h b/lib/eal/include/rte_counter.h
new file mode 100644
index 0000000000..7a90a328fd
--- /dev/null
+++ b/lib/eal/include/rte_counter.h
@@ -0,0 +1,141 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+#ifndef _RTE_COUNTER_H_
+#define _RTE_COUNTER_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <rte_compat.h>
+#include <rte_common.h>
+#include <rte_stdatomic.h>
+
+/**
+ * @file
+ * RTE Counter
+ *
+ * A counter is 64 bit value that is safe from split read/write.
+ * It assumes that only one CPU at a time  will update the counter,
+ * and another CPU may want to read it.
+ *
+ * This is a weaker subset of full atomic variables.
+ *
+ * The counters are subject to the restrictions of atomic variables
+ * in packed structures or unaligned.
+ */
+
+#ifndef RTE_ARCH_I686
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * On platforms where 64 bit values are not split by compiler, the counter
+ * is implemented as basic 64 bit unsigned integer that only increases.
+ * The counter is reset by changing zero offset.
+ */
+typedef struct {
+	uint64_t current; /**< Monotonically increasing value. */
+	uint64_t offset;  /**< Offset of zero value. */
+} rte_counter64_t;
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Add value to counter.
+ * Assumes this operation is only done by one thread on the object.
+ *
+ * @param counter
+ *    A pointer to the counter.
+ * @param val
+ *    The value to add to the counter.
+ */
+static inline void
+rte_counter64_add(rte_counter64_t *counter, uint32_t val)
+{
+	counter->current += val;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Reads the value of the current which is current value adjusted by the zero offset.
+ * This operation can be done by any thread.
+ *
+ * @param counter
+ *    A pointer to the counter.
+ * @return
+ *    The value of the counter.
+ */
+__rte_experimental
+static inline uint64_t
+rte_counter64_read(const rte_counter64_t *counter)
+{
+	uint64_t cur, offs;
+
+	cur = rte_atomic_load_explicit(&counter->current, rte_memory_order_consume);
+	offs = rte_atomic_load_explicit(&counter->offset, rte_memory_order_relaxed);
+
+	return cur - offs;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Reset a counter to zero by recording current value and making it the new zero offset.
+ * This operation can be done by any thread.
+ *
+ * @param counter
+ *    A pointer to the counter.
+ */
+__rte_experimental
+static inline void
+rte_counter64_reset(rte_counter64_t *counter)
+{
+	uint64_t cur;
+
+	cur = rte_atomic_load_explicit(&counter->current, rte_memory_order_consume);
+	rte_atomic_store_explicit(&counter->offset, cur, rte_memory_order_relaxed);
+}
+
+#else
+
+/* On x86 32 bit need to use atomic to avoid load/store tearing */
+typedef RTE_ATOMIC(uint64_t) rte_counter64_t;
+
+__rte_experimental
+static inline void
+rte_counter64_add(rte_counter64_t *counter, uint32_t val)
+{
+	rte_atomic_fetch_add_explicit(counter, val, rte_memory_order_relaxed);
+}
+
+__rte_experimental
+static inline uint64_t
+rte_counter64_read(const rte_counter64_t *counter)
+{
+	return rte_atomic_load_explicit(counter, rte_memory_order_relaxed);
+}
+
+__rte_experimental
+static inline void
+rte_counter64_reset(rte_counter64_t *counter)
+{
+	rte_atomic_store_explicit(counter, 0, rte_memory_order_relaxed);
+}
+
+#endif /* RTE_ARCH_64 */
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_COUNTER_H_ */
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v10 2/8] ethdev: add common counters for statistics
  2024-05-22 16:12   ` [PATCH v10 0/8] Common statistics for software PMD's Stephen Hemminger
  2024-05-22 16:12     ` [PATCH v10 1/8] eal: generic 64 bit counter Stephen Hemminger
@ 2024-05-22 16:12     ` Stephen Hemminger
  2024-05-22 16:12     ` [PATCH v10 3/8] net/af_packet: use generic SW stats Stephen Hemminger
                       ` (5 subsequent siblings)
  7 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-22 16:12 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Thomas Monjalon, Ferruh Yigit, Andrew Rybchenko

Introduce common helper routines for keeping track of per-queue
statistics in SW PMD's. The code in several drivers had
copy/pasted the same code for this, but had common issues
with 64 bit counters on 32 bit platforms.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 lib/ethdev/ethdev_swstats.c | 102 +++++++++++++++++++++++++++++++
 lib/ethdev/ethdev_swstats.h | 117 ++++++++++++++++++++++++++++++++++++
 lib/ethdev/meson.build      |   2 +
 lib/ethdev/version.map      |   3 +
 4 files changed, 224 insertions(+)
 create mode 100644 lib/ethdev/ethdev_swstats.c
 create mode 100644 lib/ethdev/ethdev_swstats.h

diff --git a/lib/ethdev/ethdev_swstats.c b/lib/ethdev/ethdev_swstats.c
new file mode 100644
index 0000000000..e5560d2310
--- /dev/null
+++ b/lib/ethdev/ethdev_swstats.c
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <rte_config.h>
+#include <rte_counter.h>
+#include <rte_ethdev.h>
+
+#include "ethdev_driver.h"
+#include "ethdev_swstats.h"
+
+int
+rte_eth_counters_stats_get(const struct rte_eth_dev *dev,
+			   size_t tx_offset, size_t rx_offset,
+			   struct rte_eth_stats *stats)
+{
+	uint64_t packets, bytes, errors;
+	unsigned int i;
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		const void *txq = dev->data->tx_queues[i];
+		const struct rte_eth_counters *counters;
+
+		if (txq == NULL)
+			continue;
+
+		counters = (const struct rte_eth_counters *)((const char *)txq + tx_offset);
+		packets = rte_counter64_read(&counters->packets);
+		bytes = rte_counter64_read(&counters->bytes);
+		errors = rte_counter64_read(&counters->errors);
+
+		stats->opackets += packets;
+		stats->obytes += bytes;
+		stats->oerrors += errors;
+
+		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+			stats->q_opackets[i] = packets;
+			stats->q_obytes[i] = bytes;
+		}
+	}
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		const void *rxq = dev->data->rx_queues[i];
+		const struct rte_eth_counters *counters;
+
+		if (rxq == NULL)
+			continue;
+
+		counters = (const struct rte_eth_counters *)((const char *)rxq + rx_offset);
+		packets = rte_counter64_read(&counters->packets);
+		bytes = rte_counter64_read(&counters->bytes);
+		errors = rte_counter64_read(&counters->errors);
+
+		stats->ipackets += packets;
+		stats->ibytes += bytes;
+		stats->ierrors += errors;
+
+		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+			stats->q_ipackets[i] = packets;
+			stats->q_ibytes[i] = bytes;
+		}
+	}
+
+	stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
+	return 0;
+}
+
+int
+rte_eth_counters_reset(struct rte_eth_dev *dev, size_t tx_offset, size_t rx_offset)
+{
+	struct rte_eth_counters *counters;
+	unsigned int i;
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		void *txq  = dev->data->tx_queues[i];
+
+		if (txq == NULL)
+			continue;
+
+		counters = (struct rte_eth_counters *)((char *)txq + tx_offset);
+		rte_counter64_reset(&counters->packets);
+		rte_counter64_reset(&counters->bytes);
+		rte_counter64_reset(&counters->errors);
+	}
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		void *rxq  = dev->data->rx_queues[i];
+
+		if (rxq == NULL)
+			continue;
+
+		counters = (struct rte_eth_counters *)((char *)rxq + rx_offset);
+		rte_counter64_reset(&counters->packets);
+		rte_counter64_reset(&counters->bytes);
+		rte_counter64_reset(&counters->errors);
+	}
+
+	return 0;
+}
diff --git a/lib/ethdev/ethdev_swstats.h b/lib/ethdev/ethdev_swstats.h
new file mode 100644
index 0000000000..c149b7697e
--- /dev/null
+++ b/lib/ethdev/ethdev_swstats.h
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+#ifndef _RTE_ETHDEV_SWSTATS_H_
+#define _RTE_ETHDEV_SWSTATS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ *
+ * Internal statistics counters for software based devices.
+ * Hardware PMD's should use the hardware counters instead.
+ *
+ * This provides a library for PMD's to keep track of packets and bytes.
+ * It is assumed that this will be used per queue and queues are not
+ * shared by lcores.
+ */
+#include <stddef.h>
+#include <stdint.h>
+
+#include <rte_compat.h>
+#include <rte_counter.h>
+
+struct rte_eth_dev;
+struct rte_eth_stats;
+
+/**
+ * A structure to be embedded in the device driver per-queue data.
+ */
+struct rte_eth_counters {
+	rte_counter64_t	packets;	/**< Total number of packets. */
+	rte_counter64_t	bytes;		/**< Total number of bytes. */
+	rte_counter64_t	errors;		/**< Total number of packets with errors. */
+};
+
+/**
+ * @internal
+ * Increment counters for a single packet.
+ *
+ * @param counters
+ *    Pointer to queue structure containing counters.
+ * @param packets
+ *    Number of packets to count
+ * @param bytes
+ *    Total size of all packet in bytes.
+ */
+__rte_internal
+static inline void
+rte_eth_count_packets(struct rte_eth_counters *counters,
+		      uint16_t packets, uint32_t bytes)
+{
+	rte_counter64_add(&counters->packets, packets);
+	rte_counter64_add(&counters->bytes, bytes);
+}
+
+/**
+ * @internal
+ * Increment error counter.
+ *
+ * @param counters
+ *    Pointer to queue structure containing counters.
+ */
+__rte_internal
+static inline void
+rte_eth_count_error(struct rte_eth_counters *counters)
+{
+	rte_counter64_add(&counters->errors, 1);
+}
+
+/**
+ * @internal
+ * Retrieve the general statistics for all queues.
+ * @see rte_eth_stats_get.
+ *
+ * @param dev
+ *    Pointer to the Ethernet device structure.
+ * @param tx_offset
+ *    Offset from the tx_queue structure where stats are located.
+ * @param rx_offset
+ *    Offset from the rx_queue structure where stats are located.
+ * @param stats
+ *   A pointer to a structure of type *rte_eth_stats* to be filled
+ * @return
+ *   Zero if successful. Non-zero otherwise.
+ */
+__rte_internal
+int rte_eth_counters_stats_get(const struct rte_eth_dev *dev,
+			       size_t tx_offset, size_t rx_offset,
+			       struct rte_eth_stats *stats);
+
+/**
+ * @internal
+ * Reset the statistics for all queues.
+ * @see rte_eth_stats_reset.
+ *
+ * @param dev
+ *    Pointer to the Ethernet device structure.
+ * @param tx_offset
+ *    Offset from the tx_queue structure where stats are located.
+ * @param rx_offset
+ *    Offset from the rx_queue structure where stats are located.
+ * @return
+ *   Zero if successful. Non-zero otherwise.
+ */
+__rte_internal
+int rte_eth_counters_reset(struct rte_eth_dev *dev,
+			   size_t tx_offset, size_t rx_offset);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ETHDEV_SWSTATS_H_ */
diff --git a/lib/ethdev/meson.build b/lib/ethdev/meson.build
index f1d2586591..7ce29a46d4 100644
--- a/lib/ethdev/meson.build
+++ b/lib/ethdev/meson.build
@@ -3,6 +3,7 @@
 
 sources = files(
         'ethdev_driver.c',
+        'ethdev_swstats.c',
         'ethdev_private.c',
         'ethdev_profile.c',
         'ethdev_trace_points.c',
@@ -42,6 +43,7 @@ driver_sdk_headers += files(
         'ethdev_driver.h',
         'ethdev_pci.h',
         'ethdev_vdev.h',
+        'ethdev_swstats.h',
 )
 
 if is_linux
diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
index 79f6f5293b..fc595be278 100644
--- a/lib/ethdev/version.map
+++ b/lib/ethdev/version.map
@@ -358,4 +358,7 @@ INTERNAL {
 	rte_eth_switch_domain_alloc;
 	rte_eth_switch_domain_free;
 	rte_flow_fp_default_ops;
+
+	rte_eth_counters_reset;
+	rte_eth_counters_stats_get;
 };
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v10 3/8] net/af_packet: use generic SW stats
  2024-05-22 16:12   ` [PATCH v10 0/8] Common statistics for software PMD's Stephen Hemminger
  2024-05-22 16:12     ` [PATCH v10 1/8] eal: generic 64 bit counter Stephen Hemminger
  2024-05-22 16:12     ` [PATCH v10 2/8] ethdev: add common counters for statistics Stephen Hemminger
@ 2024-05-22 16:12     ` Stephen Hemminger
  2024-05-22 16:12     ` [PATCH v10 4/8] net/af_xdp: " Stephen Hemminger
                       ` (4 subsequent siblings)
  7 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-22 16:12 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, John W. Linville

Use the new generic SW stats.
Add a note about how errors and kernel full should be handled.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/af_packet/rte_eth_af_packet.c | 78 +++++------------------
 1 file changed, 17 insertions(+), 61 deletions(-)

diff --git a/drivers/net/af_packet/rte_eth_af_packet.c b/drivers/net/af_packet/rte_eth_af_packet.c
index 397a32db58..64fa519812 100644
--- a/drivers/net/af_packet/rte_eth_af_packet.c
+++ b/drivers/net/af_packet/rte_eth_af_packet.c
@@ -10,6 +10,7 @@
 #include <rte_mbuf.h>
 #include <ethdev_driver.h>
 #include <ethdev_vdev.h>
+#include <ethdev_swstats.h>
 #include <rte_malloc.h>
 #include <rte_kvargs.h>
 #include <bus_vdev_driver.h>
@@ -51,8 +52,7 @@ struct pkt_rx_queue {
 	uint16_t in_port;
 	uint8_t vlan_strip;
 
-	volatile unsigned long rx_pkts;
-	volatile unsigned long rx_bytes;
+	struct rte_eth_counters stats;
 };
 
 struct pkt_tx_queue {
@@ -64,11 +64,10 @@ struct pkt_tx_queue {
 	unsigned int framecount;
 	unsigned int framenum;
 
-	volatile unsigned long tx_pkts;
-	volatile unsigned long err_pkts;
-	volatile unsigned long tx_bytes;
+	struct rte_eth_counters stats;
 };
 
+
 struct pmd_internals {
 	unsigned nb_queues;
 
@@ -168,9 +167,10 @@ eth_af_packet_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		num_rx_bytes += mbuf->pkt_len;
 	}
 	pkt_q->framenum = framenum;
-	pkt_q->rx_pkts += num_rx;
-	pkt_q->rx_bytes += num_rx_bytes;
-	return num_rx;
+
+	rte_eth_count_packets(&pkt_q->stats, num_rx, num_rx_bytes);
+
+	return i;
 }
 
 /*
@@ -294,19 +294,16 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	if (sendto(pkt_q->sockfd, NULL, 0, MSG_DONTWAIT, NULL, 0) == -1 &&
 			errno != ENOBUFS && errno != EAGAIN) {
 		/*
-		 * In case of a ENOBUFS/EAGAIN error all of the enqueued
-		 * packets will be considered successful even though only some
-		 * are sent.
+		 * FIXME: if sendto fails kernel is busy should return 0
+		 * and not free the mbufs. Other errors should free the
+		 * buts and increment the tx error count.
 		 */
-
 		num_tx = 0;
 		num_tx_bytes = 0;
 	}
 
 	pkt_q->framenum = framenum;
-	pkt_q->tx_pkts += num_tx;
-	pkt_q->err_pkts += i - num_tx;
-	pkt_q->tx_bytes += num_tx_bytes;
+	rte_eth_count_packets(&pkt_q->stats, num_tx, num_tx_bytes);
 	return i;
 }
 
@@ -386,58 +383,17 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 }
 
 static int
-eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
+eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-	unsigned i, imax;
-	unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
-	unsigned long rx_bytes_total = 0, tx_bytes_total = 0;
-	const struct pmd_internals *internal = dev->data->dev_private;
-
-	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
-	        internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
-	for (i = 0; i < imax; i++) {
-		igb_stats->q_ipackets[i] = internal->rx_queue[i].rx_pkts;
-		igb_stats->q_ibytes[i] = internal->rx_queue[i].rx_bytes;
-		rx_total += igb_stats->q_ipackets[i];
-		rx_bytes_total += igb_stats->q_ibytes[i];
-	}
-
-	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS ?
-	        internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS);
-	for (i = 0; i < imax; i++) {
-		igb_stats->q_opackets[i] = internal->tx_queue[i].tx_pkts;
-		igb_stats->q_obytes[i] = internal->tx_queue[i].tx_bytes;
-		tx_total += igb_stats->q_opackets[i];
-		tx_err_total += internal->tx_queue[i].err_pkts;
-		tx_bytes_total += igb_stats->q_obytes[i];
-	}
-
-	igb_stats->ipackets = rx_total;
-	igb_stats->ibytes = rx_bytes_total;
-	igb_stats->opackets = tx_total;
-	igb_stats->oerrors = tx_err_total;
-	igb_stats->obytes = tx_bytes_total;
-	return 0;
+	return rte_eth_counters_stats_get(dev, offsetof(struct pkt_tx_queue, stats),
+					  offsetof(struct pkt_rx_queue, stats), stats);
 }
 
 static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
-	unsigned i;
-	struct pmd_internals *internal = dev->data->dev_private;
-
-	for (i = 0; i < internal->nb_queues; i++) {
-		internal->rx_queue[i].rx_pkts = 0;
-		internal->rx_queue[i].rx_bytes = 0;
-	}
-
-	for (i = 0; i < internal->nb_queues; i++) {
-		internal->tx_queue[i].tx_pkts = 0;
-		internal->tx_queue[i].err_pkts = 0;
-		internal->tx_queue[i].tx_bytes = 0;
-	}
-
-	return 0;
+	return rte_eth_counters_reset(dev, offsetof(struct pkt_tx_queue, stats),
+				      offsetof(struct pkt_rx_queue, stats));
 }
 
 static int
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v10 4/8] net/af_xdp: use generic SW stats
  2024-05-22 16:12   ` [PATCH v10 0/8] Common statistics for software PMD's Stephen Hemminger
                       ` (2 preceding siblings ...)
  2024-05-22 16:12     ` [PATCH v10 3/8] net/af_packet: use generic SW stats Stephen Hemminger
@ 2024-05-22 16:12     ` Stephen Hemminger
  2024-05-22 16:12     ` [PATCH v10 5/8] net/pcap: " Stephen Hemminger
                       ` (3 subsequent siblings)
  7 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-22 16:12 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Ciara Loftus

Use common code for all SW stats.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/af_xdp/rte_eth_af_xdp.c | 85 ++++++++---------------------
 1 file changed, 22 insertions(+), 63 deletions(-)

diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 6ba455bb9b..c563621798 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -20,6 +20,7 @@
 #include <rte_ethdev.h>
 #include <ethdev_driver.h>
 #include <ethdev_vdev.h>
+#include <ethdev_swstats.h>
 #include <rte_kvargs.h>
 #include <bus_vdev_driver.h>
 #include <rte_string_fns.h>
@@ -120,19 +121,13 @@ struct xsk_umem_info {
 	uint32_t max_xsks;
 };
 
-struct rx_stats {
-	uint64_t rx_pkts;
-	uint64_t rx_bytes;
-	uint64_t rx_dropped;
-};
-
 struct pkt_rx_queue {
 	struct xsk_ring_cons rx;
 	struct xsk_umem_info *umem;
 	struct xsk_socket *xsk;
 	struct rte_mempool *mb_pool;
 
-	struct rx_stats stats;
+	struct rte_eth_counters stats;
 
 	struct xsk_ring_prod fq;
 	struct xsk_ring_cons cq;
@@ -143,17 +138,11 @@ struct pkt_rx_queue {
 	int busy_budget;
 };
 
-struct tx_stats {
-	uint64_t tx_pkts;
-	uint64_t tx_bytes;
-	uint64_t tx_dropped;
-};
-
 struct pkt_tx_queue {
 	struct xsk_ring_prod tx;
 	struct xsk_umem_info *umem;
 
-	struct tx_stats stats;
+	struct rte_eth_counters stats;
 
 	struct pkt_rx_queue *pair;
 	int xsk_queue_idx;
@@ -369,9 +358,7 @@ af_xdp_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	xsk_ring_cons__release(rx, nb_pkts);
 	(void)reserve_fill_queue(umem, nb_pkts, fq_bufs, fq);
 
-	/* statistics */
-	rxq->stats.rx_pkts += nb_pkts;
-	rxq->stats.rx_bytes += rx_bytes;
+	rte_eth_count_packets(&rxq->stats, nb_pkts, rx_bytes);
 
 	return nb_pkts;
 }
@@ -429,10 +416,7 @@ af_xdp_rx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	}
 
 	xsk_ring_cons__release(rx, nb_pkts);
-
-	/* statistics */
-	rxq->stats.rx_pkts += nb_pkts;
-	rxq->stats.rx_bytes += rx_bytes;
+	rte_eth_count_packets(&rxq->stats, nb_pkts, rx_bytes);
 
 	return nb_pkts;
 }
@@ -558,6 +542,7 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 					umem->mb_pool->header_size;
 			offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
 			desc->addr = addr | offset;
+			tx_bytes += mbuf->pkt_len;
 			count++;
 		} else {
 			struct rte_mbuf *local_mbuf =
@@ -585,20 +570,17 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			desc->addr = addr | offset;
 			rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
 					desc->len);
+			tx_bytes += mbuf->pkt_len;
 			rte_pktmbuf_free(mbuf);
 			count++;
 		}
-
-		tx_bytes += mbuf->pkt_len;
 	}
 
 out:
 	xsk_ring_prod__submit(&txq->tx, count);
 	kick_tx(txq, cq);
 
-	txq->stats.tx_pkts += count;
-	txq->stats.tx_bytes += tx_bytes;
-	txq->stats.tx_dropped += nb_pkts - count;
+	rte_eth_count_packets(&txq->stats, count, tx_bytes);
 
 	return count;
 }
@@ -648,8 +630,7 @@ af_xdp_tx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 	kick_tx(txq, cq);
 
-	txq->stats.tx_pkts += nb_pkts;
-	txq->stats.tx_bytes += tx_bytes;
+	rte_eth_count_packets(&txq->stats, nb_pkts, tx_bytes);
 
 	return nb_pkts;
 }
@@ -847,39 +828,26 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 static int
 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-	struct pmd_internals *internals = dev->data->dev_private;
 	struct pmd_process_private *process_private = dev->process_private;
-	struct xdp_statistics xdp_stats;
-	struct pkt_rx_queue *rxq;
-	struct pkt_tx_queue *txq;
-	socklen_t optlen;
-	int i, ret, fd;
+	unsigned int i;
 
-	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		optlen = sizeof(struct xdp_statistics);
-		rxq = &internals->rx_queues[i];
-		txq = rxq->pair;
-		stats->q_ipackets[i] = rxq->stats.rx_pkts;
-		stats->q_ibytes[i] = rxq->stats.rx_bytes;
+	rte_eth_counters_stats_get(dev, offsetof(struct pkt_tx_queue, stats),
+				   offsetof(struct pkt_rx_queue, stats), stats);
 
-		stats->q_opackets[i] = txq->stats.tx_pkts;
-		stats->q_obytes[i] = txq->stats.tx_bytes;
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		struct xdp_statistics xdp_stats;
+		socklen_t optlen = sizeof(xdp_stats);
+		int fd;
 
-		stats->ipackets += stats->q_ipackets[i];
-		stats->ibytes += stats->q_ibytes[i];
-		stats->imissed += rxq->stats.rx_dropped;
-		stats->oerrors += txq->stats.tx_dropped;
 		fd = process_private->rxq_xsk_fds[i];
-		ret = fd >= 0 ? getsockopt(fd, SOL_XDP, XDP_STATISTICS,
-					   &xdp_stats, &optlen) : -1;
-		if (ret != 0) {
+		if (fd < 0)
+			continue;
+		if (getsockopt(fd, SOL_XDP, XDP_STATISTICS,
+			       &xdp_stats, &optlen)  < 0) {
 			AF_XDP_LOG(ERR, "getsockopt() failed for XDP_STATISTICS.\n");
 			return -1;
 		}
 		stats->imissed += xdp_stats.rx_dropped;
-
-		stats->opackets += stats->q_opackets[i];
-		stats->obytes += stats->q_obytes[i];
 	}
 
 	return 0;
@@ -888,17 +856,8 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
-	struct pmd_internals *internals = dev->data->dev_private;
-	int i;
-
-	for (i = 0; i < internals->queue_cnt; i++) {
-		memset(&internals->rx_queues[i].stats, 0,
-					sizeof(struct rx_stats));
-		memset(&internals->tx_queues[i].stats, 0,
-					sizeof(struct tx_stats));
-	}
-
-	return 0;
+	return rte_eth_counters_reset(dev, offsetof(struct pkt_tx_queue, stats),
+				      offsetof(struct pkt_rx_queue, stats));
 }
 
 #ifdef RTE_NET_AF_XDP_LIBBPF_XDP_ATTACH
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v10 5/8] net/pcap: use generic SW stats
  2024-05-22 16:12   ` [PATCH v10 0/8] Common statistics for software PMD's Stephen Hemminger
                       ` (3 preceding siblings ...)
  2024-05-22 16:12     ` [PATCH v10 4/8] net/af_xdp: " Stephen Hemminger
@ 2024-05-22 16:12     ` Stephen Hemminger
  2024-05-22 16:12     ` [PATCH v10 6/8] net/ring: " Stephen Hemminger
                       ` (2 subsequent siblings)
  7 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-22 16:12 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

Use common statistics for SW drivers.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/pcap/pcap_ethdev.c | 100 ++++++++-------------------------
 1 file changed, 22 insertions(+), 78 deletions(-)

diff --git a/drivers/net/pcap/pcap_ethdev.c b/drivers/net/pcap/pcap_ethdev.c
index bfec085045..4689359527 100644
--- a/drivers/net/pcap/pcap_ethdev.c
+++ b/drivers/net/pcap/pcap_ethdev.c
@@ -11,6 +11,7 @@
 
 #include <rte_cycles.h>
 #include <ethdev_driver.h>
+#include <ethdev_swstats.h>
 #include <ethdev_vdev.h>
 #include <rte_kvargs.h>
 #include <rte_malloc.h>
@@ -48,13 +49,6 @@ static uint8_t iface_idx;
 static uint64_t timestamp_rx_dynflag;
 static int timestamp_dynfield_offset = -1;
 
-struct queue_stat {
-	volatile unsigned long pkts;
-	volatile unsigned long bytes;
-	volatile unsigned long err_pkts;
-	volatile unsigned long rx_nombuf;
-};
-
 struct queue_missed_stat {
 	/* last value retrieved from pcap */
 	unsigned int pcap;
@@ -68,7 +62,7 @@ struct pcap_rx_queue {
 	uint16_t port_id;
 	uint16_t queue_id;
 	struct rte_mempool *mb_pool;
-	struct queue_stat rx_stat;
+	struct rte_eth_counters rx_stat;
 	struct queue_missed_stat missed_stat;
 	char name[PATH_MAX];
 	char type[ETH_PCAP_ARG_MAXLEN];
@@ -80,7 +74,7 @@ struct pcap_rx_queue {
 struct pcap_tx_queue {
 	uint16_t port_id;
 	uint16_t queue_id;
-	struct queue_stat tx_stat;
+	struct rte_eth_counters tx_stat;
 	char name[PATH_MAX];
 	char type[ETH_PCAP_ARG_MAXLEN];
 };
@@ -258,14 +252,13 @@ eth_pcap_rx_infinite(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		bufs[i]->data_len = pcap_buf->data_len;
 		bufs[i]->pkt_len = pcap_buf->pkt_len;
 		bufs[i]->port = pcap_q->port_id;
-		rx_bytes += pcap_buf->data_len;
+		rx_bytes += pcap_buf->pkt_len;
 
 		/* Enqueue packet back on ring to allow infinite rx. */
 		rte_ring_enqueue(pcap_q->pkts, pcap_buf);
 	}
 
-	pcap_q->rx_stat.pkts += i;
-	pcap_q->rx_stat.bytes += rx_bytes;
+	rte_eth_count_packets(&pcap_q->rx_stat, i, rx_bytes);
 
 	return i;
 }
@@ -300,7 +293,9 @@ eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		mbuf = rte_pktmbuf_alloc(pcap_q->mb_pool);
 		if (unlikely(mbuf == NULL)) {
-			pcap_q->rx_stat.rx_nombuf++;
+			struct rte_eth_dev *dev = &rte_eth_devices[pcap_q->port_id];
+
+			++dev->data->rx_mbuf_alloc_failed;
 			break;
 		}
 
@@ -315,7 +310,7 @@ eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 						       mbuf,
 						       packet,
 						       header.caplen) == -1)) {
-				pcap_q->rx_stat.err_pkts++;
+				rte_eth_count_error(&pcap_q->rx_stat);
 				rte_pktmbuf_free(mbuf);
 				break;
 			}
@@ -332,9 +327,8 @@ eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		num_rx++;
 		rx_bytes += header.caplen;
 	}
-	pcap_q->rx_stat.pkts += num_rx;
-	pcap_q->rx_stat.bytes += rx_bytes;
 
+	rte_eth_count_packets(&pcap_q->rx_stat, num_rx, rx_bytes);
 	return num_rx;
 }
 
@@ -423,9 +417,8 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	 * we flush the pcap dumper within each burst.
 	 */
 	pcap_dump_flush(dumper);
-	dumper_q->tx_stat.pkts += num_tx;
-	dumper_q->tx_stat.bytes += tx_bytes;
-	dumper_q->tx_stat.err_pkts += nb_pkts - num_tx;
+
+	rte_eth_count_packets(&dumper_q->tx_stat, num_tx, tx_bytes);
 
 	return nb_pkts;
 }
@@ -448,9 +441,7 @@ eth_tx_drop(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		rte_pktmbuf_free(bufs[i]);
 	}
 
-	tx_queue->tx_stat.pkts += nb_pkts;
-	tx_queue->tx_stat.bytes += tx_bytes;
-
+	rte_eth_count_packets(&tx_queue->tx_stat, nb_pkts, tx_bytes);
 	return i;
 }
 
@@ -502,9 +493,7 @@ eth_pcap_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		rte_pktmbuf_free(mbuf);
 	}
 
-	tx_queue->tx_stat.pkts += num_tx;
-	tx_queue->tx_stat.bytes += tx_bytes;
-	tx_queue->tx_stat.err_pkts += i - num_tx;
+	rte_eth_count_packets(&tx_queue->tx_stat, num_tx, tx_bytes);
 
 	return i;
 }
@@ -746,41 +735,12 @@ static int
 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
 	unsigned int i;
-	unsigned long rx_packets_total = 0, rx_bytes_total = 0;
-	unsigned long rx_missed_total = 0;
-	unsigned long rx_nombuf_total = 0, rx_err_total = 0;
-	unsigned long tx_packets_total = 0, tx_bytes_total = 0;
-	unsigned long tx_packets_err_total = 0;
-	const struct pmd_internals *internal = dev->data->dev_private;
-
-	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
-			i < dev->data->nb_rx_queues; i++) {
-		stats->q_ipackets[i] = internal->rx_queue[i].rx_stat.pkts;
-		stats->q_ibytes[i] = internal->rx_queue[i].rx_stat.bytes;
-		rx_nombuf_total += internal->rx_queue[i].rx_stat.rx_nombuf;
-		rx_err_total += internal->rx_queue[i].rx_stat.err_pkts;
-		rx_packets_total += stats->q_ipackets[i];
-		rx_bytes_total += stats->q_ibytes[i];
-		rx_missed_total += queue_missed_stat_get(dev, i);
-	}
 
-	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
-			i < dev->data->nb_tx_queues; i++) {
-		stats->q_opackets[i] = internal->tx_queue[i].tx_stat.pkts;
-		stats->q_obytes[i] = internal->tx_queue[i].tx_stat.bytes;
-		tx_packets_total += stats->q_opackets[i];
-		tx_bytes_total += stats->q_obytes[i];
-		tx_packets_err_total += internal->tx_queue[i].tx_stat.err_pkts;
-	}
+	rte_eth_counters_stats_get(dev, offsetof(struct pcap_tx_queue, tx_stat),
+				   offsetof(struct pcap_rx_queue, rx_stat), stats);
 
-	stats->ipackets = rx_packets_total;
-	stats->ibytes = rx_bytes_total;
-	stats->imissed = rx_missed_total;
-	stats->ierrors = rx_err_total;
-	stats->rx_nombuf = rx_nombuf_total;
-	stats->opackets = tx_packets_total;
-	stats->obytes = tx_bytes_total;
-	stats->oerrors = tx_packets_err_total;
+	for (i = 0; i < dev->data->nb_rx_queues; i++)
+		stats->imissed += queue_missed_stat_get(dev, i);
 
 	return 0;
 }
@@ -789,21 +749,12 @@ static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
 	unsigned int i;
-	struct pmd_internals *internal = dev->data->dev_private;
 
-	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		internal->rx_queue[i].rx_stat.pkts = 0;
-		internal->rx_queue[i].rx_stat.bytes = 0;
-		internal->rx_queue[i].rx_stat.err_pkts = 0;
-		internal->rx_queue[i].rx_stat.rx_nombuf = 0;
-		queue_missed_stat_reset(dev, i);
-	}
+	rte_eth_counters_reset(dev, offsetof(struct pcap_tx_queue, tx_stat),
+			       offsetof(struct pcap_rx_queue, rx_stat));
 
-	for (i = 0; i < dev->data->nb_tx_queues; i++) {
-		internal->tx_queue[i].tx_stat.pkts = 0;
-		internal->tx_queue[i].tx_stat.bytes = 0;
-		internal->tx_queue[i].tx_stat.err_pkts = 0;
-	}
+	for (i = 0; i < dev->data->nb_rx_queues; i++)
+		queue_missed_stat_reset(dev, i);
 
 	return 0;
 }
@@ -929,13 +880,6 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 				pcap_pkt_count);
 			return -EINVAL;
 		}
-
-		/*
-		 * Reset the stats for this queue since eth_pcap_rx calls above
-		 * didn't result in the application receiving packets.
-		 */
-		pcap_q->rx_stat.pkts = 0;
-		pcap_q->rx_stat.bytes = 0;
 	}
 
 	return 0;
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v10 6/8] net/ring: use generic SW stats
  2024-05-22 16:12   ` [PATCH v10 0/8] Common statistics for software PMD's Stephen Hemminger
                       ` (4 preceding siblings ...)
  2024-05-22 16:12     ` [PATCH v10 5/8] net/pcap: " Stephen Hemminger
@ 2024-05-22 16:12     ` Stephen Hemminger
  2024-05-22 16:12     ` [PATCH v10 7/8] net/tap: " Stephen Hemminger
  2024-05-22 16:12     ` [PATCH v10 8/8] net/null: " Stephen Hemminger
  7 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-22 16:12 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Bruce Richardson

Use generic per-queue infrastructure.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/ring/rte_eth_ring.c | 63 +++++++++++----------------------
 1 file changed, 20 insertions(+), 43 deletions(-)

diff --git a/drivers/net/ring/rte_eth_ring.c b/drivers/net/ring/rte_eth_ring.c
index b16f5d55f2..36053e4038 100644
--- a/drivers/net/ring/rte_eth_ring.c
+++ b/drivers/net/ring/rte_eth_ring.c
@@ -7,6 +7,7 @@
 #include "rte_eth_ring.h"
 #include <rte_mbuf.h>
 #include <ethdev_driver.h>
+#include <ethdev_swstats.h>
 #include <rte_malloc.h>
 #include <rte_memcpy.h>
 #include <rte_os_shim.h>
@@ -44,8 +45,8 @@ enum dev_action {
 
 struct ring_queue {
 	struct rte_ring *rng;
-	RTE_ATOMIC(uint64_t) rx_pkts;
-	RTE_ATOMIC(uint64_t) tx_pkts;
+
+	struct rte_eth_counters stats;
 };
 
 struct pmd_internals {
@@ -77,12 +78,12 @@ eth_ring_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 {
 	void **ptrs = (void *)&bufs[0];
 	struct ring_queue *r = q;
-	const uint16_t nb_rx = (uint16_t)rte_ring_dequeue_burst(r->rng,
-			ptrs, nb_bufs, NULL);
-	if (r->rng->flags & RING_F_SC_DEQ)
-		r->rx_pkts += nb_rx;
-	else
-		rte_atomic_fetch_add_explicit(&r->rx_pkts, nb_rx, rte_memory_order_relaxed);
+	uint16_t nb_rx;
+
+	nb_rx = (uint16_t)rte_ring_dequeue_burst(r->rng, ptrs, nb_bufs, NULL);
+
+	rte_counter64_add(&r->stats.packets, nb_rx);
+
 	return nb_rx;
 }
 
@@ -91,12 +92,12 @@ eth_ring_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 {
 	void **ptrs = (void *)&bufs[0];
 	struct ring_queue *r = q;
-	const uint16_t nb_tx = (uint16_t)rte_ring_enqueue_burst(r->rng,
-			ptrs, nb_bufs, NULL);
-	if (r->rng->flags & RING_F_SP_ENQ)
-		r->tx_pkts += nb_tx;
-	else
-		rte_atomic_fetch_add_explicit(&r->tx_pkts, nb_tx, rte_memory_order_relaxed);
+	uint16_t nb_tx;
+
+	nb_tx = (uint16_t)rte_ring_enqueue_burst(r->rng, ptrs, nb_bufs, NULL);
+
+	rte_counter64_add(&r->stats.packets, nb_tx);
+
 	return nb_tx;
 }
 
@@ -193,40 +194,16 @@ eth_dev_info(struct rte_eth_dev *dev,
 static int
 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-	unsigned int i;
-	unsigned long rx_total = 0, tx_total = 0;
-	const struct pmd_internals *internal = dev->data->dev_private;
-
-	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
-			i < dev->data->nb_rx_queues; i++) {
-		stats->q_ipackets[i] = internal->rx_ring_queues[i].rx_pkts;
-		rx_total += stats->q_ipackets[i];
-	}
-
-	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
-			i < dev->data->nb_tx_queues; i++) {
-		stats->q_opackets[i] = internal->tx_ring_queues[i].tx_pkts;
-		tx_total += stats->q_opackets[i];
-	}
-
-	stats->ipackets = rx_total;
-	stats->opackets = tx_total;
-
-	return 0;
+	return rte_eth_counters_stats_get(dev, offsetof(struct ring_queue, stats),
+					  offsetof(struct ring_queue, stats),
+					  stats);
 }
 
 static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
-	unsigned int i;
-	struct pmd_internals *internal = dev->data->dev_private;
-
-	for (i = 0; i < dev->data->nb_rx_queues; i++)
-		internal->rx_ring_queues[i].rx_pkts = 0;
-	for (i = 0; i < dev->data->nb_tx_queues; i++)
-		internal->tx_ring_queues[i].tx_pkts = 0;
-
-	return 0;
+	return rte_eth_counters_reset(dev, offsetof(struct ring_queue, stats),
+				      offsetof(struct ring_queue, stats));
 }
 
 static void
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v10 7/8] net/tap: use generic SW stats
  2024-05-22 16:12   ` [PATCH v10 0/8] Common statistics for software PMD's Stephen Hemminger
                       ` (5 preceding siblings ...)
  2024-05-22 16:12     ` [PATCH v10 6/8] net/ring: " Stephen Hemminger
@ 2024-05-22 16:12     ` Stephen Hemminger
  2024-05-22 16:12     ` [PATCH v10 8/8] net/null: " Stephen Hemminger
  7 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-22 16:12 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

Use new common sw statistics.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/tap/rte_eth_tap.c | 75 ++++++-----------------------------
 drivers/net/tap/rte_eth_tap.h | 15 ++-----
 2 files changed, 16 insertions(+), 74 deletions(-)

diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index 69d9da695b..9cc923fd0c 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -455,7 +455,7 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		/* Packet couldn't fit in the provided mbuf */
 		if (unlikely(rxq->pi.flags & TUN_PKT_STRIP)) {
-			rxq->stats.ierrors++;
+			rte_eth_count_error(&rxq->stats);
 			continue;
 		}
 
@@ -467,7 +467,9 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			struct rte_mbuf *buf = rte_pktmbuf_alloc(rxq->mp);
 
 			if (unlikely(!buf)) {
-				rxq->stats.rx_nombuf++;
+				struct rte_eth_dev *dev = &rte_eth_devices[rxq->in_port];
+				++dev->data->rx_mbuf_alloc_failed;
+
 				/* No new buf has been allocated: do nothing */
 				if (!new_tail || !seg)
 					goto end;
@@ -512,8 +514,7 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		num_rx_bytes += mbuf->pkt_len;
 	}
 end:
-	rxq->stats.ipackets += num_rx;
-	rxq->stats.ibytes += num_rx_bytes;
+	rte_eth_count_packets(&rxq->stats, num_rx, num_rx_bytes);
 
 	if (trigger && num_rx < nb_pkts)
 		rxq->trigger_seen = trigger;
@@ -693,7 +694,7 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			tso_segsz = mbuf_in->tso_segsz + hdrs_len;
 			if (unlikely(tso_segsz == hdrs_len) ||
 				tso_segsz > *txq->mtu) {
-				txq->stats.errs++;
+				rte_eth_count_error(&txq->stats);
 				break;
 			}
 			gso_ctx->gso_size = tso_segsz;
@@ -731,7 +732,8 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		ret = tap_write_mbufs(txq, num_mbufs, mbuf,
 				&num_packets, &num_tx_bytes);
 		if (ret == -1) {
-			txq->stats.errs++;
+			rte_eth_count_error(&txq->stats);
+
 			/* free tso mbufs */
 			if (num_tso_mbufs > 0)
 				rte_pktmbuf_free_bulk(mbuf, num_tso_mbufs);
@@ -749,9 +751,7 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		}
 	}
 
-	txq->stats.opackets += num_packets;
-	txq->stats.errs += nb_pkts - num_tx;
-	txq->stats.obytes += num_tx_bytes;
+	rte_eth_count_packets(&txq->stats, num_packets, num_tx_bytes);
 
 	return num_tx;
 }
@@ -1055,64 +1055,15 @@ tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 static int
 tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats)
 {
-	unsigned int i, imax;
-	unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
-	unsigned long rx_bytes_total = 0, tx_bytes_total = 0;
-	unsigned long rx_nombuf = 0, ierrors = 0;
-	const struct pmd_internals *pmd = dev->data->dev_private;
-
-	/* rx queue statistics */
-	imax = (dev->data->nb_rx_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ?
-		dev->data->nb_rx_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS;
-	for (i = 0; i < imax; i++) {
-		tap_stats->q_ipackets[i] = pmd->rxq[i].stats.ipackets;
-		tap_stats->q_ibytes[i] = pmd->rxq[i].stats.ibytes;
-		rx_total += tap_stats->q_ipackets[i];
-		rx_bytes_total += tap_stats->q_ibytes[i];
-		rx_nombuf += pmd->rxq[i].stats.rx_nombuf;
-		ierrors += pmd->rxq[i].stats.ierrors;
-	}
-
-	/* tx queue statistics */
-	imax = (dev->data->nb_tx_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ?
-		dev->data->nb_tx_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS;
-
-	for (i = 0; i < imax; i++) {
-		tap_stats->q_opackets[i] = pmd->txq[i].stats.opackets;
-		tap_stats->q_obytes[i] = pmd->txq[i].stats.obytes;
-		tx_total += tap_stats->q_opackets[i];
-		tx_err_total += pmd->txq[i].stats.errs;
-		tx_bytes_total += tap_stats->q_obytes[i];
-	}
-
-	tap_stats->ipackets = rx_total;
-	tap_stats->ibytes = rx_bytes_total;
-	tap_stats->ierrors = ierrors;
-	tap_stats->rx_nombuf = rx_nombuf;
-	tap_stats->opackets = tx_total;
-	tap_stats->oerrors = tx_err_total;
-	tap_stats->obytes = tx_bytes_total;
-	return 0;
+	return rte_eth_counters_stats_get(dev, offsetof(struct tx_queue, stats),
+					  offsetof(struct rx_queue, stats), tap_stats);
 }
 
 static int
 tap_stats_reset(struct rte_eth_dev *dev)
 {
-	int i;
-	struct pmd_internals *pmd = dev->data->dev_private;
-
-	for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
-		pmd->rxq[i].stats.ipackets = 0;
-		pmd->rxq[i].stats.ibytes = 0;
-		pmd->rxq[i].stats.ierrors = 0;
-		pmd->rxq[i].stats.rx_nombuf = 0;
-
-		pmd->txq[i].stats.opackets = 0;
-		pmd->txq[i].stats.errs = 0;
-		pmd->txq[i].stats.obytes = 0;
-	}
-
-	return 0;
+	return rte_eth_counters_reset(dev, offsetof(struct tx_queue, stats),
+				      offsetof(struct rx_queue, stats));
 }
 
 static int
diff --git a/drivers/net/tap/rte_eth_tap.h b/drivers/net/tap/rte_eth_tap.h
index 5ac93f93e9..8cba9ea410 100644
--- a/drivers/net/tap/rte_eth_tap.h
+++ b/drivers/net/tap/rte_eth_tap.h
@@ -14,6 +14,7 @@
 #include <linux/if_tun.h>
 
 #include <ethdev_driver.h>
+#include <ethdev_swstats.h>
 #include <rte_ether.h>
 #include <rte_gso.h>
 #include "tap_log.h"
@@ -32,23 +33,13 @@ enum rte_tuntap_type {
 	ETH_TUNTAP_TYPE_MAX,
 };
 
-struct pkt_stats {
-	uint64_t opackets;              /* Number of output packets */
-	uint64_t ipackets;              /* Number of input packets */
-	uint64_t obytes;                /* Number of bytes on output */
-	uint64_t ibytes;                /* Number of bytes on input */
-	uint64_t errs;                  /* Number of TX error packets */
-	uint64_t ierrors;               /* Number of RX error packets */
-	uint64_t rx_nombuf;             /* Nb of RX mbuf alloc failures */
-};
-
 struct rx_queue {
 	struct rte_mempool *mp;         /* Mempool for RX packets */
 	uint32_t trigger_seen;          /* Last seen Rx trigger value */
 	uint16_t in_port;               /* Port ID */
 	uint16_t queue_id;		/* queue ID*/
-	struct pkt_stats stats;         /* Stats for this RX queue */
 	uint16_t nb_rx_desc;            /* max number of mbufs available */
+	struct rte_eth_counters stats;  /* Stats for this RX queue */
 	struct rte_eth_rxmode *rxmode;  /* RX features */
 	struct rte_mbuf *pool;          /* mbufs pool for this queue */
 	struct iovec (*iovecs)[];       /* descriptors for this queue */
@@ -59,7 +50,7 @@ struct tx_queue {
 	int type;                       /* Type field - TUN|TAP */
 	uint16_t *mtu;                  /* Pointer to MTU from dev_data */
 	uint16_t csum:1;                /* Enable checksum offloading */
-	struct pkt_stats stats;         /* Stats for this TX queue */
+	struct rte_eth_counters stats;	/* Stats for this TX queue */
 	struct rte_gso_ctx gso_ctx;     /* GSO context */
 	uint16_t out_port;              /* Port ID */
 	uint16_t queue_id;		/* queue ID*/
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v10 8/8] net/null: use generic SW stats
  2024-05-22 16:12   ` [PATCH v10 0/8] Common statistics for software PMD's Stephen Hemminger
                       ` (6 preceding siblings ...)
  2024-05-22 16:12     ` [PATCH v10 7/8] net/tap: " Stephen Hemminger
@ 2024-05-22 16:12     ` Stephen Hemminger
  7 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-22 16:12 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Tetsuya Mukawa

Use the new common code for statistics.
This also fixes the bug that this driver was not accounting
for bytes.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/null/rte_eth_null.c | 83 +++++++++------------------------
 1 file changed, 21 insertions(+), 62 deletions(-)

diff --git a/drivers/net/null/rte_eth_null.c b/drivers/net/null/rte_eth_null.c
index f4ed3b8a7f..83add9c819 100644
--- a/drivers/net/null/rte_eth_null.c
+++ b/drivers/net/null/rte_eth_null.c
@@ -8,6 +8,7 @@
 #include <rte_mbuf.h>
 #include <ethdev_driver.h>
 #include <ethdev_vdev.h>
+#include <ethdev_swstats.h>
 #include <rte_malloc.h>
 #include <rte_memcpy.h>
 #include <bus_vdev_driver.h>
@@ -37,8 +38,8 @@ struct null_queue {
 	struct rte_mempool *mb_pool;
 	struct rte_mbuf *dummy_packet;
 
-	RTE_ATOMIC(uint64_t) rx_pkts;
-	RTE_ATOMIC(uint64_t) tx_pkts;
+	struct rte_eth_counters tx_stats;
+	struct rte_eth_counters rx_stats;
 };
 
 struct pmd_options {
@@ -101,9 +102,7 @@ eth_null_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 		bufs[i]->port = h->internals->port_id;
 	}
 
-	/* NOTE: review for potential ordering optimization */
-	rte_atomic_fetch_add_explicit(&h->rx_pkts, i, rte_memory_order_seq_cst);
-
+	rte_eth_count_packets(&h->rx_stats, i, i * packet_size);
 	return i;
 }
 
@@ -129,8 +128,7 @@ eth_null_copy_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 		bufs[i]->port = h->internals->port_id;
 	}
 
-	/* NOTE: review for potential ordering optimization */
-	rte_atomic_fetch_add_explicit(&h->rx_pkts, i, rte_memory_order_seq_cst);
+	rte_eth_count_packets(&h->rx_stats, i, i * packet_size);
 
 	return i;
 }
@@ -147,16 +145,17 @@ eth_null_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 {
 	int i;
 	struct null_queue *h = q;
+	uint32_t tx_bytes = 0;
 
 	if ((q == NULL) || (bufs == NULL))
 		return 0;
 
-	for (i = 0; i < nb_bufs; i++)
+	for (i = 0; i < nb_bufs; i++) {
+		tx_bytes += rte_pktmbuf_pkt_len(bufs[i]);
 		rte_pktmbuf_free(bufs[i]);
+	}
 
-	/* NOTE: review for potential ordering optimization */
-	rte_atomic_fetch_add_explicit(&h->tx_pkts, i, rte_memory_order_seq_cst);
-
+	rte_eth_count_packets(&h->tx_stats, i, tx_bytes);
 	return i;
 }
 
@@ -166,20 +165,20 @@ eth_null_copy_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 	int i;
 	struct null_queue *h = q;
 	unsigned int packet_size;
+	uint32_t tx_bytes = 0;
 
 	if ((q == NULL) || (bufs == NULL))
 		return 0;
 
 	packet_size = h->internals->packet_size;
 	for (i = 0; i < nb_bufs; i++) {
+		tx_bytes += rte_pktmbuf_pkt_len(bufs[i]);
 		rte_memcpy(h->dummy_packet, rte_pktmbuf_mtod(bufs[i], void *),
 					packet_size);
 		rte_pktmbuf_free(bufs[i]);
 	}
 
-	/* NOTE: review for potential ordering optimization */
-	rte_atomic_fetch_add_explicit(&h->tx_pkts, i, rte_memory_order_seq_cst);
-
+	rte_eth_count_packets(&h->tx_stats, i, tx_bytes);
 	return i;
 }
 
@@ -322,60 +321,20 @@ eth_dev_info(struct rte_eth_dev *dev,
 }
 
 static int
-eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
+eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-	unsigned int i, num_stats;
-	unsigned long rx_total = 0, tx_total = 0;
-	const struct pmd_internals *internal;
-
-	if ((dev == NULL) || (igb_stats == NULL))
-		return -EINVAL;
-
-	internal = dev->data->dev_private;
-	num_stats = RTE_MIN((unsigned int)RTE_ETHDEV_QUEUE_STAT_CNTRS,
-			RTE_MIN(dev->data->nb_rx_queues,
-				RTE_DIM(internal->rx_null_queues)));
-	for (i = 0; i < num_stats; i++) {
-		/* NOTE: review for atomic access */
-		igb_stats->q_ipackets[i] =
-			internal->rx_null_queues[i].rx_pkts;
-		rx_total += igb_stats->q_ipackets[i];
-	}
-
-	num_stats = RTE_MIN((unsigned int)RTE_ETHDEV_QUEUE_STAT_CNTRS,
-			RTE_MIN(dev->data->nb_tx_queues,
-				RTE_DIM(internal->tx_null_queues)));
-	for (i = 0; i < num_stats; i++) {
-		/* NOTE: review for atomic access */
-		igb_stats->q_opackets[i] =
-			internal->tx_null_queues[i].tx_pkts;
-		tx_total += igb_stats->q_opackets[i];
-	}
-
-	igb_stats->ipackets = rx_total;
-	igb_stats->opackets = tx_total;
-
-	return 0;
+	return rte_eth_counters_stats_get(dev,
+					  offsetof(struct null_queue, tx_stats),
+					  offsetof(struct null_queue, rx_stats),
+					  stats);
 }
 
 static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
-	unsigned int i;
-	struct pmd_internals *internal;
-
-	if (dev == NULL)
-		return -EINVAL;
-
-	internal = dev->data->dev_private;
-	for (i = 0; i < RTE_DIM(internal->rx_null_queues); i++)
-		/* NOTE: review for atomic access */
-		internal->rx_null_queues[i].rx_pkts = 0;
-	for (i = 0; i < RTE_DIM(internal->tx_null_queues); i++)
-		/* NOTE: review for atomic access */
-		internal->tx_null_queues[i].tx_pkts = 0;
-
-	return 0;
+	return rte_eth_counters_reset(dev,
+				      offsetof(struct null_queue, tx_stats),
+				      offsetof(struct null_queue, rx_stats));
 }
 
 static void
-- 
2.43.0


^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v9 1/8] eal: generic 64 bit counter
  2024-05-22 15:37         ` Stephen Hemminger
@ 2024-05-22 17:57           ` Morten Brørup
  2024-05-22 19:01             ` Tyler Retzlaff
  0 siblings, 1 reply; 179+ messages in thread
From: Morten Brørup @ 2024-05-22 17:57 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev, Tyler Retzlaff

> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Wednesday, 22 May 2024 17.38
> 
> On Wed, 22 May 2024 10:31:39 +0200
> Morten Brørup <mb@smartsharesystems.com> wrote:
> 
> > > +/* On 32 bit platform, need to use atomic to avoid load/store
> tearing */
> > > +typedef RTE_ATOMIC(uint64_t) rte_counter64_t;
> >
> > As shown by Godbolt experiments discussed in a previous thread [2],
> non-tearing 64 bit counters can be implemented without using atomic
> instructions on all 32 bit architectures supported by DPDK. So we should
> use the counter/offset design pattern for RTE_ARCH_32 too.
> >
> > [2]:
> https://inbox.dpdk.org/dev/98CBD80474FA8B44BF855DF32C47DC35E9F433@smarts
> erver.smartshare.dk/
> 
> 
> This code built with -O3 and -m32 on godbolt shows split problem.
> 
> #include <stdint.h>
> 
> typedef uint64_t rte_counter64_t;
> 
> void
> rte_counter64_add(rte_counter64_t *counter, uint32_t val)
> {
> 	*counter += val;
> }
> …	*counter = val;
> }
> 
> rte_counter64_add:
>         push    ebx
>         mov     eax, DWORD PTR [esp+8]
>         xor     ebx, ebx
>         mov     ecx, DWORD PTR [esp+12]
>         add     DWORD PTR [eax], ecx
>         adc     DWORD PTR [eax+4], ebx
>         pop     ebx
>         ret
> 
> rte_counter64_read:
>         mov     eax, DWORD PTR [esp+4]
>         mov     edx, DWORD PTR [eax+4]
>         mov     eax, DWORD PTR [eax]
>         ret
> rte_counter64_set:
>         movq    xmm0, QWORD PTR [esp+8]
>         mov     eax, DWORD PTR [esp+4]
>         movq    QWORD PTR [eax], xmm0
>         ret

Sure, atomic might be required on some 32 bit architectures and/or with some compilers.

I envision a variety of 32 bit implementations, optimized for certain architectures/compilers.

Some of them can provide non-tearing 64 bit load/store, so we should also use the counter/offset design pattern for those.


^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v9 1/8] eal: generic 64 bit counter
  2024-05-22 15:33         ` Stephen Hemminger
@ 2024-05-22 18:09           ` Morten Brørup
  2024-05-22 19:53             ` Stephen Hemminger
  0 siblings, 1 reply; 179+ messages in thread
From: Morten Brørup @ 2024-05-22 18:09 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev, Tyler Retzlaff

> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Wednesday, 22 May 2024 17.33
> 
> On Wed, 22 May 2024 10:31:39 +0200
> Morten Brørup <mb@smartsharesystems.com> wrote:
> 
> > > +/**
> > > + * @warning
> > > + * @b EXPERIMENTAL: this API may change without prior notice.
> > > + *
> > > + * On native 64 bit platform, counter is implemented as basic
> > > + * 64 bit unsigned integer that only increases.
> > > + */
> > > +typedef struct {
> > > +	uint64_t current;
> > > +	uint64_t offset;
> > > +} rte_counter64_t;
> >
> > As discussed in the other thread [1], I strongly prefer having
> "current" and "offset" separate, for performance reasons.
> > Keeping each offset close together with its counter will require more
> cache lines than necessary, because the offsets take up space in the hot
> part of a fast path data structure. E.g. the size_bins[] counters could
> fit into one cache line instead of two.
> >
> > [1]:
> https://inbox.dpdk.org/dev/98CBD80474FA8B44BF855DF32C47DC35E9F422@smarts
> erver.smartshare.dk/
> 
> There are no size_bins in the current version of the patch.
> And the number of counters in ethdev part are small so it is less of a
> concern.
> The code is easier to maintain if the counter object is self contained.

I agree that there are advantages to keeping the counter object self contained.

However, these counters are generic, so we cannot assume that there are only very few, based on how the current software device drivers use them.

Someone might want to add size_bins to the software device drivers.
And someone else might want to collect many counters in some application or library structure.

> 
> > > +
> > > +/* On 32 bit platform, need to use atomic to avoid load/store
> tearing */
> > > +typedef RTE_ATOMIC(uint64_t) rte_counter64_t;
> >
> > As shown by Godbolt experiments discussed in a previous thread [2],
> non-tearing 64 bit counters can be implemented without using atomic
> instructions on all 32 bit architectures supported by DPDK. So we should
> use the counter/offset design pattern for RTE_ARCH_32 too.
> >
> > [2]:
> https://inbox.dpdk.org/dev/98CBD80474FA8B44BF855DF32C47DC35E9F433@smarts
> erver.smartshare.dk/
> 
> Bruce found some 32 bit versions of x86 have the problem.
> Godbolt doesn't seem to list 32 bit x86 compiler for Gcc.
> If you try MSVC in 32 bit mode it will split the loads.
> I see no problem on ARM which is the only other 32 bit we care about.
> 

Yeah, there seems to be a lot of work testing compiler behavior here. Let's start with a generic 32 bit implementation based on atomics, which should work correctly on all architectures/compilers.
Optimized architecture- and compiler-optimized variants can be added by interested CPU vendors later.


^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v9 1/8] eal: generic 64 bit counter
  2024-05-22 17:57           ` Morten Brørup
@ 2024-05-22 19:01             ` Tyler Retzlaff
  2024-05-22 19:51               ` Stephen Hemminger
  2024-05-26 14:39               ` Mattias Rönnblom
  0 siblings, 2 replies; 179+ messages in thread
From: Tyler Retzlaff @ 2024-05-22 19:01 UTC (permalink / raw)
  To: Morten Brørup; +Cc: Stephen Hemminger, dev

On Wed, May 22, 2024 at 07:57:01PM +0200, Morten Brørup wrote:
> > From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> > Sent: Wednesday, 22 May 2024 17.38
> > 
> > On Wed, 22 May 2024 10:31:39 +0200
> > Morten Brørup <mb@smartsharesystems.com> wrote:
> > 
> > > > +/* On 32 bit platform, need to use atomic to avoid load/store
> > tearing */
> > > > +typedef RTE_ATOMIC(uint64_t) rte_counter64_t;
> > >
> > > As shown by Godbolt experiments discussed in a previous thread [2],
> > non-tearing 64 bit counters can be implemented without using atomic
> > instructions on all 32 bit architectures supported by DPDK. So we should
> > use the counter/offset design pattern for RTE_ARCH_32 too.
> > >
> > > [2]:
> > https://inbox.dpdk.org/dev/98CBD80474FA8B44BF855DF32C47DC35E9F433@smarts
> > erver.smartshare.dk/
> > 
> > 
> > This code built with -O3 and -m32 on godbolt shows split problem.
> > 
> > #include <stdint.h>
> > 
> > typedef uint64_t rte_counter64_t;
> > 
> > void
> > rte_counter64_add(rte_counter64_t *counter, uint32_t val)
> > {
> > 	*counter += val;
> > }
> > …	*counter = val;
> > }
> > 
> > rte_counter64_add:
> >         push    ebx
> >         mov     eax, DWORD PTR [esp+8]
> >         xor     ebx, ebx
> >         mov     ecx, DWORD PTR [esp+12]
> >         add     DWORD PTR [eax], ecx
> >         adc     DWORD PTR [eax+4], ebx
> >         pop     ebx
> >         ret
> > 
> > rte_counter64_read:
> >         mov     eax, DWORD PTR [esp+4]
> >         mov     edx, DWORD PTR [eax+4]
> >         mov     eax, DWORD PTR [eax]
> >         ret
> > rte_counter64_set:
> >         movq    xmm0, QWORD PTR [esp+8]
> >         mov     eax, DWORD PTR [esp+4]
> >         movq    QWORD PTR [eax], xmm0
> >         ret
> 
> Sure, atomic might be required on some 32 bit architectures and/or with some compilers.

in theory i think you should be able to use generic atomics and
depending on the target you get codegen that works. it might be
something more expensive on 32-bit and nothing on 64-bit etc..

what's the damage if we just use atomic generic and relaxed ordering? is
the codegen not optimal?
 
> I envision a variety of 32 bit implementations, optimized for certain architectures/compilers.
> 
> Some of them can provide non-tearing 64 bit load/store, so we should also use the counter/offset design pattern for those.
> 

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v9 1/8] eal: generic 64 bit counter
  2024-05-22 19:01             ` Tyler Retzlaff
@ 2024-05-22 19:51               ` Stephen Hemminger
  2024-05-26 14:46                 ` Mattias Rönnblom
  2024-05-26 14:39               ` Mattias Rönnblom
  1 sibling, 1 reply; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-22 19:51 UTC (permalink / raw)
  To: Tyler Retzlaff; +Cc: Morten Brørup, dev

On Wed, 22 May 2024 12:01:12 -0700
Tyler Retzlaff <roretzla@linux.microsoft.com> wrote:

> On Wed, May 22, 2024 at 07:57:01PM +0200, Morten Brørup wrote:
> > > From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> > > Sent: Wednesday, 22 May 2024 17.38
> > > 
> > > On Wed, 22 May 2024 10:31:39 +0200
> > > Morten Brørup <mb@smartsharesystems.com> wrote:
> > >   
> > > > > +/* On 32 bit platform, need to use atomic to avoid load/store  
> > > tearing */  
> > > > > +typedef RTE_ATOMIC(uint64_t) rte_counter64_t;  
> > > >
> > > > As shown by Godbolt experiments discussed in a previous thread [2],  
> > > non-tearing 64 bit counters can be implemented without using atomic
> > > instructions on all 32 bit architectures supported by DPDK. So we should
> > > use the counter/offset design pattern for RTE_ARCH_32 too.  
> > > >
> > > > [2]:  
> > > https://inbox.dpdk.org/dev/98CBD80474FA8B44BF855DF32C47DC35E9F433@smarts
> > > erver.smartshare.dk/
> > > 
> > > 
> > > This code built with -O3 and -m32 on godbolt shows split problem.
> > > 
> > > #include <stdint.h>
> > > 
> > > typedef uint64_t rte_counter64_t;
> > > 
> > > void
> > > rte_counter64_add(rte_counter64_t *counter, uint32_t val)
> > > {
> > > 	*counter += val;
> > > }
> > > …	*counter = val;
> > > }
> > > 
> > > rte_counter64_add:
> > >         push    ebx
> > >         mov     eax, DWORD PTR [esp+8]
> > >         xor     ebx, ebx
> > >         mov     ecx, DWORD PTR [esp+12]
> > >         add     DWORD PTR [eax], ecx
> > >         adc     DWORD PTR [eax+4], ebx
> > >         pop     ebx
> > >         ret
> > > 
> > > rte_counter64_read:
> > >         mov     eax, DWORD PTR [esp+4]
> > >         mov     edx, DWORD PTR [eax+4]
> > >         mov     eax, DWORD PTR [eax]
> > >         ret
> > > rte_counter64_set:
> > >         movq    xmm0, QWORD PTR [esp+8]
> > >         mov     eax, DWORD PTR [esp+4]
> > >         movq    QWORD PTR [eax], xmm0
> > >         ret  
> > 
> > Sure, atomic might be required on some 32 bit architectures and/or with some compilers.  
> 
> in theory i think you should be able to use generic atomics and
> depending on the target you get codegen that works. it might be
> something more expensive on 32-bit and nothing on 64-bit etc..
> 
> what's the damage if we just use atomic generic and relaxed ordering? is
> the codegen not optimal?

If we use atomic with relaxed memory order, then compiler for x86 still generates
a locked increment in the fast path. This costs about 100 extra cycles due
to cache and prefetch stall. This whole endeavor is an attempt to avoid that.

PS: looking at the locked increment code for 32 bit involves locked compare
exchange and potential retry. Probably don't care about performance on that platform
anymore.



^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v9 1/8] eal: generic 64 bit counter
  2024-05-22 18:09           ` Morten Brørup
@ 2024-05-22 19:53             ` Stephen Hemminger
  2024-05-22 20:56               ` Morten Brørup
  0 siblings, 1 reply; 179+ messages in thread
From: Stephen Hemminger @ 2024-05-22 19:53 UTC (permalink / raw)
  To: Morten Brørup; +Cc: dev, Tyler Retzlaff

On Wed, 22 May 2024 20:09:23 +0200
Morten Brørup <mb@smartsharesystems.com> wrote:

> > There are no size_bins in the current version of the patch.
> > And the number of counters in ethdev part are small so it is less of a
> > concern.
> > The code is easier to maintain if the counter object is self contained.  
> 
> I agree that there are advantages to keeping the counter object self contained.
> 
> However, these counters are generic, so we cannot assume that there are only very few, based on how the current software device drivers use them.
> 
> Someone might want to add size_bins to the software device drivers.
> And someone else might want to collect many counters in some application or library structure.


No.
The implementation should be as simple and as small as possible for the use case
that is presented in the patch series. Doing something more complex leads to the
classic YAGNI situation, where when the new case really happens the implemenation
just doesn't quite fit.

^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v9 1/8] eal: generic 64 bit counter
  2024-05-22 19:53             ` Stephen Hemminger
@ 2024-05-22 20:56               ` Morten Brørup
  0 siblings, 0 replies; 179+ messages in thread
From: Morten Brørup @ 2024-05-22 20:56 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev, Tyler Retzlaff

> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Wednesday, 22 May 2024 21.54
> 
> On Wed, 22 May 2024 20:09:23 +0200
> Morten Brørup <mb@smartsharesystems.com> wrote:
> 
> > > There are no size_bins in the current version of the patch.
> > > And the number of counters in ethdev part are small so it is less of
> a
> > > concern.
> > > The code is easier to maintain if the counter object is self
> contained.
> >
> > I agree that there are advantages to keeping the counter object self
> contained.
> >
> > However, these counters are generic, so we cannot assume that there
> are only very few, based on how the current software device drivers use
> them.
> >
> > Someone might want to add size_bins to the software device drivers.
> > And someone else might want to collect many counters in some
> application or library structure.
> 
> 
> No.
> The implementation should be as simple and as small as possible for the
> use case
> that is presented in the patch series.

I checked a random one of the use cases presented, rte_eth_af_packet:

The pkt_rx_queue struct grows to two cache lines, with the packets counter in the first cache line and the other counters in the second cache line.
By moving pkt_rx_queue's "sockfd" field below the pointers, the structure is better packed. If following my proposal, i.e. keeping the counters grouped together (and the offsets grouped together), all three counters stay within the first cache line (and the offsets go into the second).

The pkt_tx_queue struct also grows to two cache lines, with the first two counters in the first cache line and the third counter in the second cache line. With my proposal, the counters fit within the first cache line.

> Doing something more complex
> leads to the
> classic YAGNI situation, where when the new case really happens the
> implemenation
> just doesn't quite fit.

I disagree about YAGNI for this patch.
We *do* need the counter API to have the offset separate from the counter to avoid a performance degradation.
It is only slightly more complex, so I'm not convinced it's going to be a problem.
Passing a pointer to the offset as an additional parameter to fetch() and reset() is straightforward.
And using two instances of struct rte_eth_counters, one for counters and one for offsets, isn't complex either.

The rte_eth_af_packet use case shows that the risk of touching an increased number of cache lines (by moving the offsets into the hot part of the structures) is real.

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v3 1/7] eal: generic 64 bit counter
  2024-05-14 15:35     ` [PATCH v3 1/7] eal: generic 64 bit counter Stephen Hemminger
  2024-05-15  9:30       ` Morten Brørup
@ 2024-05-26  6:45       ` Mattias Rönnblom
  1 sibling, 0 replies; 179+ messages in thread
From: Mattias Rönnblom @ 2024-05-26  6:45 UTC (permalink / raw)
  To: Stephen Hemminger, dev; +Cc: Morten Brørup, Tyler Retzlaff

On 2024-05-14 17:35, Stephen Hemminger wrote:
> This header implements 64 bit counters that are NOT atomic
> but are safe against load/store splits on 32 bit platforms.
> 
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> Acked-by: Morten Brørup <mb@smartsharesystems.com>
> ---
>   lib/eal/include/meson.build   |  1 +
>   lib/eal/include/rte_counter.h | 91 +++++++++++++++++++++++++++++++++++
>   2 files changed, 92 insertions(+)
>   create mode 100644 lib/eal/include/rte_counter.h
> 
> diff --git a/lib/eal/include/meson.build b/lib/eal/include/meson.build
> index e94b056d46..c070dd0079 100644
> --- a/lib/eal/include/meson.build
> +++ b/lib/eal/include/meson.build
> @@ -12,6 +12,7 @@ headers += files(
>           'rte_class.h',
>           'rte_common.h',
>           'rte_compat.h',
> +        'rte_counter.h',
>           'rte_debug.h',
>           'rte_dev.h',
>           'rte_devargs.h',
> diff --git a/lib/eal/include/rte_counter.h b/lib/eal/include/rte_counter.h
> new file mode 100644
> index 0000000000..8068d6d26e
> --- /dev/null
> +++ b/lib/eal/include/rte_counter.h
> @@ -0,0 +1,91 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright (c) Stephen Hemminger <stephen@networkplumber.org>
> + */
> +
> +#ifndef _RTE_COUNTER_H_
> +#define _RTE_COUNTER_H_
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +/**
> + * @file
> + * RTE Counter
> + *
> + * A counter is 64 bit value that is safe from split read/write
> + * on 32 bit platforms. It assumes that only one cpu at a time
> + * will update the counter, and another CPU may want to read it.

It's not totally obvious what "split read/write" means.

I think there is a word for this already; atomic. Atomic read/load and 
atomic write/store.

"A counter is value which can be atomically read, atomically written to, 
but does not allow atomic arithmetic operations (such as add), making 
them mostly useful in single-writer scenarios."

> + *
> + * This is a much weaker guarantee than full atomic variables
> + * but is faster since no locked operations are required for update.
> + */
> +
> +#include <stdatomic.h>

This shouldn't read rte_stdatomic.h?

> +
> +#ifdef RTE_ARCH_64
> +/*
> + * On a platform that can support native 64 bit type, no special handling.
> + * These are just wrapper around 64 bit value.
> + */
> +typedef uint64_t rte_counter64_t;
> +
> +/**
> + * Add value to counter.
> + */
> +__rte_experimental
> +static inline void
> +rte_counter64_add(rte_counter64_t *counter, uint32_t val)

Shouldn't 'val' also be uint64_t? Can't see it would be slower.

> +{
> +	*counter += val;
> +}
> +
> +__rte_experimental
> +static inline uint64_t
> +rte_counter64_fetch(const rte_counter64_t *counter)
> +{
> +	return *counter;
> +}
> +
> +__rte_experimental
> +static inline void
> +rte_counter64_reset(rte_counter64_t *counter)
> +{
> +	*counter = 0;
> +}
> +
> +#else
> +/*
> + * On a 32 bit platform need to use atomic to force the compler to not
> + * split 64 bit read/write.
> + */
> +typedef RTE_ATOMIC(uint64_t) rte_counter64_t;

To have an API that sometimes, for certain build configurations and 
architectures, makes some object _Atomic, makes me somewhat uneasy. All 
direct accesses to the object in question (e.g., my_counter++) will be 
atomic with SEQ CST memory model.

The alternative, to always use the regular type (uint64_t in this case), 
and cast to _Atomic (RTE_ATOMIC()) also seems less than ideal.

The atomic bit operations in the bitops patch set takes the latter approach.

> +
> +__rte_experimental
> +static inline void
> +rte_counter64_add(rte_counter64_t *counter, uint32_t val)
> +{
> +	rte_atomic_fetch_add_explicit(counter, val, rte_memory_order_relaxed);

This is overkill, and will generate a locked instruction on x86.

Use an atomic load, a non-atomic add, and an atomic store. A non-atomic 
load would do, but with RTE_ATOMIC() I don't think there's a safe way to 
achieve that.

uint64_t value = *counter;

would be a non-atomic load on non-C11-atomics-builds, but an atomic load 
with SEQ CST memory ordering on C11-atomics-enabled builds.

> +}
> +
> +__rte_experimental
> +static inline uint64_t
> +rte_counter64_fetch(const rte_counter64_t *counter)
> +{
> +	return rte_atomic_load_explicit(counter, rte_memory_order_relaxed);
> +}
> +
> +__rte_experimental
> +static inline void
> +rte_counter64_reset(rte_counter64_t *counter)
> +{
> +	rte_atomic_store_explicit(counter, 0, rte_memory_order_relaxed);
> +}
> +#endif
> +
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* _RTE_COUNTER_H_ */

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC v3] net/af_packet: make stats reset reliable
  2024-05-09 11:37                 ` Morten Brørup
  2024-05-09 14:19                   ` Morten Brørup
@ 2024-05-26  7:03                   ` Mattias Rönnblom
  1 sibling, 0 replies; 179+ messages in thread
From: Mattias Rönnblom @ 2024-05-26  7:03 UTC (permalink / raw)
  To: Morten Brørup, Bruce Richardson
  Cc: Stephen Hemminger, Ferruh Yigit, John W. Linville,
	Thomas Monjalon, dev, Mattias Rönnblom

On 2024-05-09 13:37, Morten Brørup wrote:
>> From: Bruce Richardson [mailto:bruce.richardson@intel.com]
>> Sent: Thursday, 9 May 2024 11.30
>>
>> On Thu, May 09, 2024 at 09:43:16AM +0200, Morten Brørup wrote:
>>>> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
>>>> Sent: Wednesday, 8 May 2024 22.54
>>>>
>>>> On Wed, 8 May 2024 20:48:06 +0100
>>>> Ferruh Yigit <ferruh.yigit@amd.com> wrote:
>>>>
>>>>>>
>>>>>> The idea of load tearing is crazy talk of integral types. It
>> would
>>>> break so many things.
>>>>>> It is the kind of stupid compiler thing that would send Linus on
>> a
>>>> rant and get
>>>>>> the GCC compiler writers in trouble.
>>>>>>
>>>>>> The DPDK has always favored performance over strict safety guard
>>>> rails everywhere.
>>>>>> Switching to making every statistic an atomic operation is not
>> in
>>>> the spirit of
>>>>>> what is required. There is no strict guarantee necessary here.
>>>>>>
>>>>>
>>>>> I kind of agree with Stephen.
>>>>>
>>>>> Thanks Mattias, Morten & Stephen, it was informative discussion.
>> But
>>>> for
>>>>> *SW drivers* stats update and reset is not core functionality and
>> I
>>>>> think we can be OK to get hit on corner cases, instead of
>>>>> over-engineering or making code more complex.
>>>>
>>>>
>>>> I forgot the case of 64 bit values on 32 bit platforms!
>>>> Mostly because haven't cared about 32 bit for years...
>>>>
>>>> The Linux kernel uses some wrappers to handle this.
>>>> On 64 bit platforms they become noop.
>>>> On 32 bit platform, they are protected by a seqlock and updates are
>>>> wrapped by the sequence count.
>>>>
>>>> If we go this way, then doing similar Noop on 64 bit and atomic or
>>>> seqlock
>>>> on 32 bit should be done, but in common helper.
>>>>
>>>> Looking inside FreeBSD, it looks like that has changed over the
>> years as
>>>> well.
>>>>
>>>> 	if_inc_counter
>>>> 		counter_u64_add
>>>> 			atomic_add_64
>>>> But the counters are always per-cpu in this case. So although it
>> does
>>>> use
>>>> locked operation, will always be uncontended.
>>>>
>>>>
>>>> PS: Does DPDK still actually support 32 bit on x86? Can it be
>> dropped
>>>> this cycle?
>>>
>>> We cannot drop 32 bit architecture support altogether.
>>>
>>> But, unlike the Linux kernel, DPDK doesn't need to support ancient 32
>> bit architectures.
>>> If the few 32 bit architectures supported by DPDK provide non-tearing
>> 64 bit loads/stores, we don't need locks (in the fast path) for 64 bit
>> counters.
>>>
>>> In addition to 32 bit x86, DPDK supports ARMv7-A (a 32 bit
>> architecture) and 32 bit ARMv8.
>>> I don't think DPDK support any other 32 bit architectures.
>>>
>>>
>>> As Mattias mentioned, 32 bit x86 can use xmm registers to provide 64
>> bit non-tearing load/store.
>>>
>>
>> Testing this a little in godbolt, I see gcc using xmm registers on 32-
>> bit
>> when updating 64-bit counters, but clang doesn't seem to do so, but
>> instead
>> does 2 stores when writing back the 64 value. (I tried with both
>> volatile
>> and non-volatile 64-bit values, just to see if volatile would encourage
>> clang to do a single store).
>>
>> GCC: https://godbolt.org/z/9eqKfT3hz
>> Clang: https://godbolt.org/z/PT5EqKn4c
> 
> Interesting.
> I guess this can be fixed by manually implementing what GCC does.
> 

If you want an atomic store, use __atomic_store_n() or the equivalent.

On GCC, the resulting code will be compact and use an XMM register for 
the store.

On clang, you get much more instructions including a conditional jump to 
a compiler library function. You also get a compiler warning about 
misaligned atomic may incur significant cost (no surprise). All this on 
32-bit x86. Interesting, you get all those instructions even when you 
explicitly tell the compiler the address should be assumed to be 
aligned. Seems like a bug, or at least a short-coming.

So GCC assumes atomic stores are aligned, which seems perfectly 
reasonable to me.

> I'm more concerned about finding a high-performance (in the fast path) 64 bit counter solution for 32 bit ARM.
> 

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC v3] net/af_packet: make stats reset reliable
  2024-05-09 14:19                   ` Morten Brørup
  2024-05-10  4:56                     ` Stephen Hemminger
@ 2024-05-26  7:07                     ` Mattias Rönnblom
  1 sibling, 0 replies; 179+ messages in thread
From: Mattias Rönnblom @ 2024-05-26  7:07 UTC (permalink / raw)
  To: Morten Brørup, Bruce Richardson
  Cc: Stephen Hemminger, Ferruh Yigit, John W. Linville,
	Thomas Monjalon, dev, Mattias Rönnblom

On 2024-05-09 16:19, Morten Brørup wrote:
>> From: Morten Brørup [mailto:mb@smartsharesystems.com]
>> Sent: Thursday, 9 May 2024 13.37
>>
>>> From: Bruce Richardson [mailto:bruce.richardson@intel.com]
>>> Sent: Thursday, 9 May 2024 11.30
>>>
>>> On Thu, May 09, 2024 at 09:43:16AM +0200, Morten Brørup wrote:
>>>>> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
>>>>> Sent: Wednesday, 8 May 2024 22.54
>>>>>
>>>>> On Wed, 8 May 2024 20:48:06 +0100
>>>>> Ferruh Yigit <ferruh.yigit@amd.com> wrote:
>>>>>
>>>>>>>
>>>>>>> The idea of load tearing is crazy talk of integral types. It
>>> would
>>>>> break so many things.
>>>>>>> It is the kind of stupid compiler thing that would send Linus
>> on
>>> a
>>>>> rant and get
>>>>>>> the GCC compiler writers in trouble.
>>>>>>>
>>>>>>> The DPDK has always favored performance over strict safety
>> guard
>>>>> rails everywhere.
>>>>>>> Switching to making every statistic an atomic operation is not
>>> in
>>>>> the spirit of
>>>>>>> what is required. There is no strict guarantee necessary here.
>>>>>>>
>>>>>>
>>>>>> I kind of agree with Stephen.
>>>>>>
>>>>>> Thanks Mattias, Morten & Stephen, it was informative discussion.
>>> But
>>>>> for
>>>>>> *SW drivers* stats update and reset is not core functionality
>> and
>>> I
>>>>>> think we can be OK to get hit on corner cases, instead of
>>>>>> over-engineering or making code more complex.
>>>>>
>>>>>
>>>>> I forgot the case of 64 bit values on 32 bit platforms!
>>>>> Mostly because haven't cared about 32 bit for years...
>>>>>
>>>>> The Linux kernel uses some wrappers to handle this.
>>>>> On 64 bit platforms they become noop.
>>>>> On 32 bit platform, they are protected by a seqlock and updates
>> are
>>>>> wrapped by the sequence count.
>>>>>
>>>>> If we go this way, then doing similar Noop on 64 bit and atomic or
>>>>> seqlock
>>>>> on 32 bit should be done, but in common helper.
>>>>>
>>>>> Looking inside FreeBSD, it looks like that has changed over the
>>> years as
>>>>> well.
>>>>>
>>>>> 	if_inc_counter
>>>>> 		counter_u64_add
>>>>> 			atomic_add_64
>>>>> But the counters are always per-cpu in this case. So although it
>>> does
>>>>> use
>>>>> locked operation, will always be uncontended.
>>>>>
>>>>>
>>>>> PS: Does DPDK still actually support 32 bit on x86? Can it be
>>> dropped
>>>>> this cycle?
>>>>
>>>> We cannot drop 32 bit architecture support altogether.
>>>>
>>>> But, unlike the Linux kernel, DPDK doesn't need to support ancient
>> 32
>>> bit architectures.
>>>> If the few 32 bit architectures supported by DPDK provide non-
>> tearing
>>> 64 bit loads/stores, we don't need locks (in the fast path) for 64 bit
>>> counters.
>>>>
>>>> In addition to 32 bit x86, DPDK supports ARMv7-A (a 32 bit
>>> architecture) and 32 bit ARMv8.
>>>> I don't think DPDK support any other 32 bit architectures.
>>>>
>>>>
>>>> As Mattias mentioned, 32 bit x86 can use xmm registers to provide 64
>>> bit non-tearing load/store.
>>>>
>>>
>>> Testing this a little in godbolt, I see gcc using xmm registers on 32-
>>> bit
>>> when updating 64-bit counters, but clang doesn't seem to do so, but
>>> instead
>>> does 2 stores when writing back the 64 value. (I tried with both
>>> volatile
>>> and non-volatile 64-bit values, just to see if volatile would
>> encourage
>>> clang to do a single store).
>>>
>>> GCC: https://godbolt.org/z/9eqKfT3hz
>>> Clang: https://godbolt.org/z/PT5EqKn4c
>>
>> Interesting.
>> I guess this can be fixed by manually implementing what GCC does.
>>
>> I'm more concerned about finding a high-performance (in the fast path)
>> 64 bit counter solution for 32 bit ARM.
> 
> Reading up on the topic, and continuing Bruce's experiment on Godbolt, it is possible on 32 bit ARMv7-A too, using LDRD/STRD (Load/Store Register Dual) instructions, which load/store 64 bit from memory into two registers at once.
> 
> Clang is emits more efficient code without volatile.
> GCC requires volatile to use STRD.
> 
> Clang: https://godbolt.org/z/WjdTq6EKh
> GCC: https://godbolt.org/z/qq9j7d4Ea
> 
> Summing it up, it is possible to implement non-tearing 64 bit high-performance (lockless, barrier-free) counters on the 32 bit architectures supported by DPDK.
> 
> But the implementation is both architecture and compiler specific.
> So it seems a "64 bit counters" library would be handy. (Or a "non-tearing 64 bit integers" library, for support of the signed variant too; but I don't think we need that.)
> We can use uint64_t as the underlying type and type cast in the library (where needed by the specific compiler/architecture), or introduce a new rte_ctr64_t type to ensure that accessor functions are always used and the developer doesn't have to worry about tearing on 32 bit architectures.
> 
> The most simple variant of such a library only provides load and store functions. The API would look like this:
> 
> uint64_t inline
> rte_ctr64_get(const rte_ctr64_t *const ctr);
> 
> void inline
> rte_ctr64_set(rte_ctr64_t *const ctr, const uint64_t value);
> 
> And if some CPU offers special instructions for increment or addition, faster (regarding performance) and/or more compact (regarding instruction memory) than a sequence of load-add-store instructions:
> 
> void inline
> rte_ctr64_inc(rte_ctr64_t *const ctr);
> 
> void inline
> rte_ctr64_add(rte_ctr64_t *const ctr, const uint64_t value);
> 
> <feature creep>
> And perhaps atomic variants of all these functions, with explicit and/or relaxed memory ordering, for counters shared by multiple threads.

Isn't rte_ctr64_get() already an atomic load with relaxed memory ordering?

> </feature creep>
> 

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC v3] net/af_packet: make stats reset reliable
  2024-05-10  9:14                       ` Morten Brørup
@ 2024-05-26  7:10                         ` Mattias Rönnblom
  0 siblings, 0 replies; 179+ messages in thread
From: Mattias Rönnblom @ 2024-05-26  7:10 UTC (permalink / raw)
  To: Morten Brørup, Stephen Hemminger
  Cc: Bruce Richardson, Ferruh Yigit, John W. Linville,
	Thomas Monjalon, dev, Mattias Rönnblom

On 2024-05-10 11:14, Morten Brørup wrote:
>> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
>> Sent: Friday, 10 May 2024 06.56
>>
>> On Thu, 9 May 2024 16:19:08 +0200
>> Morten Brørup <mb@smartsharesystems.com> wrote:
>>
>>>> From: Morten Brørup [mailto:mb@smartsharesystems.com]
>>>> Sent: Thursday, 9 May 2024 13.37
>>>>
>>>>> From: Bruce Richardson [mailto:bruce.richardson@intel.com]
>>>>> Sent: Thursday, 9 May 2024 11.30
>>>>>
>>>>> On Thu, May 09, 2024 at 09:43:16AM +0200, Morten Brørup wrote:
>>>>>>> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
>>>>>>> Sent: Wednesday, 8 May 2024 22.54
>>>>>>>
>>>>>>> On Wed, 8 May 2024 20:48:06 +0100
>>>>>>> Ferruh Yigit <ferruh.yigit@amd.com> wrote:
>>>>>>>
>>>>>>>>>
>>>>>>>>> The idea of load tearing is crazy talk of integral types.
>> It
>>>>> would
>>>>>>> break so many things.
>>>>>>>>> It is the kind of stupid compiler thing that would send
>> Linus
>>>> on
>>>>> a
>>>>>>> rant and get
>>>>>>>>> the GCC compiler writers in trouble.
>>>>>>>>>
>>>>>>>>> The DPDK has always favored performance over strict safety
>>>> guard
>>>>>>> rails everywhere.
>>>>>>>>> Switching to making every statistic an atomic operation is
>> not
>>>>> in
>>>>>>> the spirit of
>>>>>>>>> what is required. There is no strict guarantee necessary
>> here.
>>>>>>>>>
>>>>>>>>
>>>>>>>> I kind of agree with Stephen.
>>>>>>>>
>>>>>>>> Thanks Mattias, Morten & Stephen, it was informative
>> discussion.
>>>>> But
>>>>>>> for
>>>>>>>> *SW drivers* stats update and reset is not core
>> functionality
>>>> and
>>>>> I
>>>>>>>> think we can be OK to get hit on corner cases, instead of
>>>>>>>> over-engineering or making code more complex.
>>>>>>>
>>>>>>>
>>>>>>> I forgot the case of 64 bit values on 32 bit platforms!
>>>>>>> Mostly because haven't cared about 32 bit for years...
>>>>>>>
>>>>>>> The Linux kernel uses some wrappers to handle this.
>>>>>>> On 64 bit platforms they become noop.
>>>>>>> On 32 bit platform, they are protected by a seqlock and
>> updates
>>>> are
>>>>>>> wrapped by the sequence count.
>>>>>>>
>>>>>>> If we go this way, then doing similar Noop on 64 bit and
>> atomic or
>>>>>>> seqlock
>>>>>>> on 32 bit should be done, but in common helper.
>>>>>>>
>>>>>>> Looking inside FreeBSD, it looks like that has changed over
>> the
>>>>> years as
>>>>>>> well.
>>>>>>>
>>>>>>> 	if_inc_counter
>>>>>>> 		counter_u64_add
>>>>>>> 			atomic_add_64
>>>>>>> But the counters are always per-cpu in this case. So although
>> it
>>>>> does
>>>>>>> use
>>>>>>> locked operation, will always be uncontended.
>>>>>>>
>>>>>>>
>>>>>>> PS: Does DPDK still actually support 32 bit on x86? Can it be
>>>>> dropped
>>>>>>> this cycle?
>>>>>>
>>>>>> We cannot drop 32 bit architecture support altogether.
>>>>>>
>>>>>> But, unlike the Linux kernel, DPDK doesn't need to support
>> ancient
>>>> 32
>>>>> bit architectures.
>>>>>> If the few 32 bit architectures supported by DPDK provide non-
>>>> tearing
>>>>> 64 bit loads/stores, we don't need locks (in the fast path) for 64
>> bit
>>>>> counters.
>>>>>>
>>>>>> In addition to 32 bit x86, DPDK supports ARMv7-A (a 32 bit
>>>>> architecture) and 32 bit ARMv8.
>>>>>> I don't think DPDK support any other 32 bit architectures.
>>>>>>
>>>>>>
>>>>>> As Mattias mentioned, 32 bit x86 can use xmm registers to
>> provide 64
>>>>> bit non-tearing load/store.
>>>>>>
>>>>>
>>>>> Testing this a little in godbolt, I see gcc using xmm registers on
>> 32-
>>>>> bit
>>>>> when updating 64-bit counters, but clang doesn't seem to do so,
>> but
>>>>> instead
>>>>> does 2 stores when writing back the 64 value. (I tried with both
>>>>> volatile
>>>>> and non-volatile 64-bit values, just to see if volatile would
>>>> encourage
>>>>> clang to do a single store).
>>>>>
>>>>> GCC: https://godbolt.org/z/9eqKfT3hz
>>>>> Clang: https://godbolt.org/z/PT5EqKn4c
>>>>
>>>> Interesting.
>>>> I guess this can be fixed by manually implementing what GCC does.
>>>>
>>>> I'm more concerned about finding a high-performance (in the fast
>> path)
>>>> 64 bit counter solution for 32 bit ARM.
>>>
>>> Reading up on the topic, and continuing Bruce's experiment on Godbolt,
>> it is possible on 32 bit ARMv7-A too, using LDRD/STRD (Load/Store
>> Register Dual) instructions, which load/store 64 bit from memory into
>> two registers at once.
>>>
>>> Clang is emits more efficient code without volatile.
>>> GCC requires volatile to use STRD.
>>>
>>> Clang: https://godbolt.org/z/WjdTq6EKh
>>> GCC: https://godbolt.org/z/qq9j7d4Ea
>>>
>>> Summing it up, it is possible to implement non-tearing 64 bit high-
>> performance (lockless, barrier-free) counters on the 32 bit
>> architectures supported by DPDK.
>>>
>>> But the implementation is both architecture and compiler specific.
>>> So it seems a "64 bit counters" library would be handy. (Or a "non-
>> tearing 64 bit integers" library, for support of the signed variant too;
>> but I don't think we need that.)
>>> We can use uint64_t as the underlying type and type cast in the
>> library (where needed by the specific compiler/architecture), or
>> introduce a new rte_ctr64_t type to ensure that accessor functions are
>> always used and the developer doesn't have to worry about tearing on 32
>> bit architectures.
>>>
>>> The most simple variant of such a library only provides load and store
>> functions. The API would look like this:
>>>
>>> uint64_t inline
>>> rte_ctr64_get(const rte_ctr64_t *const ctr);
>>>
>>> void inline
>>> rte_ctr64_set(rte_ctr64_t *const ctr, const uint64_t value);
>>>
>>> And if some CPU offers special instructions for increment or addition,
>> faster (regarding performance) and/or more compact (regarding
>> instruction memory) than a sequence of load-add-store instructions:
>>>
>>> void inline
>>> rte_ctr64_inc(rte_ctr64_t *const ctr);
>>>
>>> void inline
>>> rte_ctr64_add(rte_ctr64_t *const ctr, const uint64_t value);
> 
> Note: 32 bit architectures might achieve higher performance if the "value" parameter to rte_ctr64_add() is unsigned long (or unsigned int) instead of uint64_t.
> 

If the rte_ctr64_add() implementation is kept in the header file (or, 
always in case of LTO), I don't see how that could make a difference, 
assuming the operand is of a smaller (<64 bits) type.

>>>
>>> <feature creep>
>>> And perhaps atomic variants of all these functions, with explicit
>> and/or relaxed memory ordering, for counters shared by multiple threads.
>>> </feature creep>
>>>
>>
>>
>> This kind of what I am experimenting with but...
> 
> Excellent!
> 
>> Intend to keep the details of the counters read and update in one file
>> and not as inlines.
> 
> Please note that traffic management applications maintain many counters (e.g. per-flow, per-subscriber and per-link packet and byte counters, some also per QoS class), so rte_ctr64_add() must have the highest performance technically possible.
> 
> For reference, the packet scheduling code in our application updates 28 statistics counters per burst of packets. (In addition to internal state variables for e.g. queue lenghts.)
> 
> Furthermore, our application processes and displays live statistics with one second granularity, using a separate thread. Although statistics processing is not part of the fast path, the sheer number of counters processed requires high performance read access to those counters.
> 
> 
> <more feature creep>
> Some groups of counters are maintained locally in the inner loop, and then added in bulk to the "public" statistics afterwards. Conceptually:
> 
> struct stats_per_prio {
> 	uint64_t	forwarded_bytes;
> 	uint64_t	forwarded_packets;
> 	uint64_t	marked_bytes;
> 	uint64_t	marked_packets;
> 	uint64_t	dropped_bytes;
> 	uint64_t	dropped_packets;
> };
> 
> If this is a common design pattern in DPDK (drivers, libraries and/or applications), perhaps also provide a performance optimized function for architectures offering vector instructions:
> 
> void inline
> rte_ctr64_add_bulk(
> 		rte_ctr64_t *const ctrs,
> 		const unsigned long *const values,
> 		const unsigned int n /* compile time constant */);
> 
> This slightly resembles the Linux kernel's design pattern, where counters are updated in bulk, protected by a common lock for the bulk update. (However, DPDK has no lock, so the motivation for optimizing for this design pattern is only "nice to have".)
> 
> PS:
> Many DPDK applications are 64 bit only, including the SmartShare appliances, and can easily manage 64 bit counters without all this.
> However, if the DPDK project still considers 32 bit architectures first class citizens, 64 bit counters should have high performance on 32 bit architectures too.
> 

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC v3] net/af_packet: make stats reset reliable
  2024-05-08 15:23       ` Stephen Hemminger
  2024-05-08 19:48         ` Ferruh Yigit
@ 2024-05-26  7:21         ` Mattias Rönnblom
  2024-10-04 17:40           ` Stephen Hemminger
  1 sibling, 1 reply; 179+ messages in thread
From: Mattias Rönnblom @ 2024-05-26  7:21 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Ferruh Yigit, John W. Linville, Thomas Monjalon, dev,
	Mattias Rönnblom, Morten Brørup

On 2024-05-08 17:23, Stephen Hemminger wrote:
> On Wed, 8 May 2024 09:19:02 +0200
> Mattias Rönnblom <hofors@lysator.liu.se> wrote:
> 
>> On 2024-05-04 00:00, Stephen Hemminger wrote:
>>> On Fri, 3 May 2024 16:45:47 +0100
>>> Ferruh Yigit <ferruh.yigit@amd.com> wrote:
>>>    
>>>> For stats reset, use an offset instead of zeroing out actual stats values,
>>>> get_stats() displays diff between stats and offset.
>>>> This way stats only updated in datapath and offset only updated in stats
>>>> reset function. This makes stats reset function more reliable.
>>>>
>>>> As stats only written by single thread, we can remove 'volatile' qualifier
>>>> which should improve the performance in datapath.
>>>>
>>>> While updating around, 'igb_stats' parameter renamed as 'stats'.
>>>>
>>>> Signed-off-by: Ferruh Yigit <ferruh.yigit@amd.com>
>>>> ---
>>>> Cc: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
>>>> Cc: Stephen Hemminger <stephen@networkplumber.org>
>>>> Cc: Morten Brørup <mb@smartsharesystems.com>
>>>>
>>>> This update triggered by mail list discussion [1].
>>>>
>>>> [1]
>>>> https://inbox.dpdk.org/dev/3b2cf48e-2293-4226-b6cd-5f4dd3969f99@lysator.liu.se/
>>>
>>>
>>> NAK
>>>
>>> I did not hear a good argument why atomic or volatile was necessary in the first place.
>>> Why?
>>>    
>>
>> On the reader side, loads should be atomic.
>> On the writer side, stores should be atomic.
>>
>> Updates (stores) should actually occur in a timely manner. The complete
>> read-modify-write cycle need not be atomic, since we only have a single
>> writer. All this for the per-lcore counter case.
>>
>> If load or store tearing occurs, the counter values may occasionally
>> take totally bogus values. I think that should be avoided. Especially
>> since it will likely come at a very reasonable cost.
>>
>>   From what it seems to me, load or store tearing may well occur. GCC may
>> generate two 32-bit stores for a program-level 64-bit store on 32-bit
>> x86. If you have constant and immediate-data store instructions,
>> constant writes may also be end up teared. The kernel documentation has
>> some example of this. Add LTO, it's not necessarily going to be all that
>> clear what is storing-a-constant and what is not.
>>
>> Maybe you care a little less if statistics are occasionally broken, or
>> some transient, inconsistent state, but generally they should work, and
>> they should never have some totally bogus values. So, statistics aren't
>> snow flakes, mostly just business as usual.
>>
>> We can't both have a culture that promotes C11-style parallel
>> programming, or, at the extreme, push the C11 APIs as-is, and the say
>> "and btw you don't have to care about the standard when it comes to
>> statistics".
>>
>> We could adopt the Linux kernel's rules, programming model, and APIs
>> (ignoring legal issues). That would be very old school, maybe somewhat
>> over-engineered for our purpose, include a fair amount of inline
>> assembler, and also and may well depend on GCC or GCC-like compilers,
>> just like what I believe the kernel does.
>>
>> We could use something in-between, heavily inspired by C11 but still
>> with an opportunity to work around compiler issues, library issues, and
>> extend the API for our use case.
>>
>> I agree we shouldn't have to mark statistics _Atomic, or RTE_ATOMIC(),
>> rte_atomic64_t, or rte_sometimes_atomic_and_sometimes_not64_t. Just
>> keeping the usual C integer types seems like a better option to me.
>>
>>> Why is this driver special (a snowflake) compared to all the other drivers doing software
>>> statistics (tap, virtio, xdp, ring, memif, netvsc, vmware)?
>>
>> If a broken piece of code has been copied around, one place is going to
>> be the first to be fixed.
> 
> 
> I dislike when any driver does something completely different than valid precedent.
> No other driver in DPDK, Vpp, FreeBSD, Linux (and probably Windows) uses atomic for
> updating statistics. We even got performance benefit at MS from removing atomic
> increment of staistics in internal layers.
> 

All of those are using atomic stores when updating the statistics, I'm 
sure. Assuring a store being atomic is one thing, and assuring the whole 
read-modify-write cycle is atomic is a completely different (and very 
much more expensive) thing.

> The idea of load tearing is crazy talk of integral types. It would break so many things.
> It is the kind of stupid compiler thing that would send Linus on a rant and get
> the GCC compiler writers in trouble.
> 

On 32-bit x86, store tearing for 64-bit integral types is the order of 
the day.

For <64 bit types, I agree. The only cases (like the one listed in the 
kernel documentation), are going to be rare indeed.

> The DPDK has always favored performance over strict safety guard rails everywhere.
> Switching to making every statistic an atomic operation is not in the spirit of
> what is required. There is no strict guarantee necessary here.
> 

I think we agree but just use different terminology.


^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v3 1/7] eal: generic 64 bit counter
  2024-05-15  9:30       ` Morten Brørup
  2024-05-15 15:03         ` Stephen Hemminger
@ 2024-05-26  7:34         ` Mattias Rönnblom
  1 sibling, 0 replies; 179+ messages in thread
From: Mattias Rönnblom @ 2024-05-26  7:34 UTC (permalink / raw)
  To: Morten Brørup, Stephen Hemminger, dev,
	Mattias Rönnblom, Ferruh Yigit, bruce.richardson

On 2024-05-15 11:30, Morten Brørup wrote:
> +To: @Mattias, @Ferruh, @Bruce, participants in a related discussion
> 
>> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
>> Sent: Tuesday, 14 May 2024 17.35
>>
>> This header implements 64 bit counters that are NOT atomic
>> but are safe against load/store splits on 32 bit platforms.
>>
>> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
>> Acked-by: Morten Brørup <mb@smartsharesystems.com>
>> ---
> 
> With a long term perspective, I consider this patch very useful.
> And its 32 bit implementation can be optimized for various architectures/compilers later.
> 
> 
> In addition, it would be "nice to have" if reset() and fetch() could be called from another thread than the thread adding to the counter.
> 

reset() from a different thread, and you enter the "multiple writer" 
domain. Both reset and add needs to be atomic over the whole 
read-modify-write cycle. (Add and reset are really no different.)

...unless you keep an offset per counter, per the discussion in the 
other thread, where I proposed something like that as a common counter 
API (in case we really needed MT safe reset).

It seems to me that we shouldn't provide a MT safe reset. By some means, 
the user must assure there is only a single writer (the "resetter" or 
the "adder").

> As previously discussed [1], I think it can be done without significantly affecting fast path add() performance, by using an "offset" with Release-Consume ordering.
> 
> [1]: https://inbox.dpdk.org/dev/98CBD80474FA8B44BF855DF32C47DC35E9F427@smartserver.smartshare.dk/
> 
> 
> rte_counter64_add(rte_counter64_t *counter, uint32_t val)
> {
> 	// Write "counter" with memory_order_relaxed, so
> 	// it eventually becomes visible in other threads.
> 
> 	rte_counter64_t ctr = *counter + val;
> 	rte_atomic_store_explicit(counter, ctr, rte_memory_order_relaxed);
> }
> 
> rte_counter64_get(rte_counter64_t *counter, rte_counter64_t *offset)
> {
> 	// Read "offset" with memory_order_consume, so:
> 	// - no reads or writes in the current thread dependent on "offset"
> 	//   can be reordered before this load, and
> 	// - writes to "counter" (a data-dependent variable)
> 	//   in other threads that release "offset" are visible in the current thread.
> 
> 	rte_counter64_t off = rte_atomic_load_explicit(offset, rte_memory_order_consume);
> 	rte_counter64_t ctr = rte_atomic_load_explicit(counter, rte_memory_order_relaxed);
> 
> 	return ctr - off;
> }
> 
> rte_counter64_reset(rte_counter64_t *counter, rte_counter64_t *offset)
> {
> 	// Write "offset" with memory_order_release, so
> 	// "counter" cannot be visible after it.
> 
> 	rte_counter64_t ctr = rte_atomic_load_explicit(offset, rte_memory_order_relaxed);
> 	rte_atomic_store_explicit(offset, ctr, rte_memory_order_release);
> }
> 
> 
> Support for counters shared by multi threads, e.g. rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed, should be provided too:
> 
> rte_counter64_mt_add(rte_counter64_t *counter, uint32_t val)
> {
> 	rte_atomic_fetch_add_explicit(counter, val, rte_memory_order_relaxed);
> }
> 
> 
>>   lib/eal/include/meson.build   |  1 +
>>   lib/eal/include/rte_counter.h | 91 +++++++++++++++++++++++++++++++++++
>>   2 files changed, 92 insertions(+)
>>   create mode 100644 lib/eal/include/rte_counter.h
>>
>> diff --git a/lib/eal/include/meson.build b/lib/eal/include/meson.build
>> index e94b056d46..c070dd0079 100644
>> --- a/lib/eal/include/meson.build
>> +++ b/lib/eal/include/meson.build
>> @@ -12,6 +12,7 @@ headers += files(
>>           'rte_class.h',
>>           'rte_common.h',
>>           'rte_compat.h',
>> +        'rte_counter.h',
>>           'rte_debug.h',
>>           'rte_dev.h',
>>           'rte_devargs.h',
>> diff --git a/lib/eal/include/rte_counter.h b/lib/eal/include/rte_counter.h
>> new file mode 100644
>> index 0000000000..8068d6d26e
>> --- /dev/null
>> +++ b/lib/eal/include/rte_counter.h
>> @@ -0,0 +1,91 @@
>> +/* SPDX-License-Identifier: BSD-3-Clause
>> + * Copyright (c) Stephen Hemminger <stephen@networkplumber.org>
>> + */
>> +
>> +#ifndef _RTE_COUNTER_H_
>> +#define _RTE_COUNTER_H_
>> +
>> +#ifdef __cplusplus
>> +extern "C" {
>> +#endif
>> +
>> +/**
>> + * @file
>> + * RTE Counter
>> + *
>> + * A counter is 64 bit value that is safe from split read/write
>> + * on 32 bit platforms. It assumes that only one cpu at a time
>> + * will update the counter, and another CPU may want to read it.
>> + *
>> + * This is a much weaker guarantee than full atomic variables
>> + * but is faster since no locked operations are required for update.
>> + */
>> +
>> +#include <stdatomic.h>
>> +
>> +#ifdef RTE_ARCH_64
>> +/*
>> + * On a platform that can support native 64 bit type, no special handling.
>> + * These are just wrapper around 64 bit value.
>> + */
>> +typedef uint64_t rte_counter64_t;
>> +
>> +/**
>> + * Add value to counter.
>> + */
>> +__rte_experimental
>> +static inline void
>> +rte_counter64_add(rte_counter64_t *counter, uint32_t val)
>> +{
>> +	*counter += val;
>> +}
>> +
>> +__rte_experimental
>> +static inline uint64_t
>> +rte_counter64_fetch(const rte_counter64_t *counter)
>> +{
>> +	return *counter;
>> +}
>> +
>> +__rte_experimental
>> +static inline void
>> +rte_counter64_reset(rte_counter64_t *counter)
>> +{
>> +	*counter = 0;
>> +}
>> +
>> +#else
>> +/*
>> + * On a 32 bit platform need to use atomic to force the compler to not
>> + * split 64 bit read/write.
>> + */
>> +typedef RTE_ATOMIC(uint64_t) rte_counter64_t;
>> +
>> +__rte_experimental
>> +static inline void
>> +rte_counter64_add(rte_counter64_t *counter, uint32_t val)
>> +{
>> +	rte_atomic_fetch_add_explicit(counter, val, rte_memory_order_relaxed);
>> +}
>> +
>> +__rte_experimental
>> +static inline uint64_t
>> +rte_counter64_fetch(const rte_counter64_t *counter)
>> +{
>> +	return rte_atomic_load_explicit(counter, rte_memory_order_relaxed);
>> +}
>> +
>> +__rte_experimental
>> +static inline void
>> +rte_counter64_reset(rte_counter64_t *counter)
>> +{
>> +	rte_atomic_store_explicit(counter, 0, rte_memory_order_relaxed);
>> +}
>> +#endif
>> +
>> +
>> +#ifdef __cplusplus
>> +}
>> +#endif
>> +
>> +#endif /* _RTE_COUNTER_H_ */
>> --
>> 2.43.0
> 

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v9 1/8] eal: generic 64 bit counter
  2024-05-22 19:01             ` Tyler Retzlaff
  2024-05-22 19:51               ` Stephen Hemminger
@ 2024-05-26 14:39               ` Mattias Rönnblom
  1 sibling, 0 replies; 179+ messages in thread
From: Mattias Rönnblom @ 2024-05-26 14:39 UTC (permalink / raw)
  To: Tyler Retzlaff, Morten Brørup; +Cc: Stephen Hemminger, dev

On 2024-05-22 21:01, Tyler Retzlaff wrote:
> On Wed, May 22, 2024 at 07:57:01PM +0200, Morten Brørup wrote:
>>> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
>>> Sent: Wednesday, 22 May 2024 17.38
>>>
>>> On Wed, 22 May 2024 10:31:39 +0200
>>> Morten Brørup <mb@smartsharesystems.com> wrote:
>>>
>>>>> +/* On 32 bit platform, need to use atomic to avoid load/store
>>> tearing */
>>>>> +typedef RTE_ATOMIC(uint64_t) rte_counter64_t;
>>>>
>>>> As shown by Godbolt experiments discussed in a previous thread [2],
>>> non-tearing 64 bit counters can be implemented without using atomic
>>> instructions on all 32 bit architectures supported by DPDK. So we should
>>> use the counter/offset design pattern for RTE_ARCH_32 too.
>>>>
>>>> [2]:
>>> https://inbox.dpdk.org/dev/98CBD80474FA8B44BF855DF32C47DC35E9F433@smarts
>>> erver.smartshare.dk/
>>>
>>>
>>> This code built with -O3 and -m32 on godbolt shows split problem.
>>>
>>> #include <stdint.h>
>>>
>>> typedef uint64_t rte_counter64_t;
>>>
>>> void
>>> rte_counter64_add(rte_counter64_t *counter, uint32_t val)
>>> {
>>> 	*counter += val;
>>> }
>>> …	*counter = val;
>>> }
>>>
>>> rte_counter64_add:
>>>          push    ebx
>>>          mov     eax, DWORD PTR [esp+8]
>>>          xor     ebx, ebx
>>>          mov     ecx, DWORD PTR [esp+12]
>>>          add     DWORD PTR [eax], ecx
>>>          adc     DWORD PTR [eax+4], ebx
>>>          pop     ebx
>>>          ret
>>>
>>> rte_counter64_read:
>>>          mov     eax, DWORD PTR [esp+4]
>>>          mov     edx, DWORD PTR [eax+4]
>>>          mov     eax, DWORD PTR [eax]
>>>          ret
>>> rte_counter64_set:
>>>          movq    xmm0, QWORD PTR [esp+8]
>>>          mov     eax, DWORD PTR [esp+4]
>>>          movq    QWORD PTR [eax], xmm0
>>>          ret
>>
>> Sure, atomic might be required on some 32 bit architectures and/or with some compilers.
> 
> in theory i think you should be able to use generic atomics and
> depending on the target you get codegen that works. it might be
> something more expensive on 32-bit and nothing on 64-bit etc..
> 
> what's the damage if we just use atomic generic and relaxed ordering? is
> the codegen not optimal?
>   

Below is what I originally proposed in the "make stats reset reliable" 
thread.

struct counter
{
     uint64_t count;
     uint64_t offset;
};

/../
     struct counter rx_pkts;
     struct counter rx_bytes;
/../

static uint64_t
counter_value(const struct counter *counter)
{
     uint64_t count = __atomic_load_n(&counter->count, __ATOMIC_RELAXED);
     uint64_t offset = __atomic_load_n(&counter->offset, __ATOMIC_RELAXED);

     return count - offset;
}

static void
counter_reset(struct counter *counter)
{
     uint64_t count = __atomic_load_n(&counter->count, __ATOMIC_RELAXED);

     __atomic_store_n(&counter->offset, count, __ATOMIC_RELAXED);
}

static void
counter_add(struct counter *counter, uint64_t operand)
{
     __atomic_store_n(&counter->count, counter->count + operand, 
__ATOMIC_RELAXED);
}

I think this solution generally compiles to something that's equivalent 
to just using non-atomic loads/stores and hope for the best.

Using a non-atomic load in counter_add() will generate better code, but 
doesn't work if you using _Atomic (w/o casts).

Atomic load/stores seems to have volatile semantics, so multiple counter 
updates to the same counter cannot be merged. That is a drawback.

>> I envision a variety of 32 bit implementations, optimized for certain architectures/compilers.
>>
>> Some of them can provide non-tearing 64 bit load/store, so we should also use the counter/offset design pattern for those.
>>

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v9 1/8] eal: generic 64 bit counter
  2024-05-22 19:51               ` Stephen Hemminger
@ 2024-05-26 14:46                 ` Mattias Rönnblom
  0 siblings, 0 replies; 179+ messages in thread
From: Mattias Rönnblom @ 2024-05-26 14:46 UTC (permalink / raw)
  To: Stephen Hemminger, Tyler Retzlaff; +Cc: Morten Brørup, dev

On 2024-05-22 21:51, Stephen Hemminger wrote:
> On Wed, 22 May 2024 12:01:12 -0700
> Tyler Retzlaff <roretzla@linux.microsoft.com> wrote:
> 
>> On Wed, May 22, 2024 at 07:57:01PM +0200, Morten Brørup wrote:
>>>> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
>>>> Sent: Wednesday, 22 May 2024 17.38
>>>>
>>>> On Wed, 22 May 2024 10:31:39 +0200
>>>> Morten Brørup <mb@smartsharesystems.com> wrote:
>>>>    
>>>>>> +/* On 32 bit platform, need to use atomic to avoid load/store
>>>> tearing */
>>>>>> +typedef RTE_ATOMIC(uint64_t) rte_counter64_t;
>>>>>
>>>>> As shown by Godbolt experiments discussed in a previous thread [2],
>>>> non-tearing 64 bit counters can be implemented without using atomic
>>>> instructions on all 32 bit architectures supported by DPDK. So we should
>>>> use the counter/offset design pattern for RTE_ARCH_32 too.
>>>>>
>>>>> [2]:
>>>> https://inbox.dpdk.org/dev/98CBD80474FA8B44BF855DF32C47DC35E9F433@smarts
>>>> erver.smartshare.dk/
>>>>
>>>>
>>>> This code built with -O3 and -m32 on godbolt shows split problem.
>>>>
>>>> #include <stdint.h>
>>>>
>>>> typedef uint64_t rte_counter64_t;
>>>>
>>>> void
>>>> rte_counter64_add(rte_counter64_t *counter, uint32_t val)
>>>> {
>>>> 	*counter += val;
>>>> }
>>>> …	*counter = val;
>>>> }
>>>>
>>>> rte_counter64_add:
>>>>          push    ebx
>>>>          mov     eax, DWORD PTR [esp+8]
>>>>          xor     ebx, ebx
>>>>          mov     ecx, DWORD PTR [esp+12]
>>>>          add     DWORD PTR [eax], ecx
>>>>          adc     DWORD PTR [eax+4], ebx
>>>>          pop     ebx
>>>>          ret
>>>>
>>>> rte_counter64_read:
>>>>          mov     eax, DWORD PTR [esp+4]
>>>>          mov     edx, DWORD PTR [eax+4]
>>>>          mov     eax, DWORD PTR [eax]
>>>>          ret
>>>> rte_counter64_set:
>>>>          movq    xmm0, QWORD PTR [esp+8]
>>>>          mov     eax, DWORD PTR [esp+4]
>>>>          movq    QWORD PTR [eax], xmm0
>>>>          ret
>>>
>>> Sure, atomic might be required on some 32 bit architectures and/or with some compilers.
>>
>> in theory i think you should be able to use generic atomics and
>> depending on the target you get codegen that works. it might be
>> something more expensive on 32-bit and nothing on 64-bit etc..
>>
>> what's the damage if we just use atomic generic and relaxed ordering? is
>> the codegen not optimal?
> 
> If we use atomic with relaxed memory order, then compiler for x86 still generates
> a locked increment in the fast path. This costs about 100 extra cycles due
> to cache and prefetch stall. This whole endeavor is an attempt to avoid that.
> 

It's because the code is overly restrictive (e.g., needlessly forcing 
the whole read-modify-read being atomic), in that case, and no fault of 
the compiler.

void add(uint64_t *addr, uint64_t operand)
{
     uint64_t value = __atomic_load_n(addr, __ATOMIC_RELAXED);
     value += operand;
     __atomic_store_n(addr, value, __ATOMIC_RELAXED);
}

->

x86_64

add:
         mov     rax, QWORD PTR [rdi]
         add     rax, rsi
         mov     QWORD PTR [rdi], rax
         ret


x86

add:
         sub     esp, 12
         mov     ecx, DWORD PTR [esp+16]
         movq    xmm0, QWORD PTR [ecx]
         movq    QWORD PTR [esp], xmm0
         mov     eax, DWORD PTR [esp]
         mov     edx, DWORD PTR [esp+4]
         add     eax, DWORD PTR [esp+20]
         adc     edx, DWORD PTR [esp+24]
         mov     DWORD PTR [esp], eax
         mov     DWORD PTR [esp+4], edx
         movq    xmm1, QWORD PTR [esp]
         movq    QWORD PTR [ecx], xmm1
         add     esp, 12
         ret

No locked instructions.

> PS: looking at the locked increment code for 32 bit involves locked compare
> exchange and potential retry. Probably don't care about performance on that platform
> anymore.
> 
> 

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [RFC v3] net/af_packet: make stats reset reliable
  2024-05-26  7:21         ` Mattias Rönnblom
@ 2024-10-04 17:40           ` Stephen Hemminger
  0 siblings, 0 replies; 179+ messages in thread
From: Stephen Hemminger @ 2024-10-04 17:40 UTC (permalink / raw)
  To: Mattias Rönnblom
  Cc: Ferruh Yigit, John W. Linville, Thomas Monjalon, dev,
	Mattias Rönnblom, Morten Brørup

On Sun, 26 May 2024 09:21:55 +0200
Mattias Rönnblom <hofors@lysator.liu.se> wrote:

> On 2024-05-08 17:23, Stephen Hemminger wrote:
> > On Wed, 8 May 2024 09:19:02 +0200
> > Mattias Rönnblom <hofors@lysator.liu.se> wrote:
> >   
> >> On 2024-05-04 00:00, Stephen Hemminger wrote:  
> >>> On Fri, 3 May 2024 16:45:47 +0100
> >>> Ferruh Yigit <ferruh.yigit@amd.com> wrote:
> >>>      
> >>>> For stats reset, use an offset instead of zeroing out actual stats values,
> >>>> get_stats() displays diff between stats and offset.
> >>>> This way stats only updated in datapath and offset only updated in stats
> >>>> reset function. This makes stats reset function more reliable.
> >>>>
> >>>> As stats only written by single thread, we can remove 'volatile' qualifier
> >>>> which should improve the performance in datapath.
> >>>>
> >>>> While updating around, 'igb_stats' parameter renamed as 'stats'.
> >>>>
> >>>> Signed-off-by: Ferruh Yigit <ferruh.yigit@amd.com>
> >>>> ---
> >>>> Cc: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
> >>>> Cc: Stephen Hemminger <stephen@networkplumber.org>
> >>>> Cc: Morten Brørup <mb@smartsharesystems.com>
> >>>>
> >>>> This update triggered by mail list discussion [1].
> >>>>
> >>>> [1]
> >>>> https://inbox.dpdk.org/dev/3b2cf48e-2293-4226-b6cd-5f4dd3969f99@lysator.liu.se/  
> >>>
> >>>
> >>> NAK
> >>>
> >>> I did not hear a good argument why atomic or volatile was necessary in the first place.
> >>> Why?
> >>>      
> >>
> >> On the reader side, loads should be atomic.
> >> On the writer side, stores should be atomic.
> >>
> >> Updates (stores) should actually occur in a timely manner. The complete
> >> read-modify-write cycle need not be atomic, since we only have a single
> >> writer. All this for the per-lcore counter case.
> >>
> >> If load or store tearing occurs, the counter values may occasionally
> >> take totally bogus values. I think that should be avoided. Especially
> >> since it will likely come at a very reasonable cost.
> >>
> >>   From what it seems to me, load or store tearing may well occur. GCC may
> >> generate two 32-bit stores for a program-level 64-bit store on 32-bit
> >> x86. If you have constant and immediate-data store instructions,
> >> constant writes may also be end up teared. The kernel documentation has
> >> some example of this. Add LTO, it's not necessarily going to be all that
> >> clear what is storing-a-constant and what is not.
> >>
> >> Maybe you care a little less if statistics are occasionally broken, or
> >> some transient, inconsistent state, but generally they should work, and
> >> they should never have some totally bogus values. So, statistics aren't
> >> snow flakes, mostly just business as usual.
> >>
> >> We can't both have a culture that promotes C11-style parallel
> >> programming, or, at the extreme, push the C11 APIs as-is, and the say
> >> "and btw you don't have to care about the standard when it comes to
> >> statistics".
> >>
> >> We could adopt the Linux kernel's rules, programming model, and APIs
> >> (ignoring legal issues). That would be very old school, maybe somewhat
> >> over-engineered for our purpose, include a fair amount of inline
> >> assembler, and also and may well depend on GCC or GCC-like compilers,
> >> just like what I believe the kernel does.
> >>
> >> We could use something in-between, heavily inspired by C11 but still
> >> with an opportunity to work around compiler issues, library issues, and
> >> extend the API for our use case.
> >>
> >> I agree we shouldn't have to mark statistics _Atomic, or RTE_ATOMIC(),
> >> rte_atomic64_t, or rte_sometimes_atomic_and_sometimes_not64_t. Just
> >> keeping the usual C integer types seems like a better option to me.
> >>  
> >>> Why is this driver special (a snowflake) compared to all the other drivers doing software
> >>> statistics (tap, virtio, xdp, ring, memif, netvsc, vmware)?  
> >>
> >> If a broken piece of code has been copied around, one place is going to
> >> be the first to be fixed.  
> > 
> > 
> > I dislike when any driver does something completely different than valid precedent.
> > No other driver in DPDK, Vpp, FreeBSD, Linux (and probably Windows) uses atomic for
> > updating statistics. We even got performance benefit at MS from removing atomic
> > increment of staistics in internal layers.
> >   
> 
> All of those are using atomic stores when updating the statistics, I'm 
> sure. Assuring a store being atomic is one thing, and assuring the whole 
> read-modify-write cycle is atomic is a completely different (and very 
> much more expensive) thing.
> 
> > The idea of load tearing is crazy talk of integral types. It would break so many things.
> > It is the kind of stupid compiler thing that would send Linus on a rant and get
> > the GCC compiler writers in trouble.
> >   
> 
> On 32-bit x86, store tearing for 64-bit integral types is the order of 
> the day.
> 
> For <64 bit types, I agree. The only cases (like the one listed in the 
> kernel documentation), are going to be rare indeed.
> 
> > The DPDK has always favored performance over strict safety guard rails everywhere.
> > Switching to making every statistic an atomic operation is not in the spirit of
> > what is required. There is no strict guarantee necessary here.
> >   
> 
> I think we agree but just use different terminology.
> 

Don't think any real conclusion was reached on this.
Af_packet is a less used driver, more concerned about virtio and xdp drivers.
Marking this patch as changes requested, since still under discussion.

^ permalink raw reply	[flat|nested] 179+ messages in thread

end of thread, other threads:[~2024-10-04 17:40 UTC | newest]

Thread overview: 179+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-25 17:46 [RFC] net/af_packet: make stats reset reliable Ferruh Yigit
2024-04-26 11:33 ` Morten Brørup
2024-04-26 13:37   ` Ferruh Yigit
2024-04-26 14:56     ` Morten Brørup
2024-04-28 15:42   ` Mattias Rönnblom
2024-04-26 14:38 ` [RFC v2] " Ferruh Yigit
2024-04-26 14:47   ` Morten Brørup
2024-04-28 15:11   ` Mattias Rönnblom
2024-05-01 16:19     ` Ferruh Yigit
2024-05-02  5:51       ` Mattias Rönnblom
2024-05-02 14:22         ` Ferruh Yigit
2024-05-02 15:59           ` Stephen Hemminger
2024-05-02 18:20             ` Ferruh Yigit
2024-05-02 17:37           ` Mattias Rönnblom
2024-05-02 18:26             ` Stephen Hemminger
2024-05-02 21:26               ` Mattias Rönnblom
2024-05-02 21:46                 ` Stephen Hemminger
2024-05-07  7:23     ` Mattias Rönnblom
2024-05-07 13:49       ` Ferruh Yigit
2024-05-07 14:51         ` Stephen Hemminger
2024-05-07 16:00           ` Morten Brørup
2024-05-07 16:54             ` Ferruh Yigit
2024-05-07 18:47               ` Stephen Hemminger
2024-05-08  7:48             ` Mattias Rönnblom
2024-05-08  6:28           ` Mattias Rönnblom
2024-05-08  6:25         ` Mattias Rönnblom
2024-05-07 19:19       ` Morten Brørup
2024-05-08  6:34         ` Mattias Rönnblom
2024-05-08  7:10           ` Morten Brørup
2024-05-08  7:23             ` Mattias Rönnblom
2024-04-26 21:28 ` [RFC] " Patrick Robb
2024-05-03 15:45 ` [RFC v3] " Ferruh Yigit
2024-05-03 22:00   ` Stephen Hemminger
2024-05-07 13:48     ` Ferruh Yigit
2024-05-07 14:52       ` Stephen Hemminger
2024-05-07 17:27         ` Ferruh Yigit
2024-05-08  7:19     ` Mattias Rönnblom
2024-05-08 15:23       ` Stephen Hemminger
2024-05-08 19:48         ` Ferruh Yigit
2024-05-08 20:54           ` Stephen Hemminger
2024-05-09  7:43             ` Morten Brørup
2024-05-09  9:29               ` Bruce Richardson
2024-05-09 11:37                 ` Morten Brørup
2024-05-09 14:19                   ` Morten Brørup
2024-05-10  4:56                     ` Stephen Hemminger
2024-05-10  9:14                       ` Morten Brørup
2024-05-26  7:10                         ` Mattias Rönnblom
2024-05-26  7:07                     ` Mattias Rönnblom
2024-05-26  7:03                   ` Mattias Rönnblom
2024-05-26  7:21         ` Mattias Rönnblom
2024-10-04 17:40           ` Stephen Hemminger
2024-05-07 15:27   ` Morten Brørup
2024-05-07 17:40     ` Ferruh Yigit
2024-05-10  5:01 ` [RFC 0/3] generic sw counters Stephen Hemminger
2024-05-10  5:01   ` [RFC 1/3] ethdev: add internal helper of SW driver statistics Stephen Hemminger
2024-05-10  5:01   ` [RFC 2/3] net/af_packet: use SW stats helper Stephen Hemminger
2024-05-10  5:01   ` [RFC 3/3] net/tap: use generic SW stats Stephen Hemminger
2024-05-10 17:29   ` [RFC 0/3] generic sw counters Morten Brørup
2024-05-10 19:30     ` Stephen Hemminger
2024-05-13 18:52   ` [RFC v2 0/7] generic SW counters Stephen Hemminger
2024-05-13 18:52     ` [RFC v2 1/7] eal: generic 64 bit counter Stephen Hemminger
2024-05-13 19:36       ` Morten Brørup
2024-05-13 18:52     ` [RFC v2 2/7] ethdev: add internal helper of SW driver statistics Stephen Hemminger
2024-05-13 18:52     ` [RFC v2 3/7] net/af_packet: use SW stats helper Stephen Hemminger
2024-05-13 18:52     ` [RFC v2 4/7] net/tap: use generic SW stats Stephen Hemminger
2024-05-13 18:52     ` [RFC v2 5/7] net/pcap: " Stephen Hemminger
2024-05-13 18:52     ` [RFC v2 6/7] net/af_xdp: " Stephen Hemminger
2024-05-13 18:52     ` [RFC v2 7/7] net/ring: " Stephen Hemminger
2024-05-14 15:35   ` [PATCH v3 0/7] Generic SW counters Stephen Hemminger
2024-05-14 15:35     ` [PATCH v3 1/7] eal: generic 64 bit counter Stephen Hemminger
2024-05-15  9:30       ` Morten Brørup
2024-05-15 15:03         ` Stephen Hemminger
2024-05-15 16:18           ` Morten Brørup
2024-05-26  7:34         ` Mattias Rönnblom
2024-05-26  6:45       ` Mattias Rönnblom
2024-05-14 15:35     ` [PATCH v3 2/7] ethdev: add internal helper of SW driver statistics Stephen Hemminger
2024-05-14 15:35     ` [PATCH v3 3/7] net/af_packet: use SW stats helper Stephen Hemminger
2024-05-14 15:35     ` [PATCH v3 4/7] net/af_xdp: use generic SW stats Stephen Hemminger
2024-05-14 15:35     ` [PATCH v3 5/7] net/pcap: " Stephen Hemminger
2024-05-14 15:35     ` [PATCH v3 6/7] net/ring: " Stephen Hemminger
2024-05-14 15:35     ` [PATCH v3 7/7] net/tap: " Stephen Hemminger
2024-05-15 23:40   ` [PATCH v4 0/8] Generic 64 bit counters for SW PMD's Stephen Hemminger
2024-05-15 23:40     ` [PATCH v4 1/8] eal: generic 64 bit counter Stephen Hemminger
2024-05-15 23:40     ` [PATCH v4 2/8] ethdev: add common counters for statistics Stephen Hemminger
2024-05-15 23:40     ` [PATCH v4 3/8] net/af_packet: use generic SW stats Stephen Hemminger
2024-05-15 23:40     ` [PATCH v4 4/8] net/af_xdp: " Stephen Hemminger
2024-05-15 23:40     ` [PATCH v4 5/8] net/pcap: " Stephen Hemminger
2024-05-15 23:40     ` [PATCH v4 6/8] net/ring: " Stephen Hemminger
2024-05-15 23:40     ` [PATCH v4 7/8] net/tap: " Stephen Hemminger
2024-05-15 23:41     ` [PATCH v4 8/8] net/null: " Stephen Hemminger
2024-05-16 15:40   ` [PATCH v5 0/9] Generic 64 bit counters Stephen Hemminger
2024-05-16 15:40     ` [PATCH v5 1/9] eal: generic 64 bit counter Stephen Hemminger
2024-05-16 18:22       ` Wathsala Wathawana Vithanage
2024-05-16 21:42         ` Stephen Hemminger
2024-05-17  2:39           ` Honnappa Nagarahalli
2024-05-17  3:29             ` Stephen Hemminger
2024-05-17  4:39               ` Honnappa Nagarahalli
2024-05-16 15:40     ` [PATCH v5 2/9] ethdev: add common counters for statistics Stephen Hemminger
2024-05-16 18:30       ` Wathsala Wathawana Vithanage
2024-05-17  0:19         ` Stephen Hemminger
2024-05-16 15:40     ` [PATCH v5 3/9] net/af_packet: use generic SW stats Stephen Hemminger
2024-05-16 15:40     ` [PATCH v5 4/9] net/af_xdp: " Stephen Hemminger
2024-05-16 15:40     ` [PATCH v5 5/9] net/pcap: " Stephen Hemminger
2024-05-16 15:40     ` [PATCH v5 6/9] test/pmd_ring: initialize mbufs Stephen Hemminger
2024-05-16 15:40     ` [PATCH v5 7/9] net/ring: use generic SW stats Stephen Hemminger
2024-05-16 15:40     ` [PATCH v5 8/9] net/tap: " Stephen Hemminger
2024-05-16 15:40     ` [PATCH v5 9/9] net/null: " Stephen Hemminger
2024-05-17  0:12   ` [PATCH v6 0/9] Generic 64 bit counters for SW drivers Stephen Hemminger
2024-05-17  0:12     ` [PATCH v6 1/9] eal: generic 64 bit counter Stephen Hemminger
2024-05-17  2:45       ` Honnappa Nagarahalli
2024-05-17  3:30         ` Stephen Hemminger
2024-05-17  4:26           ` Honnappa Nagarahalli
2024-05-17  6:44             ` Morten Brørup
2024-05-17 15:05               ` Stephen Hemminger
2024-05-17 16:18               ` Stephen Hemminger
2024-05-18 14:00                 ` Morten Brørup
2024-05-19 15:13                   ` Stephen Hemminger
2024-05-19 17:10                     ` Morten Brørup
2024-05-19 22:49                       ` Stephen Hemminger
2024-05-20  7:57                         ` Morten Brørup
2024-05-17 15:07             ` Stephen Hemminger
2024-05-17  0:12     ` [PATCH v6 2/9] ethdev: add common counters for statistics Stephen Hemminger
2024-05-17  0:12     ` [PATCH v6 3/9] net/af_packet: use generic SW stats Stephen Hemminger
2024-05-17  0:12     ` [PATCH v6 4/9] net/af_xdp: " Stephen Hemminger
2024-05-17 13:34       ` Loftus, Ciara
2024-05-17 14:54         ` Stephen Hemminger
2024-05-17  0:12     ` [PATCH v6 5/9] net/pcap: " Stephen Hemminger
2024-05-17  0:12     ` [PATCH v6 6/9] test/pmd_ring: initialize mbufs Stephen Hemminger
2024-05-17  0:12     ` [PATCH v6 7/9] net/ring: use generic SW stats Stephen Hemminger
2024-05-17  0:12     ` [PATCH v6 8/9] net/tap: " Stephen Hemminger
2024-05-17  0:12     ` [PATCH v6 9/9] net/null: " Stephen Hemminger
2024-05-17 17:35   ` [PATCH v7 0/9] Use weak atomic operations for SW PMD counters Stephen Hemminger
2024-05-17 17:35     ` [PATCH v7 1/9] eal: generic 64 bit counter Stephen Hemminger
2024-05-17 17:35     ` [PATCH v7 2/9] ethdev: add common counters for statistics Stephen Hemminger
2024-05-17 17:35     ` [PATCH v7 3/9] net/af_packet: use generic SW stats Stephen Hemminger
2024-05-17 17:35     ` [PATCH v7 4/9] net/af_xdp: " Stephen Hemminger
2024-05-17 17:35     ` [PATCH v7 5/9] net/pcap: " Stephen Hemminger
2024-05-17 17:35     ` [PATCH v7 6/9] test/pmd_ring: initialize mbufs Stephen Hemminger
2024-05-17 17:35     ` [PATCH v7 7/9] net/ring: use generic SW stats Stephen Hemminger
2024-05-17 17:35     ` [PATCH v7 8/9] net/tap: " Stephen Hemminger
2024-05-17 17:35     ` [PATCH v7 9/9] net/null: " Stephen Hemminger
2024-05-21 17:00   ` [PATCH v8 0/8] Common statistics routines for SW based PMD's Stephen Hemminger
2024-05-21 17:00     ` [PATCH v8 1/8] eal: generic 64 bit counter Stephen Hemminger
2024-05-21 17:00     ` [PATCH v8 2/8] ethdev: add common counters for statistics Stephen Hemminger
2024-05-21 17:00     ` [PATCH v8 3/8] net/af_packet: use generic SW stats Stephen Hemminger
2024-05-21 17:00     ` [PATCH v8 4/8] net/af_xdp: " Stephen Hemminger
2024-05-21 17:00     ` [PATCH v8 5/8] net/pcap: " Stephen Hemminger
2024-05-21 17:00     ` [PATCH v8 6/8] net/ring: " Stephen Hemminger
2024-05-21 17:00     ` [PATCH v8 7/8] net/tap: " Stephen Hemminger
2024-05-21 17:00     ` [PATCH v8 8/8] net/null: " Stephen Hemminger
2024-05-21 20:16   ` [PATCH v9 0/8] Common statistics for SW PMD's Stephen Hemminger
2024-05-21 20:16     ` [PATCH v9 1/8] eal: generic 64 bit counter Stephen Hemminger
2024-05-22  8:31       ` Morten Brørup
2024-05-22 15:33         ` Stephen Hemminger
2024-05-22 18:09           ` Morten Brørup
2024-05-22 19:53             ` Stephen Hemminger
2024-05-22 20:56               ` Morten Brørup
2024-05-22 15:37         ` Stephen Hemminger
2024-05-22 17:57           ` Morten Brørup
2024-05-22 19:01             ` Tyler Retzlaff
2024-05-22 19:51               ` Stephen Hemminger
2024-05-26 14:46                 ` Mattias Rönnblom
2024-05-26 14:39               ` Mattias Rönnblom
2024-05-21 20:16     ` [PATCH v9 2/8] ethdev: add common counters for statistics Stephen Hemminger
2024-05-21 20:16     ` [PATCH v9 3/8] net/af_packet: use generic SW stats Stephen Hemminger
2024-05-21 20:16     ` [PATCH v9 4/8] net/af_xdp: " Stephen Hemminger
2024-05-21 20:16     ` [PATCH v9 5/8] net/pcap: " Stephen Hemminger
2024-05-21 20:16     ` [PATCH v9 6/8] net/ring: " Stephen Hemminger
2024-05-21 20:16     ` [PATCH v9 7/8] net/tap: " Stephen Hemminger
2024-05-21 20:16     ` [PATCH v9 8/8] net/null: " Stephen Hemminger
2024-05-22 16:12   ` [PATCH v10 0/8] Common statistics for software PMD's Stephen Hemminger
2024-05-22 16:12     ` [PATCH v10 1/8] eal: generic 64 bit counter Stephen Hemminger
2024-05-22 16:12     ` [PATCH v10 2/8] ethdev: add common counters for statistics Stephen Hemminger
2024-05-22 16:12     ` [PATCH v10 3/8] net/af_packet: use generic SW stats Stephen Hemminger
2024-05-22 16:12     ` [PATCH v10 4/8] net/af_xdp: " Stephen Hemminger
2024-05-22 16:12     ` [PATCH v10 5/8] net/pcap: " Stephen Hemminger
2024-05-22 16:12     ` [PATCH v10 6/8] net/ring: " Stephen Hemminger
2024-05-22 16:12     ` [PATCH v10 7/8] net/tap: " Stephen Hemminger
2024-05-22 16:12     ` [PATCH v10 8/8] net/null: " Stephen Hemminger

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).