DPDK patches and discussions
 help / color / mirror / Atom feed
* [PATCH] virtio: optimize stats counters performance
@ 2024-07-31 13:17 Morten Brørup
  2024-07-31 14:04 ` [PATCH v2] " Morten Brørup
                   ` (3 more replies)
  0 siblings, 4 replies; 19+ messages in thread
From: Morten Brørup @ 2024-07-31 13:17 UTC (permalink / raw)
  To: maxime.coquelin, chenbox; +Cc: dev, Morten Brørup

Optimized the performance of updating the virtio statistics counters by
reducing the number of branches and inlining the function.

Ordered the packet size comparisons according to the probability with
typical internet traffic mix.

Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
---
 drivers/net/virtio/virtio_rxtx.c | 34 --------------------------------
 drivers/net/virtio/virtio_rxtx.h | 26 ++++++++++++++++++++++--
 2 files changed, 24 insertions(+), 36 deletions(-)

diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index f69b9453a2..bb04fd7d43 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -81,40 +81,6 @@ vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
 	dp->next = VQ_RING_DESC_CHAIN_END;
 }
 
-void
-virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
-{
-	uint32_t s = mbuf->pkt_len;
-	struct rte_ether_addr *ea;
-
-	stats->bytes += s;
-
-	if (s == 64) {
-		stats->size_bins[1]++;
-	} else if (s > 64 && s < 1024) {
-		uint32_t bin;
-
-		/* count zeros, and offset into correct bin */
-		bin = (sizeof(s) * 8) - rte_clz32(s) - 5;
-		stats->size_bins[bin]++;
-	} else {
-		if (s < 64)
-			stats->size_bins[0]++;
-		else if (s < 1519)
-			stats->size_bins[6]++;
-		else
-			stats->size_bins[7]++;
-	}
-
-	ea = rte_pktmbuf_mtod(mbuf, struct rte_ether_addr *);
-	if (rte_is_multicast_ether_addr(ea)) {
-		if (rte_is_broadcast_ether_addr(ea))
-			stats->broadcast++;
-		else
-			stats->multicast++;
-	}
-}
-
 static inline void
 virtio_rx_stats_updated(struct virtnet_rx *rxvq, struct rte_mbuf *m)
 {
diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
index afc4b74534..6f048199d3 100644
--- a/drivers/net/virtio/virtio_rxtx.h
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -35,7 +35,29 @@ struct virtnet_tx {
 };
 
 int virtio_rxq_vec_setup(struct virtnet_rx *rxvq);
-void virtio_update_packet_stats(struct virtnet_stats *stats,
-				struct rte_mbuf *mbuf);
+
+static inline void
+virtio_update_packet_stats(struct virtnet_stats *const stats, const struct rte_mbuf *const mbuf)
+{
+	uint32_t s = mbuf->pkt_len;
+	const struct rte_ether_addr *const ea = rte_pktmbuf_mtod(mbuf, const struct rte_ether_addr *);
+
+	stats->bytes += s;
+
+	if (s >= 1024) {
+		stats->size_bins[6 + (s > 1518)]++;
+	} else if (s <= 64) {
+		stats->size_bins[s >> 6]++;
+	} else {
+		/* count zeros, and offset into correct bin */
+		uint32_t bin = (sizeof(s) * 8) - rte_clz32(s) - 5;
+		stats->size_bins[bin]++;
+	}
+
+	RTE_BUILD_BUG_ON(offsetof(struct virtnet_stats, broadcast) !=
+			offsetof(struct virtnet_stats, multicast) + sizeof(uint64_t));
+	if (rte_is_multicast_ether_addr(ea))
+		(&stats->multicast)[rte_is_broadcast_ether_addr(ea)]++;
+}
 
 #endif /* _VIRTIO_RXTX_H_ */
-- 
2.43.0


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH v2] virtio: optimize stats counters performance
  2024-07-31 13:17 [PATCH] virtio: optimize stats counters performance Morten Brørup
@ 2024-07-31 14:04 ` Morten Brørup
  2024-07-31 15:13 ` [PATCH v3] " Morten Brørup
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 19+ messages in thread
From: Morten Brørup @ 2024-07-31 14:04 UTC (permalink / raw)
  To: maxime.coquelin, chenbox; +Cc: dev, Morten Brørup

Optimized the performance of updating the virtio statistics counters by
reducing the number of branches and inlining the function.

Ordered the packet size comparisons according to the probability with
typical internet traffic mix.

Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
---
v2:
* Fixed checkpatch warning about line length.
---
 drivers/net/virtio/virtio_rxtx.c | 34 --------------------------------
 drivers/net/virtio/virtio_rxtx.h | 26 ++++++++++++++++++++++--
 2 files changed, 24 insertions(+), 36 deletions(-)

diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index f69b9453a2..bb04fd7d43 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -81,40 +81,6 @@ vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
 	dp->next = VQ_RING_DESC_CHAIN_END;
 }
 
-void
-virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
-{
-	uint32_t s = mbuf->pkt_len;
-	struct rte_ether_addr *ea;
-
-	stats->bytes += s;
-
-	if (s == 64) {
-		stats->size_bins[1]++;
-	} else if (s > 64 && s < 1024) {
-		uint32_t bin;
-
-		/* count zeros, and offset into correct bin */
-		bin = (sizeof(s) * 8) - rte_clz32(s) - 5;
-		stats->size_bins[bin]++;
-	} else {
-		if (s < 64)
-			stats->size_bins[0]++;
-		else if (s < 1519)
-			stats->size_bins[6]++;
-		else
-			stats->size_bins[7]++;
-	}
-
-	ea = rte_pktmbuf_mtod(mbuf, struct rte_ether_addr *);
-	if (rte_is_multicast_ether_addr(ea)) {
-		if (rte_is_broadcast_ether_addr(ea))
-			stats->broadcast++;
-		else
-			stats->multicast++;
-	}
-}
-
 static inline void
 virtio_rx_stats_updated(struct virtnet_rx *rxvq, struct rte_mbuf *m)
 {
diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
index afc4b74534..81325ed842 100644
--- a/drivers/net/virtio/virtio_rxtx.h
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -35,7 +35,29 @@ struct virtnet_tx {
 };
 
 int virtio_rxq_vec_setup(struct virtnet_rx *rxvq);
-void virtio_update_packet_stats(struct virtnet_stats *stats,
-				struct rte_mbuf *mbuf);
+
+static inline void
+virtio_update_packet_stats(struct virtnet_stats *const stats, const struct rte_mbuf *const mbuf)
+{
+	uint32_t s = mbuf->pkt_len;
+	const struct rte_ether_addr *ea = rte_pktmbuf_mtod(mbuf, const struct rte_ether_addr *);
+
+	stats->bytes += s;
+
+	if (s >= 1024) {
+		stats->size_bins[6 + (s > 1518)]++;
+	} else if (s <= 64) {
+		stats->size_bins[s >> 6]++;
+	} else {
+		/* count zeros, and offset into correct bin */
+		uint32_t bin = (sizeof(s) * 8) - rte_clz32(s) - 5;
+		stats->size_bins[bin]++;
+	}
+
+	RTE_BUILD_BUG_ON(offsetof(struct virtnet_stats, broadcast) !=
+			offsetof(struct virtnet_stats, multicast) + sizeof(uint64_t));
+	if (rte_is_multicast_ether_addr(ea))
+		(&stats->multicast)[rte_is_broadcast_ether_addr(ea)]++;
+}
 
 #endif /* _VIRTIO_RXTX_H_ */
-- 
2.43.0


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH v3] virtio: optimize stats counters performance
  2024-07-31 13:17 [PATCH] virtio: optimize stats counters performance Morten Brørup
  2024-07-31 14:04 ` [PATCH v2] " Morten Brørup
@ 2024-07-31 15:13 ` Morten Brørup
  2024-07-31 22:58 ` [PATCH v4] " Morten Brørup
  2024-08-01 16:03 ` [PATCH v5] " Morten Brørup
  3 siblings, 0 replies; 19+ messages in thread
From: Morten Brørup @ 2024-07-31 15:13 UTC (permalink / raw)
  To: maxime.coquelin, chenbox; +Cc: dev, Morten Brørup

Optimized the performance of updating the virtio statistics counters by
reducing the number of branches and inlining the function.

Ordered the packet size comparisons according to the probability with
typical internet traffic mix.

Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
---
v3:
* Eliminated a local variable.
* Note: Substituted sizeof(uint32_t)*4 by 32UL, using unsigned long type
  to keep optimal offsetting in generated assembler output.
* Removed unnecessary curly braces.
v2:
* Fixed checkpatch warning about line length.
---
 drivers/net/virtio/virtio_rxtx.c | 34 --------------------------------
 drivers/net/virtio/virtio_rxtx.h | 23 +++++++++++++++++++--
 2 files changed, 21 insertions(+), 36 deletions(-)

diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index f69b9453a2..bb04fd7d43 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -81,40 +81,6 @@ vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
 	dp->next = VQ_RING_DESC_CHAIN_END;
 }
 
-void
-virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
-{
-	uint32_t s = mbuf->pkt_len;
-	struct rte_ether_addr *ea;
-
-	stats->bytes += s;
-
-	if (s == 64) {
-		stats->size_bins[1]++;
-	} else if (s > 64 && s < 1024) {
-		uint32_t bin;
-
-		/* count zeros, and offset into correct bin */
-		bin = (sizeof(s) * 8) - rte_clz32(s) - 5;
-		stats->size_bins[bin]++;
-	} else {
-		if (s < 64)
-			stats->size_bins[0]++;
-		else if (s < 1519)
-			stats->size_bins[6]++;
-		else
-			stats->size_bins[7]++;
-	}
-
-	ea = rte_pktmbuf_mtod(mbuf, struct rte_ether_addr *);
-	if (rte_is_multicast_ether_addr(ea)) {
-		if (rte_is_broadcast_ether_addr(ea))
-			stats->broadcast++;
-		else
-			stats->multicast++;
-	}
-}
-
 static inline void
 virtio_rx_stats_updated(struct virtnet_rx *rxvq, struct rte_mbuf *m)
 {
diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
index afc4b74534..bcd552b907 100644
--- a/drivers/net/virtio/virtio_rxtx.h
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -35,7 +35,26 @@ struct virtnet_tx {
 };
 
 int virtio_rxq_vec_setup(struct virtnet_rx *rxvq);
-void virtio_update_packet_stats(struct virtnet_stats *stats,
-				struct rte_mbuf *mbuf);
+
+static inline void
+virtio_update_packet_stats(struct virtnet_stats *const stats, const struct rte_mbuf *const mbuf)
+{
+	uint32_t s = mbuf->pkt_len;
+	const struct rte_ether_addr *ea = rte_pktmbuf_mtod(mbuf, const struct rte_ether_addr *);
+
+	stats->bytes += s;
+
+	if (s >= 1024)
+		stats->size_bins[6 + (s > 1518)]++;
+	else if (s <= 64)
+		stats->size_bins[s >> 6]++;
+	else
+		stats->size_bins[32UL - rte_clz32(s) - 5]++;
+
+	RTE_BUILD_BUG_ON(offsetof(struct virtnet_stats, broadcast) !=
+			offsetof(struct virtnet_stats, multicast) + sizeof(uint64_t));
+	if (rte_is_multicast_ether_addr(ea))
+		(&stats->multicast)[rte_is_broadcast_ether_addr(ea)]++;
+}
 
 #endif /* _VIRTIO_RXTX_H_ */
-- 
2.43.0


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH v4] virtio: optimize stats counters performance
  2024-07-31 13:17 [PATCH] virtio: optimize stats counters performance Morten Brørup
  2024-07-31 14:04 ` [PATCH v2] " Morten Brørup
  2024-07-31 15:13 ` [PATCH v3] " Morten Brørup
@ 2024-07-31 22:58 ` Morten Brørup
  2024-07-31 23:45   ` Stephen Hemminger
  2024-08-01 16:03 ` [PATCH v5] " Morten Brørup
  3 siblings, 1 reply; 19+ messages in thread
From: Morten Brørup @ 2024-07-31 22:58 UTC (permalink / raw)
  To: maxime.coquelin, chenbox; +Cc: dev, Morten Brørup

Optimized the performance of updating the virtio statistics counters by
reducing the number of branches and inlining the function.

Ordered the packet size comparisons according to the probability with
typical internet traffic mix.

Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
---
v4:
* Consider multicast/broadcast packets unlikely.
v3:
* Eliminated a local variable.
* Note: Substituted sizeof(uint32_t)*4 by 32UL, using unsigned long type
  to keep optimal offsetting in generated assembler output.
* Removed unnecessary curly braces.
v2:
* Fixed checkpatch warning about line length.
---
 drivers/net/virtio/virtio_rxtx.c | 34 --------------------------------
 drivers/net/virtio/virtio_rxtx.h | 23 +++++++++++++++++++--
 2 files changed, 21 insertions(+), 36 deletions(-)

diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index f69b9453a2..bb04fd7d43 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -81,40 +81,6 @@ vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
 	dp->next = VQ_RING_DESC_CHAIN_END;
 }
 
-void
-virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
-{
-	uint32_t s = mbuf->pkt_len;
-	struct rte_ether_addr *ea;
-
-	stats->bytes += s;
-
-	if (s == 64) {
-		stats->size_bins[1]++;
-	} else if (s > 64 && s < 1024) {
-		uint32_t bin;
-
-		/* count zeros, and offset into correct bin */
-		bin = (sizeof(s) * 8) - rte_clz32(s) - 5;
-		stats->size_bins[bin]++;
-	} else {
-		if (s < 64)
-			stats->size_bins[0]++;
-		else if (s < 1519)
-			stats->size_bins[6]++;
-		else
-			stats->size_bins[7]++;
-	}
-
-	ea = rte_pktmbuf_mtod(mbuf, struct rte_ether_addr *);
-	if (rte_is_multicast_ether_addr(ea)) {
-		if (rte_is_broadcast_ether_addr(ea))
-			stats->broadcast++;
-		else
-			stats->multicast++;
-	}
-}
-
 static inline void
 virtio_rx_stats_updated(struct virtnet_rx *rxvq, struct rte_mbuf *m)
 {
diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
index afc4b74534..0f938ab145 100644
--- a/drivers/net/virtio/virtio_rxtx.h
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -35,7 +35,26 @@ struct virtnet_tx {
 };
 
 int virtio_rxq_vec_setup(struct virtnet_rx *rxvq);
-void virtio_update_packet_stats(struct virtnet_stats *stats,
-				struct rte_mbuf *mbuf);
+
+static inline void
+virtio_update_packet_stats(struct virtnet_stats *const stats, const struct rte_mbuf *const mbuf)
+{
+	uint32_t s = mbuf->pkt_len;
+	const struct rte_ether_addr *ea = rte_pktmbuf_mtod(mbuf, const struct rte_ether_addr *);
+
+	stats->bytes += s;
+
+	if (s >= 1024)
+		stats->size_bins[6 + (s > 1518)]++;
+	else if (s <= 64)
+		stats->size_bins[s >> 6]++;
+	else
+		stats->size_bins[32UL - rte_clz32(s) - 5]++;
+
+	RTE_BUILD_BUG_ON(offsetof(struct virtnet_stats, broadcast) !=
+			offsetof(struct virtnet_stats, multicast) + sizeof(uint64_t));
+	if (unlikely(rte_is_multicast_ether_addr(ea)))
+		(&stats->multicast)[rte_is_broadcast_ether_addr(ea)]++;
+}
 
 #endif /* _VIRTIO_RXTX_H_ */
-- 
2.43.0


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v4] virtio: optimize stats counters performance
  2024-07-31 22:58 ` [PATCH v4] " Morten Brørup
@ 2024-07-31 23:45   ` Stephen Hemminger
  2024-07-31 23:51     ` Morten Brørup
  0 siblings, 1 reply; 19+ messages in thread
From: Stephen Hemminger @ 2024-07-31 23:45 UTC (permalink / raw)
  To: Morten Brørup; +Cc: maxime.coquelin, chenbox, dev

On Wed, 31 Jul 2024 22:58:16 +0000
Morten Brørup <mb@smartsharesystems.com> wrote:

> +
> +static inline void
> +virtio_update_packet_stats(struct virtnet_stats *const stats, const struct rte_mbuf *const mbuf)
> +{
> +	uint32_t s = mbuf->pkt_len;
> +	const struct rte_ether_addr *ea = rte_pktmbuf_mtod(mbuf, const struct rte_ether_addr *);
> +
> +	stats->bytes += s;
> +
> +	if (s >= 1024)
> +		stats->size_bins[6 + (s > 1518)]++;
> +	else if (s <= 64)
> +		stats->size_bins[s >> 6]++;
> +	else
> +		stats->size_bins[32UL - rte_clz32(s) - 5]++;
> +
> +	RTE_BUILD_BUG_ON(offsetof(struct virtnet_stats, broadcast) !=
> +			offsetof(struct virtnet_stats, multicast) + sizeof(uint64_t));
> +	if (unlikely(rte_is_multicast_ether_addr(ea)))
> +		(&stats->multicast)[rte_is_broadcast_ether_addr(ea)]++;
> +}
>  

Why move it to virtio_rxtx.h it was fine where it was.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* RE: [PATCH v4] virtio: optimize stats counters performance
  2024-07-31 23:45   ` Stephen Hemminger
@ 2024-07-31 23:51     ` Morten Brørup
  0 siblings, 0 replies; 19+ messages in thread
From: Morten Brørup @ 2024-07-31 23:51 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: maxime.coquelin, chenbox, dev

> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Thursday, 1 August 2024 01.46
> 
> On Wed, 31 Jul 2024 22:58:16 +0000
> Morten Brørup <mb@smartsharesystems.com> wrote:
> 
> > +
> > +static inline void
> > +virtio_update_packet_stats(struct virtnet_stats *const stats, const
> struct rte_mbuf *const mbuf)
> > +{
> > +	uint32_t s = mbuf->pkt_len;
> > +	const struct rte_ether_addr *ea = rte_pktmbuf_mtod(mbuf, const
> struct rte_ether_addr *);
> > +
> > +	stats->bytes += s;
> > +
> > +	if (s >= 1024)
> > +		stats->size_bins[6 + (s > 1518)]++;
> > +	else if (s <= 64)
> > +		stats->size_bins[s >> 6]++;
> > +	else
> > +		stats->size_bins[32UL - rte_clz32(s) - 5]++;
> > +
> > +	RTE_BUILD_BUG_ON(offsetof(struct virtnet_stats, broadcast) !=
> > +			offsetof(struct virtnet_stats, multicast) +
> sizeof(uint64_t));
> > +	if (unlikely(rte_is_multicast_ether_addr(ea)))
> > +		(&stats->multicast)[rte_is_broadcast_ether_addr(ea)]++;
> > +}
> >
> 
> Why move it to virtio_rxtx.h it was fine where it was.

Because it is also called from the vector implementations [1], where it was not inlined before.

[1]: https://elixir.bootlin.com/dpdk/v24.07/A/ident/virtio_update_packet_stats


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH v5] virtio: optimize stats counters performance
  2024-07-31 13:17 [PATCH] virtio: optimize stats counters performance Morten Brørup
                   ` (2 preceding siblings ...)
  2024-07-31 22:58 ` [PATCH v4] " Morten Brørup
@ 2024-08-01 16:03 ` Morten Brørup
  2024-08-01 16:17   ` Stephen Hemminger
                     ` (5 more replies)
  3 siblings, 6 replies; 19+ messages in thread
From: Morten Brørup @ 2024-08-01 16:03 UTC (permalink / raw)
  To: maxime.coquelin, chenbox; +Cc: stephen, dev, Morten Brørup

Optimized the performance of updating the virtio statistics counters by
reducing the number of branches.

Ordered the packet size comparisons according to the probability with
typical internet traffic mix.

Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
---
v5:
* Do not inline the function. (Stephen)
v4:
* Consider multicast/broadcast packets unlikely.
v3:
* Eliminated a local variable.
* Note: Substituted sizeof(uint32_t)*4 by 32UL, using unsigned long type
  to keep optimal offsetting in generated assembler output.
* Removed unnecessary curly braces.
v2:
* Fixed checkpatch warning about line length.
---
 drivers/net/virtio/virtio_rxtx.c | 39 ++++++++++++--------------------
 drivers/net/virtio/virtio_rxtx.h |  4 ++--
 2 files changed, 16 insertions(+), 27 deletions(-)

diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index f69b9453a2..b67f063b31 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -82,37 +82,26 @@ vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
 }
 
 void
-virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
+virtio_update_packet_stats(struct virtnet_stats *const stats,
+		const struct rte_mbuf *const mbuf)
 {
 	uint32_t s = mbuf->pkt_len;
-	struct rte_ether_addr *ea;
+	const struct rte_ether_addr *const ea =
+			rte_pktmbuf_mtod(mbuf, const struct rte_ether_addr *);
 
 	stats->bytes += s;
 
-	if (s == 64) {
-		stats->size_bins[1]++;
-	} else if (s > 64 && s < 1024) {
-		uint32_t bin;
-
-		/* count zeros, and offset into correct bin */
-		bin = (sizeof(s) * 8) - rte_clz32(s) - 5;
-		stats->size_bins[bin]++;
-	} else {
-		if (s < 64)
-			stats->size_bins[0]++;
-		else if (s < 1519)
-			stats->size_bins[6]++;
-		else
-			stats->size_bins[7]++;
-	}
+	if (s >= 1024)
+		stats->size_bins[6 + (s > 1518)]++;
+	else if (s <= 64)
+		stats->size_bins[s >> 6]++;
+	else
+		stats->size_bins[32UL - rte_clz32(s) - 5]++;
 
-	ea = rte_pktmbuf_mtod(mbuf, struct rte_ether_addr *);
-	if (rte_is_multicast_ether_addr(ea)) {
-		if (rte_is_broadcast_ether_addr(ea))
-			stats->broadcast++;
-		else
-			stats->multicast++;
-	}
+	RTE_BUILD_BUG_ON(offsetof(struct virtnet_stats, broadcast) !=
+			offsetof(struct virtnet_stats, multicast) + sizeof(uint64_t));
+	if (unlikely(rte_is_multicast_ether_addr(ea)))
+		(&stats->multicast)[rte_is_broadcast_ether_addr(ea)]++;
 }
 
 static inline void
diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
index afc4b74534..68034c914b 100644
--- a/drivers/net/virtio/virtio_rxtx.h
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -35,7 +35,7 @@ struct virtnet_tx {
 };
 
 int virtio_rxq_vec_setup(struct virtnet_rx *rxvq);
-void virtio_update_packet_stats(struct virtnet_stats *stats,
-				struct rte_mbuf *mbuf);
+void virtio_update_packet_stats(struct virtnet_stats *const stats,
+		const struct rte_mbuf *const mbuf);
 
 #endif /* _VIRTIO_RXTX_H_ */
-- 
2.43.0


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v5] virtio: optimize stats counters performance
  2024-08-01 16:03 ` [PATCH v5] " Morten Brørup
@ 2024-08-01 16:17   ` Stephen Hemminger
  2024-08-01 20:38     ` Morten Brørup
  2024-08-02  2:23   ` lihuisong (C)
                     ` (4 subsequent siblings)
  5 siblings, 1 reply; 19+ messages in thread
From: Stephen Hemminger @ 2024-08-01 16:17 UTC (permalink / raw)
  To: Morten Brørup; +Cc: maxime.coquelin, chenbox, dev

On Thu,  1 Aug 2024 16:03:12 +0000
Morten Brørup <mb@smartsharesystems.com> wrote:

> Optimized the performance of updating the virtio statistics counters by
> reducing the number of branches.
> 
> Ordered the packet size comparisons according to the probability with
> typical internet traffic mix.
> 
> Signed-off-by: Morten Brørup <mb@smartsharesystems.com>

LGTM
Acked-by: Stephen Hemminger <stephen@networkplumber.org>

I wonder if other software drivers could use similar counters?

^ permalink raw reply	[flat|nested] 19+ messages in thread

* RE: [PATCH v5] virtio: optimize stats counters performance
  2024-08-01 16:17   ` Stephen Hemminger
@ 2024-08-01 20:38     ` Morten Brørup
  2024-08-02 14:49       ` Morten Brørup
  0 siblings, 1 reply; 19+ messages in thread
From: Morten Brørup @ 2024-08-01 20:38 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: maxime.coquelin, chenbox, dev

> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Thursday, 1 August 2024 18.18
> 
> On Thu,  1 Aug 2024 16:03:12 +0000
> Morten Brørup <mb@smartsharesystems.com> wrote:
> 
> > Optimized the performance of updating the virtio statistics counters
> by
> > reducing the number of branches.
> >
> > Ordered the packet size comparisons according to the probability with
> > typical internet traffic mix.
> >
> > Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
> 
> LGTM
> Acked-by: Stephen Hemminger <stephen@networkplumber.org>
> 
> I wonder if other software drivers could use similar counters?

While working on this, I noticed the netvsc driver and vhost lib also have size_bins [1], so they are likely candidates.
 
The netvsc's hn_update_packet_stats() function [2] seems like 100 % copy-paste, and should be easy to paste over with the new implementation.
The vhost lib's vhost_queue_stats_update() function [3] also looks rather similar to the original variant, and could benefit too.

[1]: https://elixir.bootlin.com/dpdk/v24.07/A/ident/size_bins
[2]: https://elixir.bootlin.com/dpdk/v24.07/source/drivers/net/netvsc/hn_rxtx.c#L108
[3]: https://elixir.bootlin.com/dpdk/v24.07/source/lib/vhost/virtio_net.c#L56

I'll take a look around and add similar patches for what I find. Thanks for the reminder.


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v5] virtio: optimize stats counters performance
  2024-08-01 16:03 ` [PATCH v5] " Morten Brørup
  2024-08-01 16:17   ` Stephen Hemminger
@ 2024-08-02  2:23   ` lihuisong (C)
  2024-08-02  2:42     ` Stephen Hemminger
  2024-08-02 11:27     ` Morten Brørup
  2024-08-05  1:19   ` lihuisong (C)
                     ` (3 subsequent siblings)
  5 siblings, 2 replies; 19+ messages in thread
From: lihuisong (C) @ 2024-08-02  2:23 UTC (permalink / raw)
  To: Morten Brørup, maxime.coquelin, chenbox; +Cc: stephen, dev


在 2024/8/2 0:03, Morten Brørup 写道:
> Optimized the performance of updating the virtio statistics counters by
> reducing the number of branches.
>
> Ordered the packet size comparisons according to the probability with
> typical internet traffic mix.
>
> Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
> ---
> v5:
> * Do not inline the function. (Stephen)
> v4:
> * Consider multicast/broadcast packets unlikely.
> v3:
> * Eliminated a local variable.
> * Note: Substituted sizeof(uint32_t)*4 by 32UL, using unsigned long type
>    to keep optimal offsetting in generated assembler output.
> * Removed unnecessary curly braces.
> v2:
> * Fixed checkpatch warning about line length.
> ---
>   drivers/net/virtio/virtio_rxtx.c | 39 ++++++++++++--------------------
>   drivers/net/virtio/virtio_rxtx.h |  4 ++--
>   2 files changed, 16 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
> index f69b9453a2..b67f063b31 100644
> --- a/drivers/net/virtio/virtio_rxtx.c
> +++ b/drivers/net/virtio/virtio_rxtx.c
> @@ -82,37 +82,26 @@ vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
>   }
>   
>   void
> -virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
> +virtio_update_packet_stats(struct virtnet_stats *const stats,
> +		const struct rte_mbuf *const mbuf)
The two const is also for performace?  Is there gain?
>   {
>   	uint32_t s = mbuf->pkt_len;
> -	struct rte_ether_addr *ea;
> +	const struct rte_ether_addr *const ea =
> +			rte_pktmbuf_mtod(mbuf, const struct rte_ether_addr *);
>   
>   	stats->bytes += s;
>   
> -	if (s == 64) {
> -		stats->size_bins[1]++;
> -	} else if (s > 64 && s < 1024) {
> -		uint32_t bin;
> -
> -		/* count zeros, and offset into correct bin */
> -		bin = (sizeof(s) * 8) - rte_clz32(s) - 5;
> -		stats->size_bins[bin]++;
> -	} else {
> -		if (s < 64)
> -			stats->size_bins[0]++;
> -		else if (s < 1519)
> -			stats->size_bins[6]++;
> -		else
> -			stats->size_bins[7]++;
> -	}
> +	if (s >= 1024)
> +		stats->size_bins[6 + (s > 1518)]++;
> +	else if (s <= 64)
> +		stats->size_bins[s >> 6]++;
> +	else
> +		stats->size_bins[32UL - rte_clz32(s) - 5]++;
>   
> -	ea = rte_pktmbuf_mtod(mbuf, struct rte_ether_addr *);
> -	if (rte_is_multicast_ether_addr(ea)) {
> -		if (rte_is_broadcast_ether_addr(ea))
> -			stats->broadcast++;
> -		else
> -			stats->multicast++;
> -	}
> +	RTE_BUILD_BUG_ON(offsetof(struct virtnet_stats, broadcast) !=
> +			offsetof(struct virtnet_stats, multicast) + sizeof(uint64_t));
> +	if (unlikely(rte_is_multicast_ether_addr(ea)))
> +		(&stats->multicast)[rte_is_broadcast_ether_addr(ea)]++;
The "rte_is_broadcast_ether_addr(ea) " will be calculated twice if 
packet is mulitcast.
How about coding like:
-->
is_mulitcast = rte_is_multicast_ether_addr(ea);
if (unlikely(is_mulitcast))
(&stats->multicast)[rte_is_broadcast_ether_addr(ea)]++;
>   }
>   
>   static inline void
> diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
> index afc4b74534..68034c914b 100644
> --- a/drivers/net/virtio/virtio_rxtx.h
> +++ b/drivers/net/virtio/virtio_rxtx.h
> @@ -35,7 +35,7 @@ struct virtnet_tx {
>   };
>   
>   int virtio_rxq_vec_setup(struct virtnet_rx *rxvq);
> -void virtio_update_packet_stats(struct virtnet_stats *stats,
> -				struct rte_mbuf *mbuf);
> +void virtio_update_packet_stats(struct virtnet_stats *const stats,
> +		const struct rte_mbuf *const mbuf);
>   
>   #endif /* _VIRTIO_RXTX_H_ */

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v5] virtio: optimize stats counters performance
  2024-08-02  2:23   ` lihuisong (C)
@ 2024-08-02  2:42     ` Stephen Hemminger
  2024-08-02  3:17       ` lihuisong (C)
  2024-08-02 11:27     ` Morten Brørup
  1 sibling, 1 reply; 19+ messages in thread
From: Stephen Hemminger @ 2024-08-02  2:42 UTC (permalink / raw)
  To: lihuisong (C); +Cc: Morten Brørup, maxime.coquelin, chenbox, dev

On Fri, 2 Aug 2024 10:23:12 +0800
"lihuisong (C)" <lihuisong@huawei.com> wrote:

> >   void
> > -virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
> > +virtio_update_packet_stats(struct virtnet_stats *const stats,
> > +		const struct rte_mbuf *const mbuf)  
> The two const is also for performace?  Is there gain?

If you look at resulting code (ie godbolt.org) the resulting code never
changes when const is added.  The compiler already
knows what is modified. Const is only a programmer and correctness thing.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v5] virtio: optimize stats counters performance
  2024-08-02  2:42     ` Stephen Hemminger
@ 2024-08-02  3:17       ` lihuisong (C)
  0 siblings, 0 replies; 19+ messages in thread
From: lihuisong (C) @ 2024-08-02  3:17 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Morten Brørup, maxime.coquelin, chenbox, dev


在 2024/8/2 10:42, Stephen Hemminger 写道:
> On Fri, 2 Aug 2024 10:23:12 +0800
> "lihuisong (C)" <lihuisong@huawei.com> wrote:
>
>>>    void
>>> -virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
>>> +virtio_update_packet_stats(struct virtnet_stats *const stats,
>>> +		const struct rte_mbuf *const mbuf)
>> The two const is also for performace?  Is there gain?
> If you look at resulting code (ie godbolt.org) the resulting code never
> changes when const is added.  The compiler already
> knows what is modified. Const is only a programmer and correctness thing.
I know this. Thanks for your clarifying and recommending the site that 
is good to use.😁
This patch is just for optimizing stats counters performance.
And the new added const has nothing to do with this optimization.
But it's ok for me.
> .

^ permalink raw reply	[flat|nested] 19+ messages in thread

* RE: [PATCH v5] virtio: optimize stats counters performance
  2024-08-02  2:23   ` lihuisong (C)
  2024-08-02  2:42     ` Stephen Hemminger
@ 2024-08-02 11:27     ` Morten Brørup
  2024-08-05  1:14       ` lihuisong (C)
  1 sibling, 1 reply; 19+ messages in thread
From: Morten Brørup @ 2024-08-02 11:27 UTC (permalink / raw)
  To: lihuisong (C), maxime.coquelin, chenbox; +Cc: stephen, dev

> From: lihuisong (C) [mailto:lihuisong@huawei.com]
> Sent: Friday, 2 August 2024 04.23
> 
> 在 2024/8/2 0:03, Morten Brørup 写道:
> >   void
> > -virtio_update_packet_stats(struct virtnet_stats *stats, struct
> rte_mbuf *mbuf)
> > +virtio_update_packet_stats(struct virtnet_stats *const stats,
> > +		const struct rte_mbuf *const mbuf)
> The two const is also for performace?  Is there gain?

The "const struct rte_mbuf * mbuf" informs the optimizer that the function does not modify the mbuf.  This is for the benefit of callers of the function; they can rely on the mbuf being unchanged when the function returns.
So, if the optimizer has cached some mbuf field before calling the function, it does not need to read the mbuf field again, but can continue using the cached value after the function call.

The two "type *const ptr" probably make no performance difference with the compilers and function call conventions used by the CPU architectures supported by DPDK.

> > +	RTE_BUILD_BUG_ON(offsetof(struct virtnet_stats, broadcast) !=
> > +			offsetof(struct virtnet_stats, multicast) +
> sizeof(uint64_t));
> > +	if (unlikely(rte_is_multicast_ether_addr(ea)))
> > +		(&stats->multicast)[rte_is_broadcast_ether_addr(ea)]++;
> The "rte_is_broadcast_ether_addr(ea) " will be calculated twice if
> packet is mulitcast.
> How about coding like:
> -->
> is_mulitcast = rte_is_multicast_ether_addr(ea);
> if (unlikely(is_mulitcast))
> (&stats->multicast)[rte_is_broadcast_ether_addr(ea)]++;

I don't think "rte_is_broadcast_ether_addr(ea)" is calculated twice for multicast packets.
My code essentially does this:
	if (mc(ea))
		stats[bc(ea)]++;

Changing to this shouldn't make a difference:
	m = mc(ea);
	if (m)
		stats[bc(ea)]++;


^ permalink raw reply	[flat|nested] 19+ messages in thread

* RE: [PATCH v5] virtio: optimize stats counters performance
  2024-08-01 20:38     ` Morten Brørup
@ 2024-08-02 14:49       ` Morten Brørup
  0 siblings, 0 replies; 19+ messages in thread
From: Morten Brørup @ 2024-08-02 14:49 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: maxime.coquelin, chenbox, longli, weh, dev

> From: Morten Brørup [mailto:mb@smartsharesystems.com]
> Sent: Thursday, 1 August 2024 22.38
> 
> > From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> > Sent: Thursday, 1 August 2024 18.18
> >
> > On Thu,  1 Aug 2024 16:03:12 +0000
> > Morten Brørup <mb@smartsharesystems.com> wrote:
> >
> > > Optimized the performance of updating the virtio statistics counters
> > by
> > > reducing the number of branches.
> > >
> > > Ordered the packet size comparisons according to the probability
> with
> > > typical internet traffic mix.
> > >
> > > Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
> >
> > LGTM
> > Acked-by: Stephen Hemminger <stephen@networkplumber.org>
> >
> > I wonder if other software drivers could use similar counters?
> 
> While working on this, I noticed the netvsc driver and vhost lib also
> have size_bins [1], so they are likely candidates.
> 
> The netvsc's hn_update_packet_stats() function [2] seems like 100 %
> copy-paste, and should be easy to paste over with the new
> implementation.
> The vhost lib's vhost_queue_stats_update() function [3] also looks
> rather similar to the original variant, and could benefit too.
> 
> [1]: https://elixir.bootlin.com/dpdk/v24.07/A/ident/size_bins
> [2]:
> https://elixir.bootlin.com/dpdk/v24.07/source/drivers/net/netvsc/hn_rxtx
> .c#L108
> [3]:
> https://elixir.bootlin.com/dpdk/v24.07/source/lib/vhost/virtio_net.c#L56
> 
> I'll take a look around and add similar patches for what I find. Thanks
> for the reminder.

Separate patches for vhost-user [4] and netvsc [5] now provided.

[4]: https://inbox.dpdk.org/dev/20240802143259.269827-1-mb@smartsharesystems.com/T/#u
[5]: https://inbox.dpdk.org/dev/20240802144048.270152-1-mb@smartsharesystems.com/T/#u


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v5] virtio: optimize stats counters performance
  2024-08-02 11:27     ` Morten Brørup
@ 2024-08-05  1:14       ` lihuisong (C)
  0 siblings, 0 replies; 19+ messages in thread
From: lihuisong (C) @ 2024-08-05  1:14 UTC (permalink / raw)
  To: Morten Brørup, maxime.coquelin, chenbox; +Cc: stephen, dev


在 2024/8/2 19:27, Morten Brørup 写道:
>> From: lihuisong (C) [mailto:lihuisong@huawei.com]
>> Sent: Friday, 2 August 2024 04.23
>>
>> 在 2024/8/2 0:03, Morten Brørup 写道:
>>>    void
>>> -virtio_update_packet_stats(struct virtnet_stats *stats, struct
>> rte_mbuf *mbuf)
>>> +virtio_update_packet_stats(struct virtnet_stats *const stats,
>>> +		const struct rte_mbuf *const mbuf)
>> The two const is also for performace?  Is there gain?
> The "const struct rte_mbuf * mbuf" informs the optimizer that the function does not modify the mbuf.  This is for the benefit of callers of the function; they can rely on the mbuf being unchanged when the function returns.
> So, if the optimizer has cached some mbuf field before calling the function, it does not need to read the mbuf field again, but can continue using the cached value after the function call.
>
> The two "type *const ptr" probably make no performance difference with the compilers and function call conventions used by the CPU architectures supported by DPDK.
ok
>
>>> +	RTE_BUILD_BUG_ON(offsetof(struct virtnet_stats, broadcast) !=
>>> +			offsetof(struct virtnet_stats, multicast) +
>> sizeof(uint64_t));
>>> +	if (unlikely(rte_is_multicast_ether_addr(ea)))
>>> +		(&stats->multicast)[rte_is_broadcast_ether_addr(ea)]++;
>> The "rte_is_broadcast_ether_addr(ea) " will be calculated twice if
>> packet is mulitcast.
>> How about coding like:
>> -->
>> is_mulitcast = rte_is_multicast_ether_addr(ea);
>> if (unlikely(is_mulitcast))
>> (&stats->multicast)[rte_is_broadcast_ether_addr(ea)]++;
> I don't think "rte_is_broadcast_ether_addr(ea)" is calculated twice for multicast packets.
> My code essentially does this:
> 	if (mc(ea))
> 		stats[bc(ea)]++;
>
> Changing to this shouldn't make a difference:
> 	m = mc(ea);
> 	if (m)
> 		stats[bc(ea)]++;
Yeah,you are right.
>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v5] virtio: optimize stats counters performance
  2024-08-01 16:03 ` [PATCH v5] " Morten Brørup
  2024-08-01 16:17   ` Stephen Hemminger
  2024-08-02  2:23   ` lihuisong (C)
@ 2024-08-05  1:19   ` lihuisong (C)
  2024-08-06  8:23   ` Chenbo Xia
                     ` (2 subsequent siblings)
  5 siblings, 0 replies; 19+ messages in thread
From: lihuisong (C) @ 2024-08-05  1:19 UTC (permalink / raw)
  To: Morten Brørup, maxime.coquelin, chenbox; +Cc: stephen, dev

LGTM,

Acked-by: Huisong Li <lihuisong@huawei.com>

在 2024/8/2 0:03, Morten Brørup 写道:
> Optimized the performance of updating the virtio statistics counters by
> reducing the number of branches.
>
> Ordered the packet size comparisons according to the probability with
> typical internet traffic mix.
>
> Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
> ---
> v5:
> * Do not inline the function. (Stephen)
> v4:
> * Consider multicast/broadcast packets unlikely.
> v3:
> * Eliminated a local variable.
> * Note: Substituted sizeof(uint32_t)*4 by 32UL, using unsigned long type
>    to keep optimal offsetting in generated assembler output.
> * Removed unnecessary curly braces.
> v2:
> * Fixed checkpatch warning about line length.
> ---
>   drivers/net/virtio/virtio_rxtx.c | 39 ++++++++++++--------------------
>   drivers/net/virtio/virtio_rxtx.h |  4 ++--
>   2 files changed, 16 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
> index f69b9453a2..b67f063b31 100644
> --- a/drivers/net/virtio/virtio_rxtx.c
> +++ b/drivers/net/virtio/virtio_rxtx.c
> @@ -82,37 +82,26 @@ vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
>   }
>   
>   void
> -virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
> +virtio_update_packet_stats(struct virtnet_stats *const stats,
> +		const struct rte_mbuf *const mbuf)
>   {
>   	uint32_t s = mbuf->pkt_len;
> -	struct rte_ether_addr *ea;
> +	const struct rte_ether_addr *const ea =
> +			rte_pktmbuf_mtod(mbuf, const struct rte_ether_addr *);
>   
>   	stats->bytes += s;
>   
> -	if (s == 64) {
> -		stats->size_bins[1]++;
> -	} else if (s > 64 && s < 1024) {
> -		uint32_t bin;
> -
> -		/* count zeros, and offset into correct bin */
> -		bin = (sizeof(s) * 8) - rte_clz32(s) - 5;
> -		stats->size_bins[bin]++;
> -	} else {
> -		if (s < 64)
> -			stats->size_bins[0]++;
> -		else if (s < 1519)
> -			stats->size_bins[6]++;
> -		else
> -			stats->size_bins[7]++;
> -	}
> +	if (s >= 1024)
> +		stats->size_bins[6 + (s > 1518)]++;
> +	else if (s <= 64)
> +		stats->size_bins[s >> 6]++;
> +	else
> +		stats->size_bins[32UL - rte_clz32(s) - 5]++;
>   
> -	ea = rte_pktmbuf_mtod(mbuf, struct rte_ether_addr *);
> -	if (rte_is_multicast_ether_addr(ea)) {
> -		if (rte_is_broadcast_ether_addr(ea))
> -			stats->broadcast++;
> -		else
> -			stats->multicast++;
> -	}
> +	RTE_BUILD_BUG_ON(offsetof(struct virtnet_stats, broadcast) !=
> +			offsetof(struct virtnet_stats, multicast) + sizeof(uint64_t));
> +	if (unlikely(rte_is_multicast_ether_addr(ea)))
> +		(&stats->multicast)[rte_is_broadcast_ether_addr(ea)]++;
>   }
>   
>   static inline void
> diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
> index afc4b74534..68034c914b 100644
> --- a/drivers/net/virtio/virtio_rxtx.h
> +++ b/drivers/net/virtio/virtio_rxtx.h
> @@ -35,7 +35,7 @@ struct virtnet_tx {
>   };
>   
>   int virtio_rxq_vec_setup(struct virtnet_rx *rxvq);
> -void virtio_update_packet_stats(struct virtnet_stats *stats,
> -				struct rte_mbuf *mbuf);
> +void virtio_update_packet_stats(struct virtnet_stats *const stats,
> +		const struct rte_mbuf *const mbuf);
>   
>   #endif /* _VIRTIO_RXTX_H_ */

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v5] virtio: optimize stats counters performance
  2024-08-01 16:03 ` [PATCH v5] " Morten Brørup
                     ` (2 preceding siblings ...)
  2024-08-05  1:19   ` lihuisong (C)
@ 2024-08-06  8:23   ` Chenbo Xia
  2024-09-10 15:01   ` Maxime Coquelin
  2024-09-19 12:04   ` Maxime Coquelin
  5 siblings, 0 replies; 19+ messages in thread
From: Chenbo Xia @ 2024-08-06  8:23 UTC (permalink / raw)
  To: Morten Brørup; +Cc: Maxime Coquelin, stephen, dev



> On Aug 2, 2024, at 00:03, Morten Brørup <mb@smartsharesystems.com> wrote:
> 
> External email: Use caution opening links or attachments
> 
> 
> Optimized the performance of updating the virtio statistics counters by
> reducing the number of branches.
> 
> Ordered the packet size comparisons according to the probability with
> typical internet traffic mix.
> 
> Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
> ---
> v5:
> * Do not inline the function. (Stephen)
> v4:
> * Consider multicast/broadcast packets unlikely.
> v3:
> * Eliminated a local variable.
> * Note: Substituted sizeof(uint32_t)*4 by 32UL, using unsigned long type
>  to keep optimal offsetting in generated assembler output.
> * Removed unnecessary curly braces.
> v2:
> * Fixed checkpatch warning about line length.
> ---
> drivers/net/virtio/virtio_rxtx.c | 39 ++++++++++++--------------------
> drivers/net/virtio/virtio_rxtx.h |  4 ++--
> 2 files changed, 16 insertions(+), 27 deletions(-)
> 
> diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
> index f69b9453a2..b67f063b31 100644
> --- a/drivers/net/virtio/virtio_rxtx.c
> +++ b/drivers/net/virtio/virtio_rxtx.c
> @@ -82,37 +82,26 @@ vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
> }
> 
> void
> -virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
> +virtio_update_packet_stats(struct virtnet_stats *const stats,
> +               const struct rte_mbuf *const mbuf)
> {
>        uint32_t s = mbuf->pkt_len;
> -       struct rte_ether_addr *ea;
> +       const struct rte_ether_addr *const ea =
> +                       rte_pktmbuf_mtod(mbuf, const struct rte_ether_addr *);
> 
>        stats->bytes += s;
> 
> -       if (s == 64) {
> -               stats->size_bins[1]++;
> -       } else if (s > 64 && s < 1024) {
> -               uint32_t bin;
> -
> -               /* count zeros, and offset into correct bin */
> -               bin = (sizeof(s) * 8) - rte_clz32(s) - 5;
> -               stats->size_bins[bin]++;
> -       } else {
> -               if (s < 64)
> -                       stats->size_bins[0]++;
> -               else if (s < 1519)
> -                       stats->size_bins[6]++;
> -               else
> -                       stats->size_bins[7]++;
> -       }
> +       if (s >= 1024)
> +               stats->size_bins[6 + (s > 1518)]++;
> +       else if (s <= 64)
> +               stats->size_bins[s >> 6]++;
> +       else
> +               stats->size_bins[32UL - rte_clz32(s) - 5]++;
> 
> -       ea = rte_pktmbuf_mtod(mbuf, struct rte_ether_addr *);
> -       if (rte_is_multicast_ether_addr(ea)) {
> -               if (rte_is_broadcast_ether_addr(ea))
> -                       stats->broadcast++;
> -               else
> -                       stats->multicast++;
> -       }
> +       RTE_BUILD_BUG_ON(offsetof(struct virtnet_stats, broadcast) !=
> +                       offsetof(struct virtnet_stats, multicast) + sizeof(uint64_t));
> +       if (unlikely(rte_is_multicast_ether_addr(ea)))
> +               (&stats->multicast)[rte_is_broadcast_ether_addr(ea)]++;
> }
> 
> static inline void
> diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
> index afc4b74534..68034c914b 100644
> --- a/drivers/net/virtio/virtio_rxtx.h
> +++ b/drivers/net/virtio/virtio_rxtx.h
> @@ -35,7 +35,7 @@ struct virtnet_tx {
> };
> 
> int virtio_rxq_vec_setup(struct virtnet_rx *rxvq);
> -void virtio_update_packet_stats(struct virtnet_stats *stats,
> -                               struct rte_mbuf *mbuf);
> +void virtio_update_packet_stats(struct virtnet_stats *const stats,
> +               const struct rte_mbuf *const mbuf);
> 
> #endif /* _VIRTIO_RXTX_H_ */
> —
> 2.43.0
> 

Reviewed-by: Chenbo Xia <chenbox@nvidia.com>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v5] virtio: optimize stats counters performance
  2024-08-01 16:03 ` [PATCH v5] " Morten Brørup
                     ` (3 preceding siblings ...)
  2024-08-06  8:23   ` Chenbo Xia
@ 2024-09-10 15:01   ` Maxime Coquelin
  2024-09-19 12:04   ` Maxime Coquelin
  5 siblings, 0 replies; 19+ messages in thread
From: Maxime Coquelin @ 2024-09-10 15:01 UTC (permalink / raw)
  To: Morten Brørup, chenbox; +Cc: stephen, dev



On 8/1/24 18:03, Morten Brørup wrote:
> Optimized the performance of updating the virtio statistics counters by
> reducing the number of branches.
> 
> Ordered the packet size comparisons according to the probability with
> typical internet traffic mix.
> 
> Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
> ---
> v5:
> * Do not inline the function. (Stephen)
> v4:
> * Consider multicast/broadcast packets unlikely.
> v3:
> * Eliminated a local variable.
> * Note: Substituted sizeof(uint32_t)*4 by 32UL, using unsigned long type
>    to keep optimal offsetting in generated assembler output.
> * Removed unnecessary curly braces.
> v2:
> * Fixed checkpatch warning about line length.
> ---
>   drivers/net/virtio/virtio_rxtx.c | 39 ++++++++++++--------------------
>   drivers/net/virtio/virtio_rxtx.h |  4 ++--
>   2 files changed, 16 insertions(+), 27 deletions(-)
>

Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Thanks,
Maxime


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v5] virtio: optimize stats counters performance
  2024-08-01 16:03 ` [PATCH v5] " Morten Brørup
                     ` (4 preceding siblings ...)
  2024-09-10 15:01   ` Maxime Coquelin
@ 2024-09-19 12:04   ` Maxime Coquelin
  5 siblings, 0 replies; 19+ messages in thread
From: Maxime Coquelin @ 2024-09-19 12:04 UTC (permalink / raw)
  To: Morten Brørup, chenbox; +Cc: stephen, dev



On 8/1/24 18:03, Morten Brørup wrote:
> Optimized the performance of updating the virtio statistics counters by
> reducing the number of branches.
> 
> Ordered the packet size comparisons according to the probability with
> typical internet traffic mix.
> 
> Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
> ---
> v5:
> * Do not inline the function. (Stephen)
> v4:
> * Consider multicast/broadcast packets unlikely.
> v3:
> * Eliminated a local variable.
> * Note: Substituted sizeof(uint32_t)*4 by 32UL, using unsigned long type
>    to keep optimal offsetting in generated assembler output.
> * Removed unnecessary curly braces.
> v2:
> * Fixed checkpatch warning about line length.
> ---
>   drivers/net/virtio/virtio_rxtx.c | 39 ++++++++++++--------------------
>   drivers/net/virtio/virtio_rxtx.h |  4 ++--
>   2 files changed, 16 insertions(+), 27 deletions(-)
> 

Applied to next-virtio/for-next-net.

Thanks,
Maxime


^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2024-09-19 12:05 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-07-31 13:17 [PATCH] virtio: optimize stats counters performance Morten Brørup
2024-07-31 14:04 ` [PATCH v2] " Morten Brørup
2024-07-31 15:13 ` [PATCH v3] " Morten Brørup
2024-07-31 22:58 ` [PATCH v4] " Morten Brørup
2024-07-31 23:45   ` Stephen Hemminger
2024-07-31 23:51     ` Morten Brørup
2024-08-01 16:03 ` [PATCH v5] " Morten Brørup
2024-08-01 16:17   ` Stephen Hemminger
2024-08-01 20:38     ` Morten Brørup
2024-08-02 14:49       ` Morten Brørup
2024-08-02  2:23   ` lihuisong (C)
2024-08-02  2:42     ` Stephen Hemminger
2024-08-02  3:17       ` lihuisong (C)
2024-08-02 11:27     ` Morten Brørup
2024-08-05  1:14       ` lihuisong (C)
2024-08-05  1:19   ` lihuisong (C)
2024-08-06  8:23   ` Chenbo Xia
2024-09-10 15:01   ` Maxime Coquelin
2024-09-19 12:04   ` Maxime Coquelin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).