The throughput calculation requires a counter that measures passing of time. The PMU cycle counter does not do that. This results in incorrect throughput numbers when RTE_ARM_EAL_RDTSC_USE_PMU is enabled. Use clock_gettime system call to calculate the time passed since last call. Bugzilla ID: 450 Fixes: 0e106980301d ("app/testpmd: show throughput in port stats") Cc: zhihong.wang@intel.com Cc: stable@dpdk.org Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> Reviewed-by: Phil Yang <phil.yang@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> --- app/test-pmd/config.c | 44 +++++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c index 016bcb09c..91fbf99f8 100644 --- a/app/test-pmd/config.c +++ b/app/test-pmd/config.c @@ -54,6 +54,14 @@ #define ETHDEV_FWVERS_LEN 32 +#ifdef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */ +#define CLOCK_TYPE_ID CLOCK_MONOTONIC_RAW +#else +#define CLOCK_TYPE_ID CLOCK_MONOTONIC +#endif + +#define NS_PER_SEC 1E9 + static char *flowtype_to_str(uint16_t flow_type); static const struct { @@ -136,9 +144,10 @@ nic_stats_display(portid_t port_id) static uint64_t prev_pkts_tx[RTE_MAX_ETHPORTS]; static uint64_t prev_bytes_rx[RTE_MAX_ETHPORTS]; static uint64_t prev_bytes_tx[RTE_MAX_ETHPORTS]; - static uint64_t prev_cycles[RTE_MAX_ETHPORTS]; + static uint64_t prev_ns[RTE_MAX_ETHPORTS]; + struct timespec cur_time; uint64_t diff_pkts_rx, diff_pkts_tx, diff_bytes_rx, diff_bytes_tx, - diff_cycles; + diff_ns; uint64_t mpps_rx, mpps_tx, mbps_rx, mbps_tx; struct rte_eth_stats stats; struct rte_port *port = &ports[port_id]; @@ -195,10 +204,17 @@ nic_stats_display(portid_t port_id) } } - diff_cycles = prev_cycles[port_id]; - prev_cycles[port_id] = rte_rdtsc(); - if (diff_cycles > 0) - diff_cycles = prev_cycles[port_id] - diff_cycles; + diff_ns = 0; + if (clock_gettime(CLOCK_TYPE_ID, &cur_time) == 0) { + uint64_t ns; + + ns = cur_time.tv_sec * NS_PER_SEC; + ns += cur_time.tv_nsec; + + if (prev_ns[port_id] != 0) + diff_ns = ns - prev_ns[port_id]; + prev_ns[port_id] = ns; + } diff_pkts_rx = (stats.ipackets > prev_pkts_rx[port_id]) ? (stats.ipackets - prev_pkts_rx[port_id]) : 0; @@ -206,10 +222,10 @@ nic_stats_display(portid_t port_id) (stats.opackets - prev_pkts_tx[port_id]) : 0; prev_pkts_rx[port_id] = stats.ipackets; prev_pkts_tx[port_id] = stats.opackets; - mpps_rx = diff_cycles > 0 ? - diff_pkts_rx * rte_get_tsc_hz() / diff_cycles : 0; - mpps_tx = diff_cycles > 0 ? - diff_pkts_tx * rte_get_tsc_hz() / diff_cycles : 0; + mpps_rx = diff_ns > 0 ? + (double)diff_pkts_rx / diff_ns * NS_PER_SEC : 0; + mpps_tx = diff_ns > 0 ? + (double)diff_pkts_tx / diff_ns * NS_PER_SEC : 0; diff_bytes_rx = (stats.ibytes > prev_bytes_rx[port_id]) ? (stats.ibytes - prev_bytes_rx[port_id]) : 0; @@ -217,10 +233,10 @@ nic_stats_display(portid_t port_id) (stats.obytes - prev_bytes_tx[port_id]) : 0; prev_bytes_rx[port_id] = stats.ibytes; prev_bytes_tx[port_id] = stats.obytes; - mbps_rx = diff_cycles > 0 ? - diff_bytes_rx * rte_get_tsc_hz() / diff_cycles : 0; - mbps_tx = diff_cycles > 0 ? - diff_bytes_tx * rte_get_tsc_hz() / diff_cycles : 0; + mbps_rx = diff_ns > 0 ? + (double)diff_bytes_rx / diff_ns * NS_PER_SEC : 0; + mbps_tx = diff_ns > 0 ? + (double)diff_bytes_tx / diff_ns * NS_PER_SEC : 0; printf("\n Throughput (since last show)\n"); printf(" Rx-pps: %12"PRIu64" Rx-bps: %12"PRIu64"\n Tx-pps: %12" -- 2.17.1
Enable missing burst stats for rx path for flowgen mode. Fixes: e9e23a617eb8 ("app/testpmd: add flowgen forwarding engine") Cc: stable@dpdk.org Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> Reviewed-by: Phil Yang <phil.yang@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> --- app/test-pmd/flowgen.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c index 4bd351e67..011887c61 100644 --- a/app/test-pmd/flowgen.c +++ b/app/test-pmd/flowgen.c @@ -111,6 +111,10 @@ pkt_burst_flow_gen(struct fwd_stream *fs) /* Receive a burst of packets and discard them. */ nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, nb_pkt_per_burst); +#ifdef RTE_TEST_PMD_RECORD_BURST_STATS + fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; +#endif + fs->rx_packets += nb_rx; for (i = 0; i < nb_rx; i++) -- 2.17.1
From: Phil Yang <phil.yang@arm.com> Add burst stats for noisy vnf mode. Fixes: 3c156061b938 ("app/testpmd: add noisy neighbour forwarding mode") Cc: stable@dpdk.org Cc: jfreimann@redhat.com Signed-off-by: Phil Yang <phil.yang@arm.com> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> --- app/test-pmd/noisy_vnf.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/app/test-pmd/noisy_vnf.c b/app/test-pmd/noisy_vnf.c index 58c4ee925..24f286da6 100644 --- a/app/test-pmd/noisy_vnf.c +++ b/app/test-pmd/noisy_vnf.c @@ -154,6 +154,9 @@ pkt_burst_noisy_vnf(struct fwd_stream *fs) nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, nb_pkt_per_burst); +#ifdef RTE_TEST_PMD_RECORD_BURST_STATS + fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; +#endif if (unlikely(nb_rx == 0)) goto flush; fs->rx_packets += nb_rx; @@ -164,6 +167,9 @@ pkt_burst_noisy_vnf(struct fwd_stream *fs) pkts_burst, nb_rx); if (unlikely(nb_tx < nb_rx) && fs->retry_enabled) nb_tx += do_retry(nb_rx, nb_tx, pkts_burst, fs); +#ifdef RTE_TEST_PMD_RECORD_BURST_STATS + fs->tx_burst_stats.pkt_burst_spread[nb_tx]++; +#endif fs->tx_packets += nb_tx; fs->fwd_dropped += drop_pkts(pkts_burst, nb_rx, nb_tx); return; @@ -187,6 +193,9 @@ pkt_burst_noisy_vnf(struct fwd_stream *fs) nb_deqd); if (unlikely(nb_tx < nb_rx) && fs->retry_enabled) nb_tx += do_retry(nb_rx, nb_tx, tmp_pkts, fs); +#ifdef RTE_TEST_PMD_RECORD_BURST_STATS + fs->tx_burst_stats.pkt_burst_spread[nb_tx]++; +#endif fs->fwd_dropped += drop_pkts(tmp_pkts, nb_deqd, nb_tx); } } @@ -211,6 +220,9 @@ pkt_burst_noisy_vnf(struct fwd_stream *fs) tmp_pkts, nb_deqd); if (unlikely(sent < nb_deqd) && fs->retry_enabled) nb_tx += do_retry(nb_rx, nb_tx, tmp_pkts, fs); +#ifdef RTE_TEST_PMD_RECORD_BURST_STATS + fs->tx_burst_stats.pkt_burst_spread[nb_tx]++; +#endif fs->fwd_dropped += drop_pkts(tmp_pkts, nb_deqd, sent); ncf->prev_time = rte_get_timer_cycles(); } -- 2.17.1
The burst % calculation can over flow due to multiplication. Fix the multiplication and increase the size of variables to 64b. Fixes: af75078fece3 ("first public release") Cc: stable@dpdk.org Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> Reviewed-by: Phil Yang <phil.yang@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> --- app/test-pmd/testpmd.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index 4989d22ca..2e1493da2 100644 --- a/app/test-pmd/testpmd.c +++ b/app/test-pmd/testpmd.c @@ -1692,9 +1692,9 @@ init_fwd_streams(void) static void pkt_burst_stats_display(const char *rx_tx, struct pkt_burst_stats *pbs) { - unsigned int total_burst; - unsigned int nb_burst; - unsigned int burst_stats[3]; + uint64_t total_burst; + uint64_t nb_burst; + uint64_t burst_stats[3]; uint16_t pktnb_stats[3]; uint16_t nb_pkt; int burst_percent[3]; @@ -1723,8 +1723,8 @@ pkt_burst_stats_display(const char *rx_tx, struct pkt_burst_stats *pbs) } if (total_burst == 0) return; - burst_percent[0] = (burst_stats[0] * 100) / total_burst; - printf(" %s-bursts : %u [%d%% of %d pkts", rx_tx, total_burst, + burst_percent[0] = (double)burst_stats[0] / total_burst * 100; + printf(" %s-bursts : %"PRIu64" [%d%% of %d pkts", rx_tx, total_burst, burst_percent[0], (int) pktnb_stats[0]); if (burst_stats[0] == total_burst) { printf("]\n"); @@ -1735,7 +1735,7 @@ pkt_burst_stats_display(const char *rx_tx, struct pkt_burst_stats *pbs) 100 - burst_percent[0], pktnb_stats[1]); return; } - burst_percent[1] = (burst_stats[1] * 100) / total_burst; + burst_percent[1] = (double)burst_stats[1] / total_burst * 100; burst_percent[2] = 100 - (burst_percent[0] + burst_percent[1]); if ((burst_percent[1] == 0) || (burst_percent[2] == 0)) { printf(" + %d%% of others]\n", 100 - burst_percent[0]); -- 2.17.1
The number of empty polls provides information about available CPU head room in the presence of continuous polling. Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> Reviewed-by: Phil Yang <phil.yang@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> --- app/test-pmd/csumonly.c | 5 +-- app/test-pmd/icmpecho.c | 6 ++-- app/test-pmd/iofwd.c | 6 ++-- app/test-pmd/macfwd.c | 6 ++-- app/test-pmd/macswap.c | 6 ++-- app/test-pmd/rxonly.c | 6 ++-- app/test-pmd/testpmd.c | 74 ++++++++++++++++++++++++----------------- 7 files changed, 61 insertions(+), 48 deletions(-) diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c index 862622379..0a96b24eb 100644 --- a/app/test-pmd/csumonly.c +++ b/app/test-pmd/csumonly.c @@ -802,11 +802,12 @@ pkt_burst_checksum_forward(struct fwd_stream *fs) /* receive a burst of packet */ nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, nb_pkt_per_burst); - if (unlikely(nb_rx == 0)) - return; #ifdef RTE_TEST_PMD_RECORD_BURST_STATS fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; #endif + if (unlikely(nb_rx == 0)) + return; + fs->rx_packets += nb_rx; rx_bad_ip_csum = 0; rx_bad_l4_csum = 0; diff --git a/app/test-pmd/icmpecho.c b/app/test-pmd/icmpecho.c index 65aece16c..78e3adf2c 100644 --- a/app/test-pmd/icmpecho.c +++ b/app/test-pmd/icmpecho.c @@ -308,12 +308,12 @@ reply_to_icmp_echo_rqsts(struct fwd_stream *fs) */ nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, nb_pkt_per_burst); - if (unlikely(nb_rx == 0)) - return; - #ifdef RTE_TEST_PMD_RECORD_BURST_STATS fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; #endif + if (unlikely(nb_rx == 0)) + return; + fs->rx_packets += nb_rx; nb_replies = 0; for (i = 0; i < nb_rx; i++) { diff --git a/app/test-pmd/iofwd.c b/app/test-pmd/iofwd.c index 9dce76efe..22b59bbda 100644 --- a/app/test-pmd/iofwd.c +++ b/app/test-pmd/iofwd.c @@ -66,13 +66,13 @@ pkt_burst_io_forward(struct fwd_stream *fs) */ nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, nb_pkt_per_burst); +#ifdef RTE_TEST_PMD_RECORD_BURST_STATS + fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; +#endif if (unlikely(nb_rx == 0)) return; fs->rx_packets += nb_rx; -#ifdef RTE_TEST_PMD_RECORD_BURST_STATS - fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; -#endif nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx); /* diff --git a/app/test-pmd/macfwd.c b/app/test-pmd/macfwd.c index d2ebb1105..0a89d94be 100644 --- a/app/test-pmd/macfwd.c +++ b/app/test-pmd/macfwd.c @@ -71,12 +71,12 @@ pkt_burst_mac_forward(struct fwd_stream *fs) */ nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, nb_pkt_per_burst); - if (unlikely(nb_rx == 0)) - return; - #ifdef RTE_TEST_PMD_RECORD_BURST_STATS fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; #endif + if (unlikely(nb_rx == 0)) + return; + fs->rx_packets += nb_rx; txp = &ports[fs->tx_port]; tx_offloads = txp->dev_conf.txmode.offloads; diff --git a/app/test-pmd/macswap.c b/app/test-pmd/macswap.c index 8428c26d8..fbe8cb39e 100644 --- a/app/test-pmd/macswap.c +++ b/app/test-pmd/macswap.c @@ -72,12 +72,12 @@ pkt_burst_mac_swap(struct fwd_stream *fs) */ nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, nb_pkt_per_burst); - if (unlikely(nb_rx == 0)) - return; - #ifdef RTE_TEST_PMD_RECORD_BURST_STATS fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; #endif + if (unlikely(nb_rx == 0)) + return; + fs->rx_packets += nb_rx; txp = &ports[fs->tx_port]; diff --git a/app/test-pmd/rxonly.c b/app/test-pmd/rxonly.c index 5c65fc42d..18d59a68d 100644 --- a/app/test-pmd/rxonly.c +++ b/app/test-pmd/rxonly.c @@ -63,12 +63,12 @@ pkt_burst_receive(struct fwd_stream *fs) */ nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, nb_pkt_per_burst); - if (unlikely(nb_rx == 0)) - return; - #ifdef RTE_TEST_PMD_RECORD_BURST_STATS fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; #endif + if (unlikely(nb_rx == 0)) + return; + fs->rx_packets += nb_rx; for (i = 0; i < nb_rx; i++) rte_pktmbuf_free(pkts_burst[i]); diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index 2e1493da2..9075143b7 100644 --- a/app/test-pmd/testpmd.c +++ b/app/test-pmd/testpmd.c @@ -1692,57 +1692,69 @@ init_fwd_streams(void) static void pkt_burst_stats_display(const char *rx_tx, struct pkt_burst_stats *pbs) { - uint64_t total_burst; + uint64_t total_burst, sburst; uint64_t nb_burst; - uint64_t burst_stats[3]; - uint16_t pktnb_stats[3]; + uint64_t burst_stats[4]; + uint16_t pktnb_stats[4]; uint16_t nb_pkt; - int burst_percent[3]; + int burst_percent[4], sburstp; + int i; /* * First compute the total number of packet bursts and the * two highest numbers of bursts of the same number of packets. */ total_burst = 0; - burst_stats[0] = burst_stats[1] = burst_stats[2] = 0; - pktnb_stats[0] = pktnb_stats[1] = pktnb_stats[2] = 0; + memset(&burst_stats, 0x0, sizeof(burst_stats)); + memset(&pktnb_stats, 0x0, sizeof(pktnb_stats)); + for (nb_pkt = 0; nb_pkt < MAX_PKT_BURST; nb_pkt++) { nb_burst = pbs->pkt_burst_spread[nb_pkt]; - if (nb_burst == 0) - continue; total_burst += nb_burst; - if (nb_burst > burst_stats[0]) { - burst_stats[1] = burst_stats[0]; - pktnb_stats[1] = pktnb_stats[0]; + + /* Show empty poll stats always */ + if (nb_pkt == 0) { burst_stats[0] = nb_burst; pktnb_stats[0] = nb_pkt; - } else if (nb_burst > burst_stats[1]) { + continue; + } + + if (nb_burst == 0) + continue; + + if (nb_burst > burst_stats[1]) { + burst_stats[2] = burst_stats[1]; + pktnb_stats[2] = pktnb_stats[1]; burst_stats[1] = nb_burst; pktnb_stats[1] = nb_pkt; + } else if (nb_burst > burst_stats[2]) { + burst_stats[2] = nb_burst; + pktnb_stats[2] = nb_pkt; } } if (total_burst == 0) return; - burst_percent[0] = (double)burst_stats[0] / total_burst * 100; - printf(" %s-bursts : %"PRIu64" [%d%% of %d pkts", rx_tx, total_burst, - burst_percent[0], (int) pktnb_stats[0]); - if (burst_stats[0] == total_burst) { - printf("]\n"); - return; - } - if (burst_stats[0] + burst_stats[1] == total_burst) { - printf(" + %d%% of %d pkts]\n", - 100 - burst_percent[0], pktnb_stats[1]); - return; - } - burst_percent[1] = (double)burst_stats[1] / total_burst * 100; - burst_percent[2] = 100 - (burst_percent[0] + burst_percent[1]); - if ((burst_percent[1] == 0) || (burst_percent[2] == 0)) { - printf(" + %d%% of others]\n", 100 - burst_percent[0]); - return; + + printf(" %s-bursts : %"PRIu64" [", rx_tx, total_burst); + for (i = 0, sburst = 0, sburstp = 0; i < 4; i++) { + if (i == 3) { + printf("%d%% of other]\n", 100 - sburstp); + return; + } + + sburst += burst_stats[i]; + if (sburst == total_burst) { + printf("%d%% of %d pkts]\n", + 100 - sburstp, (int) pktnb_stats[i]); + return; + } + + burst_percent[i] = + (double)burst_stats[i] / total_burst * 100; + printf("%d%% of %d pkts + ", + burst_percent[i], (int) pktnb_stats[i]); + sburstp += burst_percent[i]; } - printf(" + %d%% of %d pkts + %d%% of others]\n", - burst_percent[1], (int) pktnb_stats[1], burst_percent[2]); } #endif /* RTE_TEST_PMD_RECORD_BURST_STATS */ -- 2.17.1
On Wed, Jun 17, 2020 at 8:13 PM Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> wrote: > > The throughput calculation requires a counter that measures > passing of time. The PMU cycle counter does not do that. This It is not clear from git commit on why PMU cycle counter does not do that? On dpdk bootup, we are figuring out the Hz value based on PMU counter cycles. What is the missing piece here? IMO, PMU counter should have less latency and more granularity than clock_getime. > results in incorrect throughput numbers when > RTE_ARM_EAL_RDTSC_USE_PMU is enabled. Use clock_gettime > system call to calculate the time passed since last call. > > Bugzilla ID: 450 > Fixes: 0e106980301d ("app/testpmd: show throughput in port stats") > Cc: zhihong.wang@intel.com > Cc: stable@dpdk.org > > Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> > Reviewed-by: Phil Yang <phil.yang@arm.com> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> > --- > app/test-pmd/config.c | 44 +++++++++++++++++++++++++++++-------------- > 1 file changed, 30 insertions(+), 14 deletions(-) > > diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c > index 016bcb09c..91fbf99f8 100644 > --- a/app/test-pmd/config.c > +++ b/app/test-pmd/config.c > @@ -54,6 +54,14 @@ > > #define ETHDEV_FWVERS_LEN 32 > > +#ifdef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */ > +#define CLOCK_TYPE_ID CLOCK_MONOTONIC_RAW > +#else > +#define CLOCK_TYPE_ID CLOCK_MONOTONIC > +#endif > + > +#define NS_PER_SEC 1E9 > + > static char *flowtype_to_str(uint16_t flow_type); > > static const struct { > @@ -136,9 +144,10 @@ nic_stats_display(portid_t port_id) > static uint64_t prev_pkts_tx[RTE_MAX_ETHPORTS]; > static uint64_t prev_bytes_rx[RTE_MAX_ETHPORTS]; > static uint64_t prev_bytes_tx[RTE_MAX_ETHPORTS]; > - static uint64_t prev_cycles[RTE_MAX_ETHPORTS]; > + static uint64_t prev_ns[RTE_MAX_ETHPORTS]; > + struct timespec cur_time; > uint64_t diff_pkts_rx, diff_pkts_tx, diff_bytes_rx, diff_bytes_tx, > - diff_cycles; > + diff_ns; > uint64_t mpps_rx, mpps_tx, mbps_rx, mbps_tx; > struct rte_eth_stats stats; > struct rte_port *port = &ports[port_id]; > @@ -195,10 +204,17 @@ nic_stats_display(portid_t port_id) > } > } > > - diff_cycles = prev_cycles[port_id]; > - prev_cycles[port_id] = rte_rdtsc(); > - if (diff_cycles > 0) > - diff_cycles = prev_cycles[port_id] - diff_cycles; > + diff_ns = 0; > + if (clock_gettime(CLOCK_TYPE_ID, &cur_time) == 0) { > + uint64_t ns; > + > + ns = cur_time.tv_sec * NS_PER_SEC; > + ns += cur_time.tv_nsec; > + > + if (prev_ns[port_id] != 0) > + diff_ns = ns - prev_ns[port_id]; > + prev_ns[port_id] = ns; > + } > > diff_pkts_rx = (stats.ipackets > prev_pkts_rx[port_id]) ? > (stats.ipackets - prev_pkts_rx[port_id]) : 0; > @@ -206,10 +222,10 @@ nic_stats_display(portid_t port_id) > (stats.opackets - prev_pkts_tx[port_id]) : 0; > prev_pkts_rx[port_id] = stats.ipackets; > prev_pkts_tx[port_id] = stats.opackets; > - mpps_rx = diff_cycles > 0 ? > - diff_pkts_rx * rte_get_tsc_hz() / diff_cycles : 0; > - mpps_tx = diff_cycles > 0 ? > - diff_pkts_tx * rte_get_tsc_hz() / diff_cycles : 0; > + mpps_rx = diff_ns > 0 ? > + (double)diff_pkts_rx / diff_ns * NS_PER_SEC : 0; > + mpps_tx = diff_ns > 0 ? > + (double)diff_pkts_tx / diff_ns * NS_PER_SEC : 0; > > diff_bytes_rx = (stats.ibytes > prev_bytes_rx[port_id]) ? > (stats.ibytes - prev_bytes_rx[port_id]) : 0; > @@ -217,10 +233,10 @@ nic_stats_display(portid_t port_id) > (stats.obytes - prev_bytes_tx[port_id]) : 0; > prev_bytes_rx[port_id] = stats.ibytes; > prev_bytes_tx[port_id] = stats.obytes; > - mbps_rx = diff_cycles > 0 ? > - diff_bytes_rx * rte_get_tsc_hz() / diff_cycles : 0; > - mbps_tx = diff_cycles > 0 ? > - diff_bytes_tx * rte_get_tsc_hz() / diff_cycles : 0; > + mbps_rx = diff_ns > 0 ? > + (double)diff_bytes_rx / diff_ns * NS_PER_SEC : 0; > + mbps_tx = diff_ns > 0 ? > + (double)diff_bytes_tx / diff_ns * NS_PER_SEC : 0; > > printf("\n Throughput (since last show)\n"); > printf(" Rx-pps: %12"PRIu64" Rx-bps: %12"PRIu64"\n Tx-pps: %12" > -- > 2.17.1 >
Thanks Jerin for the feedback > -----Original Message----- > From: Jerin Jacob <jerinjacobk@gmail.com> > Sent: Wednesday, June 17, 2020 10:16 AM > To: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> > Cc: dpdk-dev <dev@dpdk.org>; Ali Alnubani <alialnu@mellanox.com>; > orgerlitz@mellanox.com; Wenzhuo Lu <wenzhuo.lu@intel.com>; Beilei Xing > <beilei.xing@intel.com>; Bernard Iremonger <bernard.iremonger@intel.com>; > hemant.agrawal@nxp.com; jerinj@marvell.com; Slava Ovsiienko > <viacheslavo@mellanox.com>; thomas@monjalon.net; Ruifeng Wang > <Ruifeng.Wang@arm.com>; Phil Yang <Phil.Yang@arm.com>; nd > <nd@arm.com>; Zhihong Wang <zhihong.wang@intel.com>; dpdk stable > <stable@dpdk.org> > Subject: Re: [dpdk-dev] [PATCH 1/5] app/testpmd: clock gettime call in > throughput calculation > > On Wed, Jun 17, 2020 at 8:13 PM Honnappa Nagarahalli > <honnappa.nagarahalli@arm.com> wrote: > > > > The throughput calculation requires a counter that measures passing of > > time. The PMU cycle counter does not do that. This > > > It is not clear from git commit on why PMU cycle counter does not do that? > On dpdk bootup, we are figuring out the Hz value based on PMU counter > cycles. > What is the missing piece here? As I understand Linux kernel saves the PMU state and restores it every time a thread is scheduled out and in. So, when the thread is scheduled out the PMU cycles are not counted towards that thread. The thread that prints the statistics issues good amount of system calls and I am guessing it is getting scheduled out. So, it is reporting very low cycle count. > > IMO, PMU counter should have less latency and more granularity than > clock_getime. In general, agree. In this particular calculation the granularity has not mattered much (for ex: numbers are fine with 50Mhz generic counter and 2.5Ghz CPU). The latency also does not matter as it is getting amortized over a large number of packets. So, I do not see it affecting the reported PPS/BPS numbers. > > > results in incorrect throughput numbers when > RTE_ARM_EAL_RDTSC_USE_PMU > > is enabled. Use clock_gettime system call to calculate the time passed > > since last call. > > > > Bugzilla ID: 450 > > Fixes: 0e106980301d ("app/testpmd: show throughput in port stats") > > Cc: zhihong.wang@intel.com > > Cc: stable@dpdk.org > > > > Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> > > Reviewed-by: Phil Yang <phil.yang@arm.com> > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> > > --- > > app/test-pmd/config.c | 44 > > +++++++++++++++++++++++++++++-------------- > > 1 file changed, 30 insertions(+), 14 deletions(-) > > > > diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c index > > 016bcb09c..91fbf99f8 100644 > > --- a/app/test-pmd/config.c > > +++ b/app/test-pmd/config.c > > @@ -54,6 +54,14 @@ > > > > #define ETHDEV_FWVERS_LEN 32 > > > > +#ifdef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */ #define > > +CLOCK_TYPE_ID CLOCK_MONOTONIC_RAW #else #define CLOCK_TYPE_ID > > +CLOCK_MONOTONIC #endif > > + > > +#define NS_PER_SEC 1E9 > > + > > static char *flowtype_to_str(uint16_t flow_type); > > > > static const struct { > > @@ -136,9 +144,10 @@ nic_stats_display(portid_t port_id) > > static uint64_t prev_pkts_tx[RTE_MAX_ETHPORTS]; > > static uint64_t prev_bytes_rx[RTE_MAX_ETHPORTS]; > > static uint64_t prev_bytes_tx[RTE_MAX_ETHPORTS]; > > - static uint64_t prev_cycles[RTE_MAX_ETHPORTS]; > > + static uint64_t prev_ns[RTE_MAX_ETHPORTS]; > > + struct timespec cur_time; > > uint64_t diff_pkts_rx, diff_pkts_tx, diff_bytes_rx, diff_bytes_tx, > > - diff_cycles; > > + > > + diff_ns; > > uint64_t mpps_rx, mpps_tx, mbps_rx, mbps_tx; > > struct rte_eth_stats stats; > > struct rte_port *port = &ports[port_id]; @@ -195,10 +204,17 @@ > > nic_stats_display(portid_t port_id) > > } > > } > > > > - diff_cycles = prev_cycles[port_id]; > > - prev_cycles[port_id] = rte_rdtsc(); > > - if (diff_cycles > 0) > > - diff_cycles = prev_cycles[port_id] - diff_cycles; > > + diff_ns = 0; > > + if (clock_gettime(CLOCK_TYPE_ID, &cur_time) == 0) { > > + uint64_t ns; > > + > > + ns = cur_time.tv_sec * NS_PER_SEC; > > + ns += cur_time.tv_nsec; > > + > > + if (prev_ns[port_id] != 0) > > + diff_ns = ns - prev_ns[port_id]; > > + prev_ns[port_id] = ns; > > + } > > > > diff_pkts_rx = (stats.ipackets > prev_pkts_rx[port_id]) ? > > (stats.ipackets - prev_pkts_rx[port_id]) : 0; @@ > > -206,10 +222,10 @@ nic_stats_display(portid_t port_id) > > (stats.opackets - prev_pkts_tx[port_id]) : 0; > > prev_pkts_rx[port_id] = stats.ipackets; > > prev_pkts_tx[port_id] = stats.opackets; > > - mpps_rx = diff_cycles > 0 ? > > - diff_pkts_rx * rte_get_tsc_hz() / diff_cycles : 0; > > - mpps_tx = diff_cycles > 0 ? > > - diff_pkts_tx * rte_get_tsc_hz() / diff_cycles : 0; > > + mpps_rx = diff_ns > 0 ? > > + (double)diff_pkts_rx / diff_ns * NS_PER_SEC : 0; > > + mpps_tx = diff_ns > 0 ? > > + (double)diff_pkts_tx / diff_ns * NS_PER_SEC : 0; > > > > diff_bytes_rx = (stats.ibytes > prev_bytes_rx[port_id]) ? > > (stats.ibytes - prev_bytes_rx[port_id]) : 0; @@ > > -217,10 +233,10 @@ nic_stats_display(portid_t port_id) > > (stats.obytes - prev_bytes_tx[port_id]) : 0; > > prev_bytes_rx[port_id] = stats.ibytes; > > prev_bytes_tx[port_id] = stats.obytes; > > - mbps_rx = diff_cycles > 0 ? > > - diff_bytes_rx * rte_get_tsc_hz() / diff_cycles : 0; > > - mbps_tx = diff_cycles > 0 ? > > - diff_bytes_tx * rte_get_tsc_hz() / diff_cycles : 0; > > + mbps_rx = diff_ns > 0 ? > > + (double)diff_bytes_rx / diff_ns * NS_PER_SEC : 0; > > + mbps_tx = diff_ns > 0 ? > > + (double)diff_bytes_tx / diff_ns * NS_PER_SEC : 0; > > > > printf("\n Throughput (since last show)\n"); > > printf(" Rx-pps: %12"PRIu64" Rx-bps: %12"PRIu64"\n Tx- > pps: %12" > > -- > > 2.17.1 > >
On Wed, Jun 17, 2020 at 09:43:05AM -0500, Honnappa Nagarahalli wrote:
>From: Phil Yang <phil.yang@arm.com>
>
>Add burst stats for noisy vnf mode.
>
>Fixes: 3c156061b938 ("app/testpmd: add noisy neighbour forwarding mode")
>Cc: stable@dpdk.org
>Cc: jfreimann@redhat.com
>
>Signed-off-by: Phil Yang <phil.yang@arm.com>
>Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
>Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
>---
> app/test-pmd/noisy_vnf.c | 12 ++++++++++++
> 1 file changed, 12 insertions(+)
>
Reviewed-by: Jens Freimann <jfreimann@redhat.com>
regards,
Jens
On Thu, Jun 18, 2020 at 9:33 AM Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote: > > Thanks Jerin for the feedback > > > -----Original Message----- > > From: Jerin Jacob <jerinjacobk@gmail.com> > > Sent: Wednesday, June 17, 2020 10:16 AM > > To: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> > > Cc: dpdk-dev <dev@dpdk.org>; Ali Alnubani <alialnu@mellanox.com>; > > orgerlitz@mellanox.com; Wenzhuo Lu <wenzhuo.lu@intel.com>; Beilei Xing > > <beilei.xing@intel.com>; Bernard Iremonger <bernard.iremonger@intel.com>; > > hemant.agrawal@nxp.com; jerinj@marvell.com; Slava Ovsiienko > > <viacheslavo@mellanox.com>; thomas@monjalon.net; Ruifeng Wang > > <Ruifeng.Wang@arm.com>; Phil Yang <Phil.Yang@arm.com>; nd > > <nd@arm.com>; Zhihong Wang <zhihong.wang@intel.com>; dpdk stable > > <stable@dpdk.org> > > Subject: Re: [dpdk-dev] [PATCH 1/5] app/testpmd: clock gettime call in > > throughput calculation > > > > On Wed, Jun 17, 2020 at 8:13 PM Honnappa Nagarahalli > > <honnappa.nagarahalli@arm.com> wrote: > > > > > > The throughput calculation requires a counter that measures passing of > > > time. The PMU cycle counter does not do that. This > > > > > > It is not clear from git commit on why PMU cycle counter does not do that? > > On dpdk bootup, we are figuring out the Hz value based on PMU counter > > cycles. > > What is the missing piece here? > As I understand Linux kernel saves the PMU state and restores it every time a thread is scheduled out and in. So, when the thread is scheduled out the PMU cycles are not counted towards that thread. The thread that prints the statistics issues good amount of system calls and I am guessing it is getting scheduled out. So, it is reporting very low cycle count. OK. Probably add this info in git commit. > > > > IMO, PMU counter should have less latency and more granularity than > > clock_getime. > In general, agree. In this particular calculation the granularity has not mattered much (for ex: numbers are fine with 50Mhz generic counter and 2.5Ghz CPU). The latency also does not matter as it is getting amortized over a large number of packets. So, I do not see it affecting the reported PPS/BPS numbers. Reasonable to use clock_gettime for the control thread. > > > > > > results in incorrect throughput numbers when > > RTE_ARM_EAL_RDTSC_USE_PMU > > > is enabled. Use clock_gettime system call to calculate the time passed > > > since last call. > > > > > > Bugzilla ID: 450 > > > Fixes: 0e106980301d ("app/testpmd: show throughput in port stats") > > > Cc: zhihong.wang@intel.com > > > Cc: stable@dpdk.org > > > > > > Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> > > > Reviewed-by: Phil Yang <phil.yang@arm.com> > > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> > > > --- > > > app/test-pmd/config.c | 44 > > > +++++++++++++++++++++++++++++-------------- > > > 1 file changed, 30 insertions(+), 14 deletions(-) > > > > > > diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c index > > > 016bcb09c..91fbf99f8 100644 > > > --- a/app/test-pmd/config.c > > > +++ b/app/test-pmd/config.c > > > @@ -54,6 +54,14 @@ > > > > > > #define ETHDEV_FWVERS_LEN 32 > > > > > > +#ifdef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */ #define > > > +CLOCK_TYPE_ID CLOCK_MONOTONIC_RAW #else #define CLOCK_TYPE_ID > > > +CLOCK_MONOTONIC #endif > > > + > > > +#define NS_PER_SEC 1E9 > > > + > > > static char *flowtype_to_str(uint16_t flow_type); > > > > > > static const struct { > > > @@ -136,9 +144,10 @@ nic_stats_display(portid_t port_id) > > > static uint64_t prev_pkts_tx[RTE_MAX_ETHPORTS]; > > > static uint64_t prev_bytes_rx[RTE_MAX_ETHPORTS]; > > > static uint64_t prev_bytes_tx[RTE_MAX_ETHPORTS]; > > > - static uint64_t prev_cycles[RTE_MAX_ETHPORTS]; > > > + static uint64_t prev_ns[RTE_MAX_ETHPORTS]; > > > + struct timespec cur_time; > > > uint64_t diff_pkts_rx, diff_pkts_tx, diff_bytes_rx, diff_bytes_tx, > > > - diff_cycles; > > > + > > > + diff_ns; > > > uint64_t mpps_rx, mpps_tx, mbps_rx, mbps_tx; > > > struct rte_eth_stats stats; > > > struct rte_port *port = &ports[port_id]; @@ -195,10 +204,17 @@ > > > nic_stats_display(portid_t port_id) > > > } > > > } > > > > > > - diff_cycles = prev_cycles[port_id]; > > > - prev_cycles[port_id] = rte_rdtsc(); > > > - if (diff_cycles > 0) > > > - diff_cycles = prev_cycles[port_id] - diff_cycles; > > > + diff_ns = 0; > > > + if (clock_gettime(CLOCK_TYPE_ID, &cur_time) == 0) { > > > + uint64_t ns; > > > + > > > + ns = cur_time.tv_sec * NS_PER_SEC; > > > + ns += cur_time.tv_nsec; > > > + > > > + if (prev_ns[port_id] != 0) > > > + diff_ns = ns - prev_ns[port_id]; > > > + prev_ns[port_id] = ns; > > > + } > > > > > > diff_pkts_rx = (stats.ipackets > prev_pkts_rx[port_id]) ? > > > (stats.ipackets - prev_pkts_rx[port_id]) : 0; @@ > > > -206,10 +222,10 @@ nic_stats_display(portid_t port_id) > > > (stats.opackets - prev_pkts_tx[port_id]) : 0; > > > prev_pkts_rx[port_id] = stats.ipackets; > > > prev_pkts_tx[port_id] = stats.opackets; > > > - mpps_rx = diff_cycles > 0 ? > > > - diff_pkts_rx * rte_get_tsc_hz() / diff_cycles : 0; > > > - mpps_tx = diff_cycles > 0 ? > > > - diff_pkts_tx * rte_get_tsc_hz() / diff_cycles : 0; > > > + mpps_rx = diff_ns > 0 ? > > > + (double)diff_pkts_rx / diff_ns * NS_PER_SEC : 0; > > > + mpps_tx = diff_ns > 0 ? > > > + (double)diff_pkts_tx / diff_ns * NS_PER_SEC : 0; > > > > > > diff_bytes_rx = (stats.ibytes > prev_bytes_rx[port_id]) ? > > > (stats.ibytes - prev_bytes_rx[port_id]) : 0; @@ > > > -217,10 +233,10 @@ nic_stats_display(portid_t port_id) > > > (stats.obytes - prev_bytes_tx[port_id]) : 0; > > > prev_bytes_rx[port_id] = stats.ibytes; > > > prev_bytes_tx[port_id] = stats.obytes; > > > - mbps_rx = diff_cycles > 0 ? > > > - diff_bytes_rx * rte_get_tsc_hz() / diff_cycles : 0; > > > - mbps_tx = diff_cycles > 0 ? > > > - diff_bytes_tx * rte_get_tsc_hz() / diff_cycles : 0; > > > + mbps_rx = diff_ns > 0 ? > > > + (double)diff_bytes_rx / diff_ns * NS_PER_SEC : 0; > > > + mbps_tx = diff_ns > 0 ? > > > + (double)diff_bytes_tx / diff_ns * NS_PER_SEC : 0; > > > > > > printf("\n Throughput (since last show)\n"); > > > printf(" Rx-pps: %12"PRIu64" Rx-bps: %12"PRIu64"\n Tx- > > pps: %12" > > > -- > > > 2.17.1 > > >
> -----Original Message-----
> From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Sent: Wednesday, June 17, 2020 10:43 PM
> To: dev@dpdk.org; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; alialnu@mellanox.com;
> orgerlitz@mellanox.com; wenzhuo.lu@intel.com; beilei.xing@intel.com;
> bernard.iremonger@intel.com
> Cc: hemant.agrawal@nxp.com; jerinj@marvell.com;
> viacheslavo@mellanox.com; thomas@monjalon.net; Ruifeng Wang
> <Ruifeng.Wang@arm.com>; Phil Yang <Phil.Yang@arm.com>; nd
> <nd@arm.com>; stable@dpdk.org
> Subject: [PATCH 2/5] app/testpmd: enable burst stats for flowgen mode rx
> path
>
> Enable missing burst stats for rx path for flowgen mode.
>
> Fixes: e9e23a617eb8 ("app/testpmd: add flowgen forwarding engine")
> Cc: stable@dpdk.org
>
> Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Reviewed-by: Phil Yang <phil.yang@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
Tested-by: Phil Yang <phil.yang@arm.com>
> -----Original Message-----
> From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Sent: Wednesday, June 17, 2020 10:43 PM
> To: dev@dpdk.org; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; alialnu@mellanox.com;
> orgerlitz@mellanox.com; wenzhuo.lu@intel.com; beilei.xing@intel.com;
> bernard.iremonger@intel.com
> Cc: hemant.agrawal@nxp.com; jerinj@marvell.com;
> viacheslavo@mellanox.com; thomas@monjalon.net; Ruifeng Wang
> <Ruifeng.Wang@arm.com>; Phil Yang <Phil.Yang@arm.com>; nd
> <nd@arm.com>; stable@dpdk.org
> Subject: [PATCH 4/5] app/testpmd: fix burst percentage calculation
>
> The burst % calculation can over flow due to multiplication.
> Fix the multiplication and increase the size of variables to
> 64b.
>
> Fixes: af75078fece3 ("first public release")
> Cc: stable@dpdk.org
>
> Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Reviewed-by: Phil Yang <phil.yang@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
Tested-by: Phil Yang <phil.yang@arm.com>
> -----Original Message-----
> From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Sent: Wednesday, June 17, 2020 10:43 PM
> To: dev@dpdk.org; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; alialnu@mellanox.com;
> orgerlitz@mellanox.com; wenzhuo.lu@intel.com; beilei.xing@intel.com;
> bernard.iremonger@intel.com
> Cc: hemant.agrawal@nxp.com; jerinj@marvell.com;
> viacheslavo@mellanox.com; thomas@monjalon.net; Ruifeng Wang
> <Ruifeng.Wang@arm.com>; Phil Yang <Phil.Yang@arm.com>; nd
> <nd@arm.com>
> Subject: [PATCH 5/5] app/testpmd: enable empty polls in burst stats
>
> The number of empty polls provides information about available
> CPU head room in the presence of continuous polling.
>
> Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Reviewed-by: Phil Yang <phil.yang@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
> app/test-pmd/csumonly.c | 5 +--
> app/test-pmd/icmpecho.c | 6 ++--
> app/test-pmd/iofwd.c | 6 ++--
> app/test-pmd/macfwd.c | 6 ++--
> app/test-pmd/macswap.c | 6 ++--
> app/test-pmd/rxonly.c | 6 ++--
> app/test-pmd/testpmd.c | 74 ++++++++++++++++++++++++-----------------
> 7 files changed, 61 insertions(+), 48 deletions(-)
All these modes have been verified.
Tested-by: Phil Yang <phil.yang@arm.com>
> -----Original Message-----
> From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Sent: Wednesday, June 17, 2020 10:43 PM
> To: dev@dpdk.org; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; alialnu@mellanox.com;
> orgerlitz@mellanox.com; wenzhuo.lu@intel.com; beilei.xing@intel.com;
> bernard.iremonger@intel.com
> Cc: hemant.agrawal@nxp.com; jerinj@marvell.com;
> viacheslavo@mellanox.com; thomas@monjalon.net; Ruifeng Wang
> <Ruifeng.Wang@arm.com>; Phil Yang <Phil.Yang@arm.com>; nd
> <nd@arm.com>; zhihong.wang@intel.com; stable@dpdk.org
> Subject: [PATCH 1/5] app/testpmd: clock gettime call in throughput
> calculation
>
> The throughput calculation requires a counter that measures
> passing of time. The PMU cycle counter does not do that. This
> results in incorrect throughput numbers when
> RTE_ARM_EAL_RDTSC_USE_PMU is enabled. Use clock_gettime
> system call to calculate the time passed since last call.
>
> Bugzilla ID: 450
> Fixes: 0e106980301d ("app/testpmd: show throughput in port stats")
> Cc: zhihong.wang@intel.com
> Cc: stable@dpdk.org
>
> Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Reviewed-by: Phil Yang <phil.yang@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
Tested-by: Phil Yang <phil.yang@arm.com>
> -----Original Message-----
> From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Sent: Wednesday, June 17, 2020 5:43 PM
> To: dev@dpdk.org; honnappa.nagarahalli@arm.com; Ali Alnubani
> <alialnu@mellanox.com>; orgerlitz@mellanox.com; wenzhuo.lu@intel.com;
> beilei.xing@intel.com; bernard.iremonger@intel.com
> Cc: hemant.agrawal@nxp.com; jerinj@marvell.com; Slava Ovsiienko
> <viacheslavo@mellanox.com>; Thomas Monjalon <thomas@monjalon.net>;
> ruifeng.wang@arm.com; phil.yang@arm.com; nd@arm.com;
> zhihong.wang@intel.com; stable@dpdk.org
> Subject: [PATCH 1/5] app/testpmd: clock gettime call in throughput
> calculation
>
> The throughput calculation requires a counter that measures passing of time.
> The PMU cycle counter does not do that. This results in incorrect throughput
> numbers when RTE_ARM_EAL_RDTSC_USE_PMU is enabled. Use
> clock_gettime system call to calculate the time passed since last call.
>
> Bugzilla ID: 450
> Fixes: 0e106980301d ("app/testpmd: show throughput in port stats")
> Cc: zhihong.wang@intel.com
> Cc: stable@dpdk.org
>
> Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Reviewed-by: Phil Yang <phil.yang@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Tested-by: Ali Alnubani <alialnu@mellanox.com>
> -----Original Message-----
> From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Sent: Wednesday, June 17, 2020 5:43 PM
> To: dev@dpdk.org; honnappa.nagarahalli@arm.com; Ali Alnubani
> <alialnu@mellanox.com>; orgerlitz@mellanox.com; wenzhuo.lu@intel.com;
> beilei.xing@intel.com; bernard.iremonger@intel.com
> Cc: hemant.agrawal@nxp.com; jerinj@marvell.com; Slava Ovsiienko
> <viacheslavo@mellanox.com>; Thomas Monjalon <thomas@monjalon.net>;
> ruifeng.wang@arm.com; phil.yang@arm.com; nd@arm.com;
> stable@dpdk.org
> Subject: [PATCH 4/5] app/testpmd: fix burst percentage calculation
>
> The burst % calculation can over flow due to multiplication.
> Fix the multiplication and increase the size of variables to 64b.
>
> Fixes: af75078fece3 ("first public release")
> Cc: stable@dpdk.org
>
> Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Reviewed-by: Phil Yang <phil.yang@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Tested-by: Ali Alnubani <alialnu@mellanox.com>
> -----Original Message-----
> From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Sent: Wednesday, June 17, 2020 5:43 PM
> To: dev@dpdk.org; honnappa.nagarahalli@arm.com; Ali Alnubani
> <alialnu@mellanox.com>; orgerlitz@mellanox.com; wenzhuo.lu@intel.com;
> beilei.xing@intel.com; bernard.iremonger@intel.com
> Cc: hemant.agrawal@nxp.com; jerinj@marvell.com; Slava Ovsiienko
> <viacheslavo@mellanox.com>; Thomas Monjalon <thomas@monjalon.net>;
> ruifeng.wang@arm.com; phil.yang@arm.com; nd@arm.com
> Subject: [PATCH 5/5] app/testpmd: enable empty polls in burst stats
>
> The number of empty polls provides information about available CPU head
> room in the presence of continuous polling.
>
> Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Reviewed-by: Phil Yang <phil.yang@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Tested-by: Ali Alnubani <alialnu@mellanox.com>
The throughput calculation requires a counter that measures passing of time. However, the kernel saves and restores the PMU state when a thread is unscheduled and scheduled. This ensures that the PMU cycles are not counted towards a thread that is not scheduled. Hence, when RTE_ARM_EAL_RDTSC_USE_PMU is enabled, the PMU cycles do not represent the passing of time. This results in incorrect calculation of throughput numbers. Use clock_gettime system call to calculate the time passed since last call. Bugzilla ID: 450 Fixes: 0e106980301d ("app/testpmd: show throughput in port stats") Cc: zhihong.wang@intel.com Cc: stable@dpdk.org Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> Reviewed-by: Phil Yang <phil.yang@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> Tested-by: Phil Yang <phil.yang@arm.com> Tested-by: Ali Alnubani <alialnu@mellanox.com> --- v2: Updated commit log in 1/5 (Jerin) app/test-pmd/config.c | 44 +++++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c index 016bcb09c..91fbf99f8 100644 --- a/app/test-pmd/config.c +++ b/app/test-pmd/config.c @@ -54,6 +54,14 @@ #define ETHDEV_FWVERS_LEN 32 +#ifdef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */ +#define CLOCK_TYPE_ID CLOCK_MONOTONIC_RAW +#else +#define CLOCK_TYPE_ID CLOCK_MONOTONIC +#endif + +#define NS_PER_SEC 1E9 + static char *flowtype_to_str(uint16_t flow_type); static const struct { @@ -136,9 +144,10 @@ nic_stats_display(portid_t port_id) static uint64_t prev_pkts_tx[RTE_MAX_ETHPORTS]; static uint64_t prev_bytes_rx[RTE_MAX_ETHPORTS]; static uint64_t prev_bytes_tx[RTE_MAX_ETHPORTS]; - static uint64_t prev_cycles[RTE_MAX_ETHPORTS]; + static uint64_t prev_ns[RTE_MAX_ETHPORTS]; + struct timespec cur_time; uint64_t diff_pkts_rx, diff_pkts_tx, diff_bytes_rx, diff_bytes_tx, - diff_cycles; + diff_ns; uint64_t mpps_rx, mpps_tx, mbps_rx, mbps_tx; struct rte_eth_stats stats; struct rte_port *port = &ports[port_id]; @@ -195,10 +204,17 @@ nic_stats_display(portid_t port_id) } } - diff_cycles = prev_cycles[port_id]; - prev_cycles[port_id] = rte_rdtsc(); - if (diff_cycles > 0) - diff_cycles = prev_cycles[port_id] - diff_cycles; + diff_ns = 0; + if (clock_gettime(CLOCK_TYPE_ID, &cur_time) == 0) { + uint64_t ns; + + ns = cur_time.tv_sec * NS_PER_SEC; + ns += cur_time.tv_nsec; + + if (prev_ns[port_id] != 0) + diff_ns = ns - prev_ns[port_id]; + prev_ns[port_id] = ns; + } diff_pkts_rx = (stats.ipackets > prev_pkts_rx[port_id]) ? (stats.ipackets - prev_pkts_rx[port_id]) : 0; @@ -206,10 +222,10 @@ nic_stats_display(portid_t port_id) (stats.opackets - prev_pkts_tx[port_id]) : 0; prev_pkts_rx[port_id] = stats.ipackets; prev_pkts_tx[port_id] = stats.opackets; - mpps_rx = diff_cycles > 0 ? - diff_pkts_rx * rte_get_tsc_hz() / diff_cycles : 0; - mpps_tx = diff_cycles > 0 ? - diff_pkts_tx * rte_get_tsc_hz() / diff_cycles : 0; + mpps_rx = diff_ns > 0 ? + (double)diff_pkts_rx / diff_ns * NS_PER_SEC : 0; + mpps_tx = diff_ns > 0 ? + (double)diff_pkts_tx / diff_ns * NS_PER_SEC : 0; diff_bytes_rx = (stats.ibytes > prev_bytes_rx[port_id]) ? (stats.ibytes - prev_bytes_rx[port_id]) : 0; @@ -217,10 +233,10 @@ nic_stats_display(portid_t port_id) (stats.obytes - prev_bytes_tx[port_id]) : 0; prev_bytes_rx[port_id] = stats.ibytes; prev_bytes_tx[port_id] = stats.obytes; - mbps_rx = diff_cycles > 0 ? - diff_bytes_rx * rte_get_tsc_hz() / diff_cycles : 0; - mbps_tx = diff_cycles > 0 ? - diff_bytes_tx * rte_get_tsc_hz() / diff_cycles : 0; + mbps_rx = diff_ns > 0 ? + (double)diff_bytes_rx / diff_ns * NS_PER_SEC : 0; + mbps_tx = diff_ns > 0 ? + (double)diff_bytes_tx / diff_ns * NS_PER_SEC : 0; printf("\n Throughput (since last show)\n"); printf(" Rx-pps: %12"PRIu64" Rx-bps: %12"PRIu64"\n Tx-pps: %12" -- 2.17.1
Enable missing burst stats for rx path for flowgen mode. Fixes: e9e23a617eb8 ("app/testpmd: add flowgen forwarding engine") Cc: stable@dpdk.org Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> Reviewed-by: Phil Yang <phil.yang@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> Tested-by: Phil Yang <phil.yang@arm.com> --- app/test-pmd/flowgen.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c index 4bd351e67..011887c61 100644 --- a/app/test-pmd/flowgen.c +++ b/app/test-pmd/flowgen.c @@ -111,6 +111,10 @@ pkt_burst_flow_gen(struct fwd_stream *fs) /* Receive a burst of packets and discard them. */ nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, nb_pkt_per_burst); +#ifdef RTE_TEST_PMD_RECORD_BURST_STATS + fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; +#endif + fs->rx_packets += nb_rx; for (i = 0; i < nb_rx; i++) -- 2.17.1
From: Phil Yang <phil.yang@arm.com> Add burst stats for noisy vnf mode. Fixes: 3c156061b938 ("app/testpmd: add noisy neighbour forwarding mode") Cc: stable@dpdk.org Cc: jfreimann@redhat.com Signed-off-by: Phil Yang <phil.yang@arm.com> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> Reviewed-by: Jens Freimann <jfreimann@redhat.com> --- app/test-pmd/noisy_vnf.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/app/test-pmd/noisy_vnf.c b/app/test-pmd/noisy_vnf.c index 58c4ee925..24f286da6 100644 --- a/app/test-pmd/noisy_vnf.c +++ b/app/test-pmd/noisy_vnf.c @@ -154,6 +154,9 @@ pkt_burst_noisy_vnf(struct fwd_stream *fs) nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, nb_pkt_per_burst); +#ifdef RTE_TEST_PMD_RECORD_BURST_STATS + fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; +#endif if (unlikely(nb_rx == 0)) goto flush; fs->rx_packets += nb_rx; @@ -164,6 +167,9 @@ pkt_burst_noisy_vnf(struct fwd_stream *fs) pkts_burst, nb_rx); if (unlikely(nb_tx < nb_rx) && fs->retry_enabled) nb_tx += do_retry(nb_rx, nb_tx, pkts_burst, fs); +#ifdef RTE_TEST_PMD_RECORD_BURST_STATS + fs->tx_burst_stats.pkt_burst_spread[nb_tx]++; +#endif fs->tx_packets += nb_tx; fs->fwd_dropped += drop_pkts(pkts_burst, nb_rx, nb_tx); return; @@ -187,6 +193,9 @@ pkt_burst_noisy_vnf(struct fwd_stream *fs) nb_deqd); if (unlikely(nb_tx < nb_rx) && fs->retry_enabled) nb_tx += do_retry(nb_rx, nb_tx, tmp_pkts, fs); +#ifdef RTE_TEST_PMD_RECORD_BURST_STATS + fs->tx_burst_stats.pkt_burst_spread[nb_tx]++; +#endif fs->fwd_dropped += drop_pkts(tmp_pkts, nb_deqd, nb_tx); } } @@ -211,6 +220,9 @@ pkt_burst_noisy_vnf(struct fwd_stream *fs) tmp_pkts, nb_deqd); if (unlikely(sent < nb_deqd) && fs->retry_enabled) nb_tx += do_retry(nb_rx, nb_tx, tmp_pkts, fs); +#ifdef RTE_TEST_PMD_RECORD_BURST_STATS + fs->tx_burst_stats.pkt_burst_spread[nb_tx]++; +#endif fs->fwd_dropped += drop_pkts(tmp_pkts, nb_deqd, sent); ncf->prev_time = rte_get_timer_cycles(); } -- 2.17.1
The burst % calculation can over flow due to multiplication. Fix the multiplication and increase the size of variables to 64b. Fixes: af75078fece3 ("first public release") Cc: stable@dpdk.org Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> Reviewed-by: Phil Yang <phil.yang@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> Tested-by: Phil Yang <phil.yang@arm.com> Tested-by: Ali Alnubani <alialnu@mellanox.com> --- app/test-pmd/testpmd.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index 4989d22ca..2e1493da2 100644 --- a/app/test-pmd/testpmd.c +++ b/app/test-pmd/testpmd.c @@ -1692,9 +1692,9 @@ init_fwd_streams(void) static void pkt_burst_stats_display(const char *rx_tx, struct pkt_burst_stats *pbs) { - unsigned int total_burst; - unsigned int nb_burst; - unsigned int burst_stats[3]; + uint64_t total_burst; + uint64_t nb_burst; + uint64_t burst_stats[3]; uint16_t pktnb_stats[3]; uint16_t nb_pkt; int burst_percent[3]; @@ -1723,8 +1723,8 @@ pkt_burst_stats_display(const char *rx_tx, struct pkt_burst_stats *pbs) } if (total_burst == 0) return; - burst_percent[0] = (burst_stats[0] * 100) / total_burst; - printf(" %s-bursts : %u [%d%% of %d pkts", rx_tx, total_burst, + burst_percent[0] = (double)burst_stats[0] / total_burst * 100; + printf(" %s-bursts : %"PRIu64" [%d%% of %d pkts", rx_tx, total_burst, burst_percent[0], (int) pktnb_stats[0]); if (burst_stats[0] == total_burst) { printf("]\n"); @@ -1735,7 +1735,7 @@ pkt_burst_stats_display(const char *rx_tx, struct pkt_burst_stats *pbs) 100 - burst_percent[0], pktnb_stats[1]); return; } - burst_percent[1] = (burst_stats[1] * 100) / total_burst; + burst_percent[1] = (double)burst_stats[1] / total_burst * 100; burst_percent[2] = 100 - (burst_percent[0] + burst_percent[1]); if ((burst_percent[1] == 0) || (burst_percent[2] == 0)) { printf(" + %d%% of others]\n", 100 - burst_percent[0]); -- 2.17.1
The number of empty polls provides information about available CPU head room in the presence of continuous polling. Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> Reviewed-by: Phil Yang <phil.yang@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> Tested-by: Phil Yang <phil.yang@arm.com> Tested-by: Ali Alnubani <alialnu@mellanox.com> --- app/test-pmd/csumonly.c | 5 +-- app/test-pmd/icmpecho.c | 6 ++-- app/test-pmd/iofwd.c | 6 ++-- app/test-pmd/macfwd.c | 6 ++-- app/test-pmd/macswap.c | 6 ++-- app/test-pmd/rxonly.c | 6 ++-- app/test-pmd/testpmd.c | 74 ++++++++++++++++++++++++----------------- 7 files changed, 61 insertions(+), 48 deletions(-) diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c index 862622379..0a96b24eb 100644 --- a/app/test-pmd/csumonly.c +++ b/app/test-pmd/csumonly.c @@ -802,11 +802,12 @@ pkt_burst_checksum_forward(struct fwd_stream *fs) /* receive a burst of packet */ nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, nb_pkt_per_burst); - if (unlikely(nb_rx == 0)) - return; #ifdef RTE_TEST_PMD_RECORD_BURST_STATS fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; #endif + if (unlikely(nb_rx == 0)) + return; + fs->rx_packets += nb_rx; rx_bad_ip_csum = 0; rx_bad_l4_csum = 0; diff --git a/app/test-pmd/icmpecho.c b/app/test-pmd/icmpecho.c index 65aece16c..78e3adf2c 100644 --- a/app/test-pmd/icmpecho.c +++ b/app/test-pmd/icmpecho.c @@ -308,12 +308,12 @@ reply_to_icmp_echo_rqsts(struct fwd_stream *fs) */ nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, nb_pkt_per_burst); - if (unlikely(nb_rx == 0)) - return; - #ifdef RTE_TEST_PMD_RECORD_BURST_STATS fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; #endif + if (unlikely(nb_rx == 0)) + return; + fs->rx_packets += nb_rx; nb_replies = 0; for (i = 0; i < nb_rx; i++) { diff --git a/app/test-pmd/iofwd.c b/app/test-pmd/iofwd.c index 9dce76efe..22b59bbda 100644 --- a/app/test-pmd/iofwd.c +++ b/app/test-pmd/iofwd.c @@ -66,13 +66,13 @@ pkt_burst_io_forward(struct fwd_stream *fs) */ nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, nb_pkt_per_burst); +#ifdef RTE_TEST_PMD_RECORD_BURST_STATS + fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; +#endif if (unlikely(nb_rx == 0)) return; fs->rx_packets += nb_rx; -#ifdef RTE_TEST_PMD_RECORD_BURST_STATS - fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; -#endif nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx); /* diff --git a/app/test-pmd/macfwd.c b/app/test-pmd/macfwd.c index d2ebb1105..0a89d94be 100644 --- a/app/test-pmd/macfwd.c +++ b/app/test-pmd/macfwd.c @@ -71,12 +71,12 @@ pkt_burst_mac_forward(struct fwd_stream *fs) */ nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, nb_pkt_per_burst); - if (unlikely(nb_rx == 0)) - return; - #ifdef RTE_TEST_PMD_RECORD_BURST_STATS fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; #endif + if (unlikely(nb_rx == 0)) + return; + fs->rx_packets += nb_rx; txp = &ports[fs->tx_port]; tx_offloads = txp->dev_conf.txmode.offloads; diff --git a/app/test-pmd/macswap.c b/app/test-pmd/macswap.c index 8428c26d8..fbe8cb39e 100644 --- a/app/test-pmd/macswap.c +++ b/app/test-pmd/macswap.c @@ -72,12 +72,12 @@ pkt_burst_mac_swap(struct fwd_stream *fs) */ nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, nb_pkt_per_burst); - if (unlikely(nb_rx == 0)) - return; - #ifdef RTE_TEST_PMD_RECORD_BURST_STATS fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; #endif + if (unlikely(nb_rx == 0)) + return; + fs->rx_packets += nb_rx; txp = &ports[fs->tx_port]; diff --git a/app/test-pmd/rxonly.c b/app/test-pmd/rxonly.c index 5c65fc42d..18d59a68d 100644 --- a/app/test-pmd/rxonly.c +++ b/app/test-pmd/rxonly.c @@ -63,12 +63,12 @@ pkt_burst_receive(struct fwd_stream *fs) */ nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, nb_pkt_per_burst); - if (unlikely(nb_rx == 0)) - return; - #ifdef RTE_TEST_PMD_RECORD_BURST_STATS fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; #endif + if (unlikely(nb_rx == 0)) + return; + fs->rx_packets += nb_rx; for (i = 0; i < nb_rx; i++) rte_pktmbuf_free(pkts_burst[i]); diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index 2e1493da2..9075143b7 100644 --- a/app/test-pmd/testpmd.c +++ b/app/test-pmd/testpmd.c @@ -1692,57 +1692,69 @@ init_fwd_streams(void) static void pkt_burst_stats_display(const char *rx_tx, struct pkt_burst_stats *pbs) { - uint64_t total_burst; + uint64_t total_burst, sburst; uint64_t nb_burst; - uint64_t burst_stats[3]; - uint16_t pktnb_stats[3]; + uint64_t burst_stats[4]; + uint16_t pktnb_stats[4]; uint16_t nb_pkt; - int burst_percent[3]; + int burst_percent[4], sburstp; + int i; /* * First compute the total number of packet bursts and the * two highest numbers of bursts of the same number of packets. */ total_burst = 0; - burst_stats[0] = burst_stats[1] = burst_stats[2] = 0; - pktnb_stats[0] = pktnb_stats[1] = pktnb_stats[2] = 0; + memset(&burst_stats, 0x0, sizeof(burst_stats)); + memset(&pktnb_stats, 0x0, sizeof(pktnb_stats)); + for (nb_pkt = 0; nb_pkt < MAX_PKT_BURST; nb_pkt++) { nb_burst = pbs->pkt_burst_spread[nb_pkt]; - if (nb_burst == 0) - continue; total_burst += nb_burst; - if (nb_burst > burst_stats[0]) { - burst_stats[1] = burst_stats[0]; - pktnb_stats[1] = pktnb_stats[0]; + + /* Show empty poll stats always */ + if (nb_pkt == 0) { burst_stats[0] = nb_burst; pktnb_stats[0] = nb_pkt; - } else if (nb_burst > burst_stats[1]) { + continue; + } + + if (nb_burst == 0) + continue; + + if (nb_burst > burst_stats[1]) { + burst_stats[2] = burst_stats[1]; + pktnb_stats[2] = pktnb_stats[1]; burst_stats[1] = nb_burst; pktnb_stats[1] = nb_pkt; + } else if (nb_burst > burst_stats[2]) { + burst_stats[2] = nb_burst; + pktnb_stats[2] = nb_pkt; } } if (total_burst == 0) return; - burst_percent[0] = (double)burst_stats[0] / total_burst * 100; - printf(" %s-bursts : %"PRIu64" [%d%% of %d pkts", rx_tx, total_burst, - burst_percent[0], (int) pktnb_stats[0]); - if (burst_stats[0] == total_burst) { - printf("]\n"); - return; - } - if (burst_stats[0] + burst_stats[1] == total_burst) { - printf(" + %d%% of %d pkts]\n", - 100 - burst_percent[0], pktnb_stats[1]); - return; - } - burst_percent[1] = (double)burst_stats[1] / total_burst * 100; - burst_percent[2] = 100 - (burst_percent[0] + burst_percent[1]); - if ((burst_percent[1] == 0) || (burst_percent[2] == 0)) { - printf(" + %d%% of others]\n", 100 - burst_percent[0]); - return; + + printf(" %s-bursts : %"PRIu64" [", rx_tx, total_burst); + for (i = 0, sburst = 0, sburstp = 0; i < 4; i++) { + if (i == 3) { + printf("%d%% of other]\n", 100 - sburstp); + return; + } + + sburst += burst_stats[i]; + if (sburst == total_burst) { + printf("%d%% of %d pkts]\n", + 100 - sburstp, (int) pktnb_stats[i]); + return; + } + + burst_percent[i] = + (double)burst_stats[i] / total_burst * 100; + printf("%d%% of %d pkts + ", + burst_percent[i], (int) pktnb_stats[i]); + sburstp += burst_percent[i]; } - printf(" + %d%% of %d pkts + %d%% of others]\n", - burst_percent[1], (int) pktnb_stats[1], burst_percent[2]); } #endif /* RTE_TEST_PMD_RECORD_BURST_STATS */ -- 2.17.1
On 6/26/2020 11:09 PM, Honnappa Nagarahalli wrote: > The throughput calculation requires a counter that measures > passing of time. However, the kernel saves and restores the PMU > state when a thread is unscheduled and scheduled. This ensures > that the PMU cycles are not counted towards a thread that is > not scheduled. Hence, when RTE_ARM_EAL_RDTSC_USE_PMU is enabled, > the PMU cycles do not represent the passing of time. Does this mean 'rte_rdtsc()' is broken for Arm? Wouldn't it cause more serious issue than testpmd throughput stats? > This results in incorrect calculation of throughput numbers. > Use clock_gettime system call to calculate the time passed since > last call. > > Bugzilla ID: 450 > Fixes: 0e106980301d ("app/testpmd: show throughput in port stats") > Cc: zhihong.wang@intel.com > Cc: stable@dpdk.org > > Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> > Reviewed-by: Phil Yang <phil.yang@arm.com> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> > Tested-by: Phil Yang <phil.yang@arm.com> > Tested-by: Ali Alnubani <alialnu@mellanox.com> <...> > @@ -195,10 +204,17 @@ nic_stats_display(portid_t port_id) > } > } > > - diff_cycles = prev_cycles[port_id]; > - prev_cycles[port_id] = rte_rdtsc(); > - if (diff_cycles > 0) > - diff_cycles = prev_cycles[port_id] - diff_cycles; > + diff_ns = 0; > + if (clock_gettime(CLOCK_TYPE_ID, &cur_time) == 0) { I guess 'rte_rdtsc()' is faster, but since this is not in the data path, I think it is OK. <...> > @@ -217,10 +233,10 @@ nic_stats_display(portid_t port_id) > (stats.obytes - prev_bytes_tx[port_id]) : 0; > prev_bytes_rx[port_id] = stats.ibytes; > prev_bytes_tx[port_id] = stats.obytes; > - mbps_rx = diff_cycles > 0 ? > - diff_bytes_rx * rte_get_tsc_hz() / diff_cycles : 0; > - mbps_tx = diff_cycles > 0 ? > - diff_bytes_tx * rte_get_tsc_hz() / diff_cycles : 0; > + mbps_rx = diff_ns > 0 ? > + (double)diff_bytes_rx / diff_ns * NS_PER_SEC : 0; > + mbps_tx = diff_ns > 0 ? > + (double)diff_bytes_tx / diff_ns * NS_PER_SEC : 0; The calculation also fixes other issue in the stats. With previous method, if the sampling between two stats is a little long, "diff_pkts_rx * rte_get_tsc_hz()" can overflow and produce wrong 'bps'. Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com>
On 6/26/2020 11:09 PM, Honnappa Nagarahalli wrote:
> Enable missing burst stats for rx path for flowgen mode.
>
> Fixes: e9e23a617eb8 ("app/testpmd: add flowgen forwarding engine")
> Cc: stable@dpdk.org
>
> Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Reviewed-by: Phil Yang <phil.yang@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> Tested-by: Phil Yang <phil.yang@arm.com>
> ---
> app/test-pmd/flowgen.c | 4 ++++
> 1 file changed, 4 insertions(+)
>
> diff --git a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c
> index 4bd351e67..011887c61 100644
> --- a/app/test-pmd/flowgen.c
> +++ b/app/test-pmd/flowgen.c
> @@ -111,6 +111,10 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
> /* Receive a burst of packets and discard them. */
> nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst,
> nb_pkt_per_burst);
> +#ifdef RTE_TEST_PMD_RECORD_BURST_STATS
> + fs->rx_burst_stats.pkt_burst_spread[nb_rx]++;
> +#endif
> +
Flow generator is for Tx, what is the point of getting stats for the Rx?
On 6/26/2020 11:09 PM, Honnappa Nagarahalli wrote:
> From: Phil Yang <phil.yang@arm.com>
>
> Add burst stats for noisy vnf mode.
>
> Fixes: 3c156061b938 ("app/testpmd: add noisy neighbour forwarding mode")
> Cc: stable@dpdk.org
> Cc: jfreimann@redhat.com
>
> Signed-off-by: Phil Yang <phil.yang@arm.com>
> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> Reviewed-by: Jens Freimann <jfreimann@redhat.com>
> ---
> app/test-pmd/noisy_vnf.c | 12 ++++++++++++
> 1 file changed, 12 insertions(+)
>
> diff --git a/app/test-pmd/noisy_vnf.c b/app/test-pmd/noisy_vnf.c
> index 58c4ee925..24f286da6 100644
> --- a/app/test-pmd/noisy_vnf.c
> +++ b/app/test-pmd/noisy_vnf.c
> @@ -154,6 +154,9 @@ pkt_burst_noisy_vnf(struct fwd_stream *fs)
>
> nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue,
> pkts_burst, nb_pkt_per_burst);
> +#ifdef RTE_TEST_PMD_RECORD_BURST_STATS
> + fs->rx_burst_stats.pkt_burst_spread[nb_rx]++;
> +#endif
> if (unlikely(nb_rx == 0))
> goto flush;
> fs->rx_packets += nb_rx;
> @@ -164,6 +167,9 @@ pkt_burst_noisy_vnf(struct fwd_stream *fs)
> pkts_burst, nb_rx);
> if (unlikely(nb_tx < nb_rx) && fs->retry_enabled)
> nb_tx += do_retry(nb_rx, nb_tx, pkts_burst, fs);
> +#ifdef RTE_TEST_PMD_RECORD_BURST_STATS
> + fs->tx_burst_stats.pkt_burst_spread[nb_tx]++;
> +#endif
> fs->tx_packets += nb_tx;
> fs->fwd_dropped += drop_pkts(pkts_burst, nb_rx, nb_tx);
> return;
No objection to add the burst stats to more forwarding engines, but this config
does not exist for the meson and make is going away.
We need to figure out how to support this option with meson before spreading it out.
On 6/26/2020 11:09 PM, Honnappa Nagarahalli wrote:
> The burst % calculation can over flow due to multiplication.
> Fix the multiplication and increase the size of variables to
> 64b.
>
> Fixes: af75078fece3 ("first public release")
> Cc: stable@dpdk.org
>
> Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Reviewed-by: Phil Yang <phil.yang@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> Tested-by: Phil Yang <phil.yang@arm.com>
> Tested-by: Ali Alnubani <alialnu@mellanox.com>
Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com>
Hi Ferruh, Thanks for your comments. <snip> > Subject: Re: [PATCH v2 1/5] app/testpmd: clock gettime call in throughput > calculation > > On 6/26/2020 11:09 PM, Honnappa Nagarahalli wrote: > > The throughput calculation requires a counter that measures passing of > > time. However, the kernel saves and restores the PMU state when a > > thread is unscheduled and scheduled. This ensures that the PMU cycles > > are not counted towards a thread that is not scheduled. Hence, when > > RTE_ARM_EAL_RDTSC_USE_PMU is enabled, the PMU cycles do not > represent > > the passing of time. > > Does this mean 'rte_rdtsc()' is broken for Arm? It depends on the kernel I think. IMO, for isolated CPUs it should be fine. It is currently getting fixed through a kernel patch. Once the kernel patch is up streamed, we will make the required changes in DPDK. > Wouldn't it cause more serious issue than testpmd throughput stats? Within DPDK, it does not seem to be used for anything critical (I have fixed one). > > > This results in incorrect calculation of throughput numbers. > > Use clock_gettime system call to calculate the time passed since last > > call. > > > > Bugzilla ID: 450 > > Fixes: 0e106980301d ("app/testpmd: show throughput in port stats") > > Cc: zhihong.wang@intel.com > > Cc: stable@dpdk.org > > > > Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> > > Reviewed-by: Phil Yang <phil.yang@arm.com> > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> > > Tested-by: Phil Yang <phil.yang@arm.com> > > Tested-by: Ali Alnubani <alialnu@mellanox.com> > > <...> > > > @@ -195,10 +204,17 @@ nic_stats_display(portid_t port_id) > > } > > } > > > > - diff_cycles = prev_cycles[port_id]; > > - prev_cycles[port_id] = rte_rdtsc(); > > - if (diff_cycles > 0) > > - diff_cycles = prev_cycles[port_id] - diff_cycles; > > + diff_ns = 0; > > + if (clock_gettime(CLOCK_TYPE_ID, &cur_time) == 0) { > > I guess 'rte_rdtsc()' is faster, but since this is not in the data path, I think it is > OK. > > <...> > > > @@ -217,10 +233,10 @@ nic_stats_display(portid_t port_id) > > (stats.obytes - prev_bytes_tx[port_id]) : 0; > > prev_bytes_rx[port_id] = stats.ibytes; > > prev_bytes_tx[port_id] = stats.obytes; > > - mbps_rx = diff_cycles > 0 ? > > - diff_bytes_rx * rte_get_tsc_hz() / diff_cycles : 0; > > - mbps_tx = diff_cycles > 0 ? > > - diff_bytes_tx * rte_get_tsc_hz() / diff_cycles : 0; > > + mbps_rx = diff_ns > 0 ? > > + (double)diff_bytes_rx / diff_ns * NS_PER_SEC : 0; > > + mbps_tx = diff_ns > 0 ? > > + (double)diff_bytes_tx / diff_ns * NS_PER_SEC : 0; > > The calculation also fixes other issue in the stats. > With previous method, if the sampling between two stats is a little long, > "diff_pkts_rx * rte_get_tsc_hz()" can overflow and produce wrong 'bps'. > > > Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com>
<snip>
> Subject: Re: [PATCH v2 3/5] app/testpmd: enable burst stats for noisy vnf
> mode
>
> On 6/26/2020 11:09 PM, Honnappa Nagarahalli wrote:
> > From: Phil Yang <phil.yang@arm.com>
> >
> > Add burst stats for noisy vnf mode.
> >
> > Fixes: 3c156061b938 ("app/testpmd: add noisy neighbour forwarding
> > mode")
> > Cc: stable@dpdk.org
> > Cc: jfreimann@redhat.com
> >
> > Signed-off-by: Phil Yang <phil.yang@arm.com>
> > Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > Reviewed-by: Jens Freimann <jfreimann@redhat.com>
> > ---
> > app/test-pmd/noisy_vnf.c | 12 ++++++++++++
> > 1 file changed, 12 insertions(+)
> >
> > diff --git a/app/test-pmd/noisy_vnf.c b/app/test-pmd/noisy_vnf.c index
> > 58c4ee925..24f286da6 100644
> > --- a/app/test-pmd/noisy_vnf.c
> > +++ b/app/test-pmd/noisy_vnf.c
> > @@ -154,6 +154,9 @@ pkt_burst_noisy_vnf(struct fwd_stream *fs)
> >
> > nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue,
> > pkts_burst, nb_pkt_per_burst);
> > +#ifdef RTE_TEST_PMD_RECORD_BURST_STATS
> > + fs->rx_burst_stats.pkt_burst_spread[nb_rx]++;
> > +#endif
> > if (unlikely(nb_rx == 0))
> > goto flush;
> > fs->rx_packets += nb_rx;
> > @@ -164,6 +167,9 @@ pkt_burst_noisy_vnf(struct fwd_stream *fs)
> > pkts_burst, nb_rx);
> > if (unlikely(nb_tx < nb_rx) && fs->retry_enabled)
> > nb_tx += do_retry(nb_rx, nb_tx, pkts_burst, fs);
> > +#ifdef RTE_TEST_PMD_RECORD_BURST_STATS
> > + fs->tx_burst_stats.pkt_burst_spread[nb_tx]++;
> > +#endif
> > fs->tx_packets += nb_tx;
> > fs->fwd_dropped += drop_pkts(pkts_burst, nb_rx, nb_tx);
> > return;
>
> No objection to add the burst stats to more forwarding engines, but this
> config does not exist for the meson and make is going away.
>
> We need to figure out how to support this option with meson before
> spreading it out.
Dharmik is working on making this a run time flag that can be enabled or disabled from command line (as was suggested in other emails). That would remove the requirement on meson.
This compile time flag existed before and this patch fixes a bug. IMO, we should separate the issue of how to enable this flag using meson from this bug fix.
On 6/26/2020 11:09 PM, Honnappa Nagarahalli wrote: > The number of empty polls provides information about available > CPU head room in the presence of continuous polling. +1 to have it, it can be useful. > > Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> > Reviewed-by: Phil Yang <phil.yang@arm.com> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> > Tested-by: Phil Yang <phil.yang@arm.com> > Tested-by: Ali Alnubani <alialnu@mellanox.com> <...> > /* > * First compute the total number of packet bursts and the > * two highest numbers of bursts of the same number of packets. > */ > total_burst = 0; > - burst_stats[0] = burst_stats[1] = burst_stats[2] = 0; > - pktnb_stats[0] = pktnb_stats[1] = pktnb_stats[2] = 0; > + memset(&burst_stats, 0x0, sizeof(burst_stats)); > + memset(&pktnb_stats, 0x0, sizeof(pktnb_stats)); > + > for (nb_pkt = 0; nb_pkt < MAX_PKT_BURST; nb_pkt++) { > nb_burst = pbs->pkt_burst_spread[nb_pkt]; > - if (nb_burst == 0) > - continue; > total_burst += nb_burst; > - if (nb_burst > burst_stats[0]) { > - burst_stats[1] = burst_stats[0]; > - pktnb_stats[1] = pktnb_stats[0]; > + > + /* Show empty poll stats always */ > + if (nb_pkt == 0) { > burst_stats[0] = nb_burst; > pktnb_stats[0] = nb_pkt; just a minor issue but this assignment is not needed. > - } else if (nb_burst > burst_stats[1]) { > + continue; > + } > + > + if (nb_burst == 0) > + continue; again another minor issue, can have this check above 'nb_pkt == 0', to prevent unnecssary "burst_stats[0] = nb_burst;" assignment if "nb_burst == 0". <...> Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com>
<snip>
> Subject: Re: [PATCH v2 2/5] app/testpmd: enable burst stats for flowgen mode
> rx path
>
> On 6/26/2020 11:09 PM, Honnappa Nagarahalli wrote:
> > Enable missing burst stats for rx path for flowgen mode.
> >
> > Fixes: e9e23a617eb8 ("app/testpmd: add flowgen forwarding engine")
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> > Reviewed-by: Phil Yang <phil.yang@arm.com>
> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > Tested-by: Phil Yang <phil.yang@arm.com>
> > ---
> > app/test-pmd/flowgen.c | 4 ++++
> > 1 file changed, 4 insertions(+)
> >
> > diff --git a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c index
> > 4bd351e67..011887c61 100644
> > --- a/app/test-pmd/flowgen.c
> > +++ b/app/test-pmd/flowgen.c
> > @@ -111,6 +111,10 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
> > /* Receive a burst of packets and discard them. */
> > nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst,
> > nb_pkt_per_burst);
> > +#ifdef RTE_TEST_PMD_RECORD_BURST_STATS
> > + fs->rx_burst_stats.pkt_burst_spread[nb_rx]++;
> > +#endif
> > +
>
> Flow generator is for Tx, what is the point of getting stats for the Rx?
Is there any reason for this mode to receive the packets?
The reason I ask is, the core cycles include the RX call as well. So, we have cycles/packet that includes the RX call and burst stats that does not show the RX burst stats.
Do we need to remove the call to rte_eth_rx_burst API?
On 7/6/2020 5:59 PM, Honnappa Nagarahalli wrote: > <snip> > >> Subject: Re: [PATCH v2 3/5] app/testpmd: enable burst stats for noisy vnf >> mode >> >> On 6/26/2020 11:09 PM, Honnappa Nagarahalli wrote: >>> From: Phil Yang <phil.yang@arm.com> >>> >>> Add burst stats for noisy vnf mode. >>> >>> Fixes: 3c156061b938 ("app/testpmd: add noisy neighbour forwarding >>> mode") >>> Cc: stable@dpdk.org >>> Cc: jfreimann@redhat.com >>> >>> Signed-off-by: Phil Yang <phil.yang@arm.com> >>> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> >>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> >>> Reviewed-by: Jens Freimann <jfreimann@redhat.com> >>> --- >>> app/test-pmd/noisy_vnf.c | 12 ++++++++++++ >>> 1 file changed, 12 insertions(+) >>> >>> diff --git a/app/test-pmd/noisy_vnf.c b/app/test-pmd/noisy_vnf.c index >>> 58c4ee925..24f286da6 100644 >>> --- a/app/test-pmd/noisy_vnf.c >>> +++ b/app/test-pmd/noisy_vnf.c >>> @@ -154,6 +154,9 @@ pkt_burst_noisy_vnf(struct fwd_stream *fs) >>> >>> nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, >>> pkts_burst, nb_pkt_per_burst); >>> +#ifdef RTE_TEST_PMD_RECORD_BURST_STATS >>> + fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; >>> +#endif >>> if (unlikely(nb_rx == 0)) >>> goto flush; >>> fs->rx_packets += nb_rx; >>> @@ -164,6 +167,9 @@ pkt_burst_noisy_vnf(struct fwd_stream *fs) >>> pkts_burst, nb_rx); >>> if (unlikely(nb_tx < nb_rx) && fs->retry_enabled) >>> nb_tx += do_retry(nb_rx, nb_tx, pkts_burst, fs); >>> +#ifdef RTE_TEST_PMD_RECORD_BURST_STATS >>> + fs->tx_burst_stats.pkt_burst_spread[nb_tx]++; >>> +#endif >>> fs->tx_packets += nb_tx; >>> fs->fwd_dropped += drop_pkts(pkts_burst, nb_rx, nb_tx); >>> return; >> >> No objection to add the burst stats to more forwarding engines, but this >> config does not exist for the meson and make is going away. >> >> We need to figure out how to support this option with meson before >> spreading it out. > Dharmik is working on making this a run time flag that can be enabled or disabled from command line (as was suggested in other emails). That would remove the requirement on meson. Cool, thanks. Based on that work, shouldn't we need to update these lines again, instead why not do the update after Dharmik's patch (or in that patch) based on what the new way is? > > This compile time flag existed before and this patch fixes a bug. IMO, we should separate the issue of how to enable this flag using meson from this bug fix. > I know this flag exists before, but what is the bug this patch fixes? I thought this patch enables burst stat for "noisy vnf" forwarding engine.
On 7/6/2020 6:06 PM, Honnappa Nagarahalli wrote:
> <snip>
>
>> Subject: Re: [PATCH v2 2/5] app/testpmd: enable burst stats for flowgen mode
>> rx path
>>
>> On 6/26/2020 11:09 PM, Honnappa Nagarahalli wrote:
>>> Enable missing burst stats for rx path for flowgen mode.
>>>
>>> Fixes: e9e23a617eb8 ("app/testpmd: add flowgen forwarding engine")
>>> Cc: stable@dpdk.org
>>>
>>> Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
>>> Reviewed-by: Phil Yang <phil.yang@arm.com>
>>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
>>> Tested-by: Phil Yang <phil.yang@arm.com>
>>> ---
>>> app/test-pmd/flowgen.c | 4 ++++
>>> 1 file changed, 4 insertions(+)
>>>
>>> diff --git a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c index
>>> 4bd351e67..011887c61 100644
>>> --- a/app/test-pmd/flowgen.c
>>> +++ b/app/test-pmd/flowgen.c
>>> @@ -111,6 +111,10 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
>>> /* Receive a burst of packets and discard them. */
>>> nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst,
>>> nb_pkt_per_burst);
>>> +#ifdef RTE_TEST_PMD_RECORD_BURST_STATS
>>> + fs->rx_burst_stats.pkt_burst_spread[nb_rx]++;
>>> +#endif
>>> +
>>
>> Flow generator is for Tx, what is the point of getting stats for the Rx?
> Is there any reason for this mode to receive the packets?
> The reason I ask is, the core cycles include the RX call as well. So, we have cycles/packet that includes the RX call and burst stats that does not show the RX burst stats.
> Do we need to remove the call to rte_eth_rx_burst API?
>
The implementation receives packets and frees them immediately.
I assume the Rx is implemented in case port receives packets, like if other end
forwards the generated packets back to same port.
Yes it consumes cycles for Rx but received packets are ignored, I am not sure if
received packets have any meaning in flowgen to get additional burst stats.
On 7/6/2020 5:53 PM, Honnappa Nagarahalli wrote: > Hi Ferruh, > Thanks for your comments. > > <snip> > >> Subject: Re: [PATCH v2 1/5] app/testpmd: clock gettime call in throughput >> calculation >> >> On 6/26/2020 11:09 PM, Honnappa Nagarahalli wrote: >>> The throughput calculation requires a counter that measures passing of >>> time. However, the kernel saves and restores the PMU state when a >>> thread is unscheduled and scheduled. This ensures that the PMU cycles >>> are not counted towards a thread that is not scheduled. Hence, when >>> RTE_ARM_EAL_RDTSC_USE_PMU is enabled, the PMU cycles do not >> represent >>> the passing of time. >> >> Does this mean 'rte_rdtsc()' is broken for Arm? > It depends on the kernel I think. IMO, for isolated CPUs it should be fine. It is currently getting fixed through a kernel patch. Once the kernel patch is up streamed, we will make the required changes in DPDK. > >> Wouldn't it cause more serious issue than testpmd throughput stats? > Within DPDK, it does not seem to be used for anything critical (I have fixed one). OK, thanks for clarification. > >> >>> This results in incorrect calculation of throughput numbers. >>> Use clock_gettime system call to calculate the time passed since last >>> call. >>> >>> Bugzilla ID: 450 >>> Fixes: 0e106980301d ("app/testpmd: show throughput in port stats") >>> Cc: zhihong.wang@intel.com >>> Cc: stable@dpdk.org >>> >>> Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> >>> Reviewed-by: Phil Yang <phil.yang@arm.com> >>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> >>> Tested-by: Phil Yang <phil.yang@arm.com> >>> Tested-by: Ali Alnubani <alialnu@mellanox.com> >> >> <...> >> >>> @@ -195,10 +204,17 @@ nic_stats_display(portid_t port_id) >>> } >>> } >>> >>> - diff_cycles = prev_cycles[port_id]; >>> - prev_cycles[port_id] = rte_rdtsc(); >>> - if (diff_cycles > 0) >>> - diff_cycles = prev_cycles[port_id] - diff_cycles; >>> + diff_ns = 0; >>> + if (clock_gettime(CLOCK_TYPE_ID, &cur_time) == 0) { >> >> I guess 'rte_rdtsc()' is faster, but since this is not in the data path, I think it is >> OK. >> >> <...> >> >>> @@ -217,10 +233,10 @@ nic_stats_display(portid_t port_id) >>> (stats.obytes - prev_bytes_tx[port_id]) : 0; >>> prev_bytes_rx[port_id] = stats.ibytes; >>> prev_bytes_tx[port_id] = stats.obytes; >>> - mbps_rx = diff_cycles > 0 ? >>> - diff_bytes_rx * rte_get_tsc_hz() / diff_cycles : 0; >>> - mbps_tx = diff_cycles > 0 ? >>> - diff_bytes_tx * rte_get_tsc_hz() / diff_cycles : 0; >>> + mbps_rx = diff_ns > 0 ? >>> + (double)diff_bytes_rx / diff_ns * NS_PER_SEC : 0; >>> + mbps_tx = diff_ns > 0 ? >>> + (double)diff_bytes_tx / diff_ns * NS_PER_SEC : 0; >> >> The calculation also fixes other issue in the stats. >> With previous method, if the sampling between two stats is a little long, >> "diff_pkts_rx * rte_get_tsc_hz()" can overflow and produce wrong 'bps'. >> >> >> Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com>
<snip> > > > >> Subject: Re: [PATCH v2 3/5] app/testpmd: enable burst stats for noisy > >> vnf mode > >> > >> On 6/26/2020 11:09 PM, Honnappa Nagarahalli wrote: > >>> From: Phil Yang <phil.yang@arm.com> > >>> > >>> Add burst stats for noisy vnf mode. > >>> > >>> Fixes: 3c156061b938 ("app/testpmd: add noisy neighbour forwarding > >>> mode") > >>> Cc: stable@dpdk.org > >>> Cc: jfreimann@redhat.com > >>> > >>> Signed-off-by: Phil Yang <phil.yang@arm.com> > >>> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> > >>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> > >>> Reviewed-by: Jens Freimann <jfreimann@redhat.com> > >>> --- > >>> app/test-pmd/noisy_vnf.c | 12 ++++++++++++ > >>> 1 file changed, 12 insertions(+) > >>> > >>> diff --git a/app/test-pmd/noisy_vnf.c b/app/test-pmd/noisy_vnf.c > >>> index > >>> 58c4ee925..24f286da6 100644 > >>> --- a/app/test-pmd/noisy_vnf.c > >>> +++ b/app/test-pmd/noisy_vnf.c > >>> @@ -154,6 +154,9 @@ pkt_burst_noisy_vnf(struct fwd_stream *fs) > >>> > >>> nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, > >>> pkts_burst, nb_pkt_per_burst); > >>> +#ifdef RTE_TEST_PMD_RECORD_BURST_STATS > >>> + fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; > >>> +#endif > >>> if (unlikely(nb_rx == 0)) > >>> goto flush; > >>> fs->rx_packets += nb_rx; > >>> @@ -164,6 +167,9 @@ pkt_burst_noisy_vnf(struct fwd_stream *fs) > >>> pkts_burst, nb_rx); > >>> if (unlikely(nb_tx < nb_rx) && fs->retry_enabled) > >>> nb_tx += do_retry(nb_rx, nb_tx, pkts_burst, fs); > >>> +#ifdef RTE_TEST_PMD_RECORD_BURST_STATS > >>> + fs->tx_burst_stats.pkt_burst_spread[nb_tx]++; > >>> +#endif > >>> fs->tx_packets += nb_tx; > >>> fs->fwd_dropped += drop_pkts(pkts_burst, nb_rx, nb_tx); > >>> return; > >> > >> No objection to add the burst stats to more forwarding engines, but > >> this config does not exist for the meson and make is going away. > >> > >> We need to figure out how to support this option with meson before > >> spreading it out. > > Dharmik is working on making this a run time flag that can be enabled or > disabled from command line (as was suggested in other emails). That would > remove the requirement on meson. > > Cool, thanks. > Based on that work, shouldn't we need to update these lines again, instead > why not do the update after Dharmik's patch (or in that patch) based on what > the new way is? Yes, agree, these lines need to be updated. I am fine to take this route as well. > > > > > This compile time flag existed before and this patch fixes a bug. IMO, we > should separate the issue of how to enable this flag using meson from this > bug fix. > > > > I know this flag exists before, but what is the bug this patch fixes? I thought > this patch enables burst stat for "noisy vnf" forwarding engine. If one enables 'RTE_TEST_PMD_RECORD_BURST_STATS' expecting the burst stats for 'noisy vnf' engine, it is not reported today.
<snip> > > > >> Subject: Re: [PATCH v2 2/5] app/testpmd: enable burst stats for > >> flowgen mode rx path > >> > >> On 6/26/2020 11:09 PM, Honnappa Nagarahalli wrote: > >>> Enable missing burst stats for rx path for flowgen mode. > >>> > >>> Fixes: e9e23a617eb8 ("app/testpmd: add flowgen forwarding engine") > >>> Cc: stable@dpdk.org > >>> > >>> Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> > >>> Reviewed-by: Phil Yang <phil.yang@arm.com> > >>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> > >>> Tested-by: Phil Yang <phil.yang@arm.com> > >>> --- > >>> app/test-pmd/flowgen.c | 4 ++++ > >>> 1 file changed, 4 insertions(+) > >>> > >>> diff --git a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c index > >>> 4bd351e67..011887c61 100644 > >>> --- a/app/test-pmd/flowgen.c > >>> +++ b/app/test-pmd/flowgen.c > >>> @@ -111,6 +111,10 @@ pkt_burst_flow_gen(struct fwd_stream *fs) > >>> /* Receive a burst of packets and discard them. */ > >>> nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, > >>> nb_pkt_per_burst); > >>> +#ifdef RTE_TEST_PMD_RECORD_BURST_STATS > >>> + fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; > >>> +#endif > >>> + > >> > >> Flow generator is for Tx, what is the point of getting stats for the Rx? > > Is there any reason for this mode to receive the packets? > > The reason I ask is, the core cycles include the RX call as well. So, we have > cycles/packet that includes the RX call and burst stats that does not show the > RX burst stats. > > Do we need to remove the call to rte_eth_rx_burst API? > > > > The implementation receives packets and frees them immediately. > I assume the Rx is implemented in case port receives packets, like if other end > forwards the generated packets back to same port. I tried to find more info, there is not a lot. The original commit message says 'it is similar to 'txonly' engine, but will generate multiple L4 flows'. Your assumption seems reasonable to me. In that case, I think it makes sense to include 'core cycles' only for TX. Otherwise, there is not enough information to debug any issues. > Yes it consumes cycles for Rx but received packets are ignored, I am not sure if > received packets have any meaning in flowgen to get additional burst stats. Please see above, I think we need to adjust how the 'core cycles' are counted in this case.
Hi Ferruh, Thanks for the review comments <snip> > Subject: Re: [PATCH v2 5/5] app/testpmd: enable empty polls in burst stats > > On 6/26/2020 11:09 PM, Honnappa Nagarahalli wrote: > > The number of empty polls provides information about available CPU > > head room in the presence of continuous polling. > > +1 to have it, it can be useful. > > > > > Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> > > Reviewed-by: Phil Yang <phil.yang@arm.com> > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> > > Tested-by: Phil Yang <phil.yang@arm.com> > > Tested-by: Ali Alnubani <alialnu@mellanox.com> > > <...> > > > /* > > * First compute the total number of packet bursts and the > > * two highest numbers of bursts of the same number of packets. > > */ > > total_burst = 0; > > - burst_stats[0] = burst_stats[1] = burst_stats[2] = 0; > > - pktnb_stats[0] = pktnb_stats[1] = pktnb_stats[2] = 0; > > + memset(&burst_stats, 0x0, sizeof(burst_stats)); > > + memset(&pktnb_stats, 0x0, sizeof(pktnb_stats)); > > + > > for (nb_pkt = 0; nb_pkt < MAX_PKT_BURST; nb_pkt++) { > > nb_burst = pbs->pkt_burst_spread[nb_pkt]; > > - if (nb_burst == 0) > > - continue; > > total_burst += nb_burst; > > - if (nb_burst > burst_stats[0]) { > > - burst_stats[1] = burst_stats[0]; > > - pktnb_stats[1] = pktnb_stats[0]; > > + > > + /* Show empty poll stats always */ > > + if (nb_pkt == 0) { > > burst_stats[0] = nb_burst; > > pktnb_stats[0] = nb_pkt; > > just a minor issue but this assignment is not needed. The empty poll stats are being shown always (even if there were no empty polls). The check 'nb_pkts == 0' indicates that the burst stats are for empty polls. Hence this assignment is required. But, this can be moved out of the loop to provide clarity. I will do that. > > > - } else if (nb_burst > burst_stats[1]) { > > + continue; > > + } > > + > > + if (nb_burst == 0) > > + continue; > > again another minor issue, can have this check above 'nb_pkt == 0', to prevent > unnecssary "burst_stats[0] = nb_burst;" assignment if "nb_burst == 0". The empty polls are always shown, even if there were 0% empty polls. > > <...> > > Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com>
The throughput calculation requires a counter that measures passing of time. However, the kernel saves and restores the PMU state when a thread is unscheduled and scheduled. This ensures that the PMU cycles are not counted towards a thread that is not scheduled. Hence, when RTE_ARM_EAL_RDTSC_USE_PMU is enabled, the PMU cycles do not represent the passing of time. This results in incorrect calculation of throughput numbers. Use clock_gettime system call to calculate the time passed since last call. Bugzilla ID: 450 Fixes: 0e106980301d ("app/testpmd: show throughput in port stats") Cc: zhihong.wang@intel.com Cc: stable@dpdk.org Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> Reviewed-by: Phil Yang <phil.yang@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com> Tested-by: Phil Yang <phil.yang@arm.com> Tested-by: Ali Alnubani <alialnu@mellanox.com> --- v3: 1) Dropped 'noisy vnf' patch, will add with future Dharmik's patch 2) Dropped flow gen patch, send out a separate one if there is consensus on the mailing list 3) Changed the logic to display burst stats to make it easier to understand (Ferruh) v2: Updated commit log in 1/5 (Jerin) app/test-pmd/config.c | 44 +++++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c index 016bcb09c..91fbf99f8 100644 --- a/app/test-pmd/config.c +++ b/app/test-pmd/config.c @@ -54,6 +54,14 @@ #define ETHDEV_FWVERS_LEN 32 +#ifdef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */ +#define CLOCK_TYPE_ID CLOCK_MONOTONIC_RAW +#else +#define CLOCK_TYPE_ID CLOCK_MONOTONIC +#endif + +#define NS_PER_SEC 1E9 + static char *flowtype_to_str(uint16_t flow_type); static const struct { @@ -136,9 +144,10 @@ nic_stats_display(portid_t port_id) static uint64_t prev_pkts_tx[RTE_MAX_ETHPORTS]; static uint64_t prev_bytes_rx[RTE_MAX_ETHPORTS]; static uint64_t prev_bytes_tx[RTE_MAX_ETHPORTS]; - static uint64_t prev_cycles[RTE_MAX_ETHPORTS]; + static uint64_t prev_ns[RTE_MAX_ETHPORTS]; + struct timespec cur_time; uint64_t diff_pkts_rx, diff_pkts_tx, diff_bytes_rx, diff_bytes_tx, - diff_cycles; + diff_ns; uint64_t mpps_rx, mpps_tx, mbps_rx, mbps_tx; struct rte_eth_stats stats; struct rte_port *port = &ports[port_id]; @@ -195,10 +204,17 @@ nic_stats_display(portid_t port_id) } } - diff_cycles = prev_cycles[port_id]; - prev_cycles[port_id] = rte_rdtsc(); - if (diff_cycles > 0) - diff_cycles = prev_cycles[port_id] - diff_cycles; + diff_ns = 0; + if (clock_gettime(CLOCK_TYPE_ID, &cur_time) == 0) { + uint64_t ns; + + ns = cur_time.tv_sec * NS_PER_SEC; + ns += cur_time.tv_nsec; + + if (prev_ns[port_id] != 0) + diff_ns = ns - prev_ns[port_id]; + prev_ns[port_id] = ns; + } diff_pkts_rx = (stats.ipackets > prev_pkts_rx[port_id]) ? (stats.ipackets - prev_pkts_rx[port_id]) : 0; @@ -206,10 +222,10 @@ nic_stats_display(portid_t port_id) (stats.opackets - prev_pkts_tx[port_id]) : 0; prev_pkts_rx[port_id] = stats.ipackets; prev_pkts_tx[port_id] = stats.opackets; - mpps_rx = diff_cycles > 0 ? - diff_pkts_rx * rte_get_tsc_hz() / diff_cycles : 0; - mpps_tx = diff_cycles > 0 ? - diff_pkts_tx * rte_get_tsc_hz() / diff_cycles : 0; + mpps_rx = diff_ns > 0 ? + (double)diff_pkts_rx / diff_ns * NS_PER_SEC : 0; + mpps_tx = diff_ns > 0 ? + (double)diff_pkts_tx / diff_ns * NS_PER_SEC : 0; diff_bytes_rx = (stats.ibytes > prev_bytes_rx[port_id]) ? (stats.ibytes - prev_bytes_rx[port_id]) : 0; @@ -217,10 +233,10 @@ nic_stats_display(portid_t port_id) (stats.obytes - prev_bytes_tx[port_id]) : 0; prev_bytes_rx[port_id] = stats.ibytes; prev_bytes_tx[port_id] = stats.obytes; - mbps_rx = diff_cycles > 0 ? - diff_bytes_rx * rte_get_tsc_hz() / diff_cycles : 0; - mbps_tx = diff_cycles > 0 ? - diff_bytes_tx * rte_get_tsc_hz() / diff_cycles : 0; + mbps_rx = diff_ns > 0 ? + (double)diff_bytes_rx / diff_ns * NS_PER_SEC : 0; + mbps_tx = diff_ns > 0 ? + (double)diff_bytes_tx / diff_ns * NS_PER_SEC : 0; printf("\n Throughput (since last show)\n"); printf(" Rx-pps: %12"PRIu64" Rx-bps: %12"PRIu64"\n Tx-pps: %12" -- 2.17.1
The burst % calculation can over flow due to multiplication. Fix the multiplication and increase the size of variables to 64b. Fixes: af75078fece3 ("first public release") Cc: stable@dpdk.org Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> Reviewed-by: Phil Yang <phil.yang@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com> Tested-by: Phil Yang <phil.yang@arm.com> Tested-by: Ali Alnubani <alialnu@mellanox.com> --- app/test-pmd/testpmd.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index 4989d22ca..2e1493da2 100644 --- a/app/test-pmd/testpmd.c +++ b/app/test-pmd/testpmd.c @@ -1692,9 +1692,9 @@ init_fwd_streams(void) static void pkt_burst_stats_display(const char *rx_tx, struct pkt_burst_stats *pbs) { - unsigned int total_burst; - unsigned int nb_burst; - unsigned int burst_stats[3]; + uint64_t total_burst; + uint64_t nb_burst; + uint64_t burst_stats[3]; uint16_t pktnb_stats[3]; uint16_t nb_pkt; int burst_percent[3]; @@ -1723,8 +1723,8 @@ pkt_burst_stats_display(const char *rx_tx, struct pkt_burst_stats *pbs) } if (total_burst == 0) return; - burst_percent[0] = (burst_stats[0] * 100) / total_burst; - printf(" %s-bursts : %u [%d%% of %d pkts", rx_tx, total_burst, + burst_percent[0] = (double)burst_stats[0] / total_burst * 100; + printf(" %s-bursts : %"PRIu64" [%d%% of %d pkts", rx_tx, total_burst, burst_percent[0], (int) pktnb_stats[0]); if (burst_stats[0] == total_burst) { printf("]\n"); @@ -1735,7 +1735,7 @@ pkt_burst_stats_display(const char *rx_tx, struct pkt_burst_stats *pbs) 100 - burst_percent[0], pktnb_stats[1]); return; } - burst_percent[1] = (burst_stats[1] * 100) / total_burst; + burst_percent[1] = (double)burst_stats[1] / total_burst * 100; burst_percent[2] = 100 - (burst_percent[0] + burst_percent[1]); if ((burst_percent[1] == 0) || (burst_percent[2] == 0)) { printf(" + %d%% of others]\n", 100 - burst_percent[0]); -- 2.17.1
The number of empty polls provides information about available CPU head room in the presence of continuous polling. Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> Reviewed-by: Phil Yang <phil.yang@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com> Tested-by: Phil Yang <phil.yang@arm.com> Tested-by: Ali Alnubani <alialnu@mellanox.com> --- app/test-pmd/csumonly.c | 5 +-- app/test-pmd/icmpecho.c | 6 ++-- app/test-pmd/iofwd.c | 6 ++-- app/test-pmd/macfwd.c | 6 ++-- app/test-pmd/macswap.c | 6 ++-- app/test-pmd/rxonly.c | 6 ++-- app/test-pmd/testpmd.c | 77 +++++++++++++++++++++++------------------ 7 files changed, 62 insertions(+), 50 deletions(-) diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c index 862622379..0a96b24eb 100644 --- a/app/test-pmd/csumonly.c +++ b/app/test-pmd/csumonly.c @@ -802,11 +802,12 @@ pkt_burst_checksum_forward(struct fwd_stream *fs) /* receive a burst of packet */ nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, nb_pkt_per_burst); - if (unlikely(nb_rx == 0)) - return; #ifdef RTE_TEST_PMD_RECORD_BURST_STATS fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; #endif + if (unlikely(nb_rx == 0)) + return; + fs->rx_packets += nb_rx; rx_bad_ip_csum = 0; rx_bad_l4_csum = 0; diff --git a/app/test-pmd/icmpecho.c b/app/test-pmd/icmpecho.c index 65aece16c..78e3adf2c 100644 --- a/app/test-pmd/icmpecho.c +++ b/app/test-pmd/icmpecho.c @@ -308,12 +308,12 @@ reply_to_icmp_echo_rqsts(struct fwd_stream *fs) */ nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, nb_pkt_per_burst); - if (unlikely(nb_rx == 0)) - return; - #ifdef RTE_TEST_PMD_RECORD_BURST_STATS fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; #endif + if (unlikely(nb_rx == 0)) + return; + fs->rx_packets += nb_rx; nb_replies = 0; for (i = 0; i < nb_rx; i++) { diff --git a/app/test-pmd/iofwd.c b/app/test-pmd/iofwd.c index 9dce76efe..22b59bbda 100644 --- a/app/test-pmd/iofwd.c +++ b/app/test-pmd/iofwd.c @@ -66,13 +66,13 @@ pkt_burst_io_forward(struct fwd_stream *fs) */ nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, nb_pkt_per_burst); +#ifdef RTE_TEST_PMD_RECORD_BURST_STATS + fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; +#endif if (unlikely(nb_rx == 0)) return; fs->rx_packets += nb_rx; -#ifdef RTE_TEST_PMD_RECORD_BURST_STATS - fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; -#endif nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx); /* diff --git a/app/test-pmd/macfwd.c b/app/test-pmd/macfwd.c index d2ebb1105..0a89d94be 100644 --- a/app/test-pmd/macfwd.c +++ b/app/test-pmd/macfwd.c @@ -71,12 +71,12 @@ pkt_burst_mac_forward(struct fwd_stream *fs) */ nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, nb_pkt_per_burst); - if (unlikely(nb_rx == 0)) - return; - #ifdef RTE_TEST_PMD_RECORD_BURST_STATS fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; #endif + if (unlikely(nb_rx == 0)) + return; + fs->rx_packets += nb_rx; txp = &ports[fs->tx_port]; tx_offloads = txp->dev_conf.txmode.offloads; diff --git a/app/test-pmd/macswap.c b/app/test-pmd/macswap.c index 8428c26d8..fbe8cb39e 100644 --- a/app/test-pmd/macswap.c +++ b/app/test-pmd/macswap.c @@ -72,12 +72,12 @@ pkt_burst_mac_swap(struct fwd_stream *fs) */ nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, nb_pkt_per_burst); - if (unlikely(nb_rx == 0)) - return; - #ifdef RTE_TEST_PMD_RECORD_BURST_STATS fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; #endif + if (unlikely(nb_rx == 0)) + return; + fs->rx_packets += nb_rx; txp = &ports[fs->tx_port]; diff --git a/app/test-pmd/rxonly.c b/app/test-pmd/rxonly.c index 5c65fc42d..18d59a68d 100644 --- a/app/test-pmd/rxonly.c +++ b/app/test-pmd/rxonly.c @@ -63,12 +63,12 @@ pkt_burst_receive(struct fwd_stream *fs) */ nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, nb_pkt_per_burst); - if (unlikely(nb_rx == 0)) - return; - #ifdef RTE_TEST_PMD_RECORD_BURST_STATS fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; #endif + if (unlikely(nb_rx == 0)) + return; + fs->rx_packets += nb_rx; for (i = 0; i < nb_rx; i++) rte_pktmbuf_free(pkts_burst[i]); diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index 2e1493da2..dc93fd5d2 100644 --- a/app/test-pmd/testpmd.c +++ b/app/test-pmd/testpmd.c @@ -1692,57 +1692,68 @@ init_fwd_streams(void) static void pkt_burst_stats_display(const char *rx_tx, struct pkt_burst_stats *pbs) { - uint64_t total_burst; + uint64_t total_burst, sburst; uint64_t nb_burst; - uint64_t burst_stats[3]; - uint16_t pktnb_stats[3]; + uint64_t burst_stats[4]; + uint16_t pktnb_stats[4]; uint16_t nb_pkt; - int burst_percent[3]; + int burst_percent[4], sburstp; + int i; /* * First compute the total number of packet bursts and the * two highest numbers of bursts of the same number of packets. */ - total_burst = 0; - burst_stats[0] = burst_stats[1] = burst_stats[2] = 0; - pktnb_stats[0] = pktnb_stats[1] = pktnb_stats[2] = 0; - for (nb_pkt = 0; nb_pkt < MAX_PKT_BURST; nb_pkt++) { + memset(&burst_stats, 0x0, sizeof(burst_stats)); + memset(&pktnb_stats, 0x0, sizeof(pktnb_stats)); + + /* Show stats for 0 burst size always */ + total_burst = pbs->pkt_burst_spread[0]; + burst_stats[0] = pbs->pkt_burst_spread[0]; + pktnb_stats[0] = 0; + + /* Find the next 2 burst sizes with highest occurances. */ + for (nb_pkt = 1; nb_pkt < MAX_PKT_BURST; nb_pkt++) { nb_burst = pbs->pkt_burst_spread[nb_pkt]; + if (nb_burst == 0) continue; + total_burst += nb_burst; - if (nb_burst > burst_stats[0]) { - burst_stats[1] = burst_stats[0]; - pktnb_stats[1] = pktnb_stats[0]; - burst_stats[0] = nb_burst; - pktnb_stats[0] = nb_pkt; - } else if (nb_burst > burst_stats[1]) { + + if (nb_burst > burst_stats[1]) { + burst_stats[2] = burst_stats[1]; + pktnb_stats[2] = pktnb_stats[1]; burst_stats[1] = nb_burst; pktnb_stats[1] = nb_pkt; + } else if (nb_burst > burst_stats[2]) { + burst_stats[2] = nb_burst; + pktnb_stats[2] = nb_pkt; } } if (total_burst == 0) return; - burst_percent[0] = (double)burst_stats[0] / total_burst * 100; - printf(" %s-bursts : %"PRIu64" [%d%% of %d pkts", rx_tx, total_burst, - burst_percent[0], (int) pktnb_stats[0]); - if (burst_stats[0] == total_burst) { - printf("]\n"); - return; - } - if (burst_stats[0] + burst_stats[1] == total_burst) { - printf(" + %d%% of %d pkts]\n", - 100 - burst_percent[0], pktnb_stats[1]); - return; - } - burst_percent[1] = (double)burst_stats[1] / total_burst * 100; - burst_percent[2] = 100 - (burst_percent[0] + burst_percent[1]); - if ((burst_percent[1] == 0) || (burst_percent[2] == 0)) { - printf(" + %d%% of others]\n", 100 - burst_percent[0]); - return; + + printf(" %s-bursts : %"PRIu64" [", rx_tx, total_burst); + for (i = 0, sburst = 0, sburstp = 0; i < 4; i++) { + if (i == 3) { + printf("%d%% of other]\n", 100 - sburstp); + return; + } + + sburst += burst_stats[i]; + if (sburst == total_burst) { + printf("%d%% of %d pkts]\n", + 100 - sburstp, (int) pktnb_stats[i]); + return; + } + + burst_percent[i] = + (double)burst_stats[i] / total_burst * 100; + printf("%d%% of %d pkts + ", + burst_percent[i], (int) pktnb_stats[i]); + sburstp += burst_percent[i]; } - printf(" + %d%% of %d pkts + %d%% of others]\n", - burst_percent[1], (int) pktnb_stats[1], burst_percent[2]); } #endif /* RTE_TEST_PMD_RECORD_BURST_STATS */ -- 2.17.1
On 7/6/2020 8:11 PM, Honnappa Nagarahalli wrote: > Hi Ferruh, > Thanks for the review comments > > <snip> > >> Subject: Re: [PATCH v2 5/5] app/testpmd: enable empty polls in burst stats >> >> On 6/26/2020 11:09 PM, Honnappa Nagarahalli wrote: >>> The number of empty polls provides information about available CPU >>> head room in the presence of continuous polling. >> >> +1 to have it, it can be useful. >> >>> >>> Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> >>> Reviewed-by: Phil Yang <phil.yang@arm.com> >>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> >>> Tested-by: Phil Yang <phil.yang@arm.com> >>> Tested-by: Ali Alnubani <alialnu@mellanox.com> >> >> <...> >> >>> /* >>> * First compute the total number of packet bursts and the >>> * two highest numbers of bursts of the same number of packets. >>> */ >>> total_burst = 0; >>> - burst_stats[0] = burst_stats[1] = burst_stats[2] = 0; >>> - pktnb_stats[0] = pktnb_stats[1] = pktnb_stats[2] = 0; >>> + memset(&burst_stats, 0x0, sizeof(burst_stats)); >>> + memset(&pktnb_stats, 0x0, sizeof(pktnb_stats)); >>> + >>> for (nb_pkt = 0; nb_pkt < MAX_PKT_BURST; nb_pkt++) { >>> nb_burst = pbs->pkt_burst_spread[nb_pkt]; >>> - if (nb_burst == 0) >>> - continue; >>> total_burst += nb_burst; >>> - if (nb_burst > burst_stats[0]) { >>> - burst_stats[1] = burst_stats[0]; >>> - pktnb_stats[1] = pktnb_stats[0]; >>> + >>> + /* Show empty poll stats always */ >>> + if (nb_pkt == 0) { >>> burst_stats[0] = nb_burst; >>> pktnb_stats[0] = nb_pkt; >> >> just a minor issue but this assignment is not needed. > The empty poll stats are being shown always (even if there were no empty polls). The check 'nb_pkts == 0' indicates that the burst stats are for empty polls. Hence this assignment is required. But, this can be moved out of the loop to provide clarity. I will do that. It is not required because of 'memset' above, we know 'nb_pkt == 0' and we know 'pktnb_stats[0] == 0', so "pktnb_stats[0] = nb_pkt;" is redundant. > >> >>> - } else if (nb_burst > burst_stats[1]) { >>> + continue; >>> + } >>> + >>> + if (nb_burst == 0) >>> + continue; >> >> again another minor issue, can have this check above 'nb_pkt == 0', to prevent >> unnecssary "burst_stats[0] = nb_burst;" assignment if "nb_burst == 0". > The empty polls are always shown, even if there were 0% empty polls. I got that, again because of memset, "burst_stats[0] == 0", you can skip "burst_stats[0] = nb_burst;" in case "nb_burst == 0", that is why you can move it up. Anyway, both are trivial issues ... > >> >> <...> >> >> Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com>
On 7/7/2020 12:32 AM, Honnappa Nagarahalli wrote:
> The throughput calculation requires a counter that measures
> passing of time. However, the kernel saves and restores the PMU
> state when a thread is unscheduled and scheduled. This ensures
> that the PMU cycles are not counted towards a thread that is
> not scheduled. Hence, when RTE_ARM_EAL_RDTSC_USE_PMU is enabled,
> the PMU cycles do not represent the passing of time.
> This results in incorrect calculation of throughput numbers.
> Use clock_gettime system call to calculate the time passed since
> last call.
>
> Bugzilla ID: 450
> Fixes: 0e106980301d ("app/testpmd: show throughput in port stats")
> Cc: zhihong.wang@intel.com
> Cc: stable@dpdk.org
>
> Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Reviewed-by: Phil Yang <phil.yang@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com>
> Tested-by: Phil Yang <phil.yang@arm.com>
> Tested-by: Ali Alnubani <alialnu@mellanox.com>
Series applied to dpdk-next-net/master, thanks.