DPDK patches and discussions
 help / color / mirror / Atom feed
From: Stephen Hemminger <stephen@networkplumber.org>
To: Feifei Wang <feifei.wang2@arm.com>
Cc: dev@dpdk.org, nd@arm.com, Lijian Zhang <lijian.zhang@arm.com>,
	Ruifeng Wang <ruifeng.wang@arm.com>,
	Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Subject: Re: [PATCH] examples/l3fwd: add hard code to collect empty poll and NIC counters
Date: Thu, 11 May 2023 08:44:47 -0700	[thread overview]
Message-ID: <20230511084447.0a93c411@hermes.local> (raw)
In-Reply-To: <20230511082519.4168523-1-feifei.wang2@arm.com>

On Thu, 11 May 2023 16:25:19 +0800
Feifei Wang <feifei.wang2@arm.com> wrote:

> This patch is to collect empty poll of 'rte_eth_rx_burst' functions in
> dpdk l3fwd application. Empty poll means Rx burst function receives no
> pkts in one loop.
> 
> Furthermore, we also add 'nic_xstats_display' API to show NIC counters.
> 
> Usage:
> With this patch, no special settings, just run l3fwd, and when you
> stoping l3fwd, thread will print the info above.
> 
> Note:
> This patch has just a slight impact on performance and can be ignored.

There was a set of other patches around telemetry.
Shouldn't this example use some of that rather than "roll your own"?

> 
> dpdk version:23.03
> 
> Suggested-by: Lijian Zhang <lijian.zhang@arm.com>
> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> ---
>  examples/l3fwd/l3fwd.h     | 68 ++++++++++++++++++++++++++++++++++++++
>  examples/l3fwd/l3fwd_lpm.c | 26 +++++++++++++--
>  examples/l3fwd/main.c      | 22 ++++++++++++
>  3 files changed, 114 insertions(+), 2 deletions(-)
> 
> diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h
> index b55855c932..2b3fca62f3 100644
> --- a/examples/l3fwd/l3fwd.h
> +++ b/examples/l3fwd/l3fwd.h
> @@ -56,6 +56,17 @@
>  #define L3FWD_HASH_ENTRIES		(1024*1024*1)
>  #endif
>  
> +struct lcore_stats {
> +    uint32_t nb_rx_pkts[16];
> +    uint32_t num_loop[16];
> +    uint32_t none_loop[16];
> +    uint32_t no_full_loop[16];
> +    float  none_loop_per[16];
> +    float no_full_loop_per[16];
> +} __rte_cache_aligned;

What is the 16 magic value?

Use double instead of float to keep more accuracy?

There maybe holes in this structure?

You may want to allocate it at runtime based on number of actual
cores and get it on the right NUMA node.

> +
> +extern struct lcore_stats stats[RTE_MAX_LCORE];
> +
>  struct parm_cfg {
>  	const char *rule_ipv4_name;
>  	const char *rule_ipv6_name;
> @@ -115,6 +126,63 @@ extern struct acl_algorithms acl_alg[];
>  
>  extern uint32_t max_pkt_len;
>  
> +static inline void
> +nic_xstats_display(uint32_t port_id)
> +{
> +        struct rte_eth_xstat *xstats;
> +        int cnt_xstats, idx_xstat;
> +        struct rte_eth_xstat_name *xstats_names;
> +
> +        printf("###### NIC extended statistics for port %-2d\n", port_id);
> +        if (!rte_eth_dev_is_valid_port(port_id)) {
> +                fprintf(stderr, "Error: Invalid port number %i\n", port_id);
> +                return;
> +        }
> +
> +        /* Get count */
> +        cnt_xstats = rte_eth_xstats_get_names(port_id, NULL, 0);
> +        if (cnt_xstats  < 0) {
> +                fprintf(stderr, "Error: Cannot get count of xstats\n");
> +                return;
> +        }
> +
> +        /* Get id-name lookup table */
> +        xstats_names = malloc(sizeof(struct rte_eth_xstat_name) * cnt_xstats);
> +        if (xstats_names == NULL) {
> +                fprintf(stderr, "Cannot allocate memory for xstats lookup\n");
> +                return;
> +        }
> +        if (cnt_xstats != rte_eth_xstats_get_names(
> +                        port_id, xstats_names, cnt_xstats)) {
> +                fprintf(stderr, "Error: Cannot get xstats lookup\n");
> +                free(xstats_names);
> +                return;
> +        }
> +
> +        /* Get stats themselves */
> +        xstats = malloc(sizeof(struct rte_eth_xstat) * cnt_xstats);
> +        if (xstats == NULL) {
> +                fprintf(stderr, "Cannot allocate memory for xstats\n");
> +                free(xstats_names);
> +                return;
> +        }
> +        if (cnt_xstats != rte_eth_xstats_get(port_id, xstats, cnt_xstats)) {
> +                fprintf(stderr, "Error: Unable to get xstats\n");
> +                free(xstats_names);
> +                free(xstats);
> +                return;
> +        }
> +
> +        /* Display xstats */
> +        for (idx_xstat = 0; idx_xstat < cnt_xstats; idx_xstat++) {
> +                printf("%s: %"PRIu64"\n",
> +                        xstats_names[idx_xstat].name,
> +                        xstats[idx_xstat].value);
> +        }
> +        free(xstats_names);
> +        free(xstats);
> +}
> +
>  /* Send burst of packets on an output interface */
>  static inline int
>  send_burst(struct lcore_conf *qconf, uint16_t n, uint16_t port)
> diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c
> index 4ac1925c84..9e27e954b9 100644
> --- a/examples/l3fwd/l3fwd_lpm.c
> +++ b/examples/l3fwd/l3fwd_lpm.c
> @@ -41,6 +41,8 @@
>  static struct rte_lpm *ipv4_l3fwd_lpm_lookup_struct[NB_SOCKETS];
>  static struct rte_lpm6 *ipv6_l3fwd_lpm_lookup_struct[NB_SOCKETS];
>  
> +extern struct lcore_stats stats[RTE_MAX_LCORE];
> +
>  /* Performing LPM-based lookups. 8< */
>  static inline uint16_t
>  lpm_get_ipv4_dst_port(const struct rte_ipv4_hdr *ipv4_hdr,
> @@ -153,6 +155,7 @@ lpm_main_loop(__rte_unused void *dummy)
>  	struct lcore_conf *qconf;
>  	const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
>  		US_PER_S * BURST_TX_DRAIN_US;
> +	bool start_count = 0;
>  
>  	lcore_id = rte_lcore_id();
>  	qconf = &lcore_conf[lcore_id];
> @@ -207,8 +210,22 @@ lpm_main_loop(__rte_unused void *dummy)
>  			queueid = qconf->rx_queue_list[i].queue_id;
>  			nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
>  				MAX_PKT_BURST);
> -			if (nb_rx == 0)
> -				continue;
> +			if (start_count == 0) {
> +				if (nb_rx != 0)
> +					start_count = 1;
> +			}
> +
> +			if (start_count == 1) {
> +				stats[lcore_id].nb_rx_pkts[i] += nb_rx;
> +				stats[lcore_id].num_loop[i]++;
> +				if (nb_rx < MAX_PKT_BURST && nb_rx > 0)
> +					stats[lcore_id].no_full_loop[i]++;
> +
> +				if (nb_rx == 0) {
> +					stats[lcore_id].none_loop[i]++;
> +					continue;
> +				}
> +			}

On Arm you are going to need to use a barrier or atomic variable for these
otherwise, there is no guarantee that other thread will read the right value.

>  
>  #if defined RTE_ARCH_X86 || defined __ARM_NEON \
>  			 || defined RTE_ARCH_PPC_64
> @@ -223,6 +240,11 @@ lpm_main_loop(__rte_unused void *dummy)
>  		cur_tsc = rte_rdtsc();
>  	}
>  
> +	for (i = 0; i < n_rx_q; ++i) {
> +		stats[lcore_id].none_loop_per[i] = (float)stats[lcore_id].none_loop[i]/stats[lcore_id].num_loop[i]*100;
> +		stats[lcore_id].no_full_loop_per[i] = (float)stats[lcore_id].no_full_loop[i]/stats[lcore_id].num_loop[i]*100;
> +	}
> +
>  	return 0;
>  }
>  
> diff --git a/examples/l3fwd/main.c b/examples/l3fwd/main.c
> index a4f061537e..4727215eae 100644
> --- a/examples/l3fwd/main.c
> +++ b/examples/l3fwd/main.c
> @@ -53,6 +53,8 @@
>  
>  #define MAX_LCORE_PARAMS 1024
>  
> +struct lcore_stats stats[RTE_MAX_LCORE];
> +
>  uint16_t nb_rxd = RX_DESC_DEFAULT;
>  uint16_t nb_txd = TX_DESC_DEFAULT;
>  
> @@ -1592,6 +1594,26 @@ main(int argc, char **argv)
>  	} else {
>  		rte_eal_mp_wait_lcore();
>  
> +		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
> +			if (rte_lcore_is_enabled(lcore_id) == 0)
> +				continue;
> +			qconf = &lcore_conf[lcore_id];
> +			for (queue = 0; queue < qconf->n_rx_queue; ++queue) {
> +				printf("\nlcore id:%d\n", lcore_id);
> +				printf("queue_id:%d\n",queue);
> +				printf("Rx pkt %d\n", stats[lcore_id].nb_rx_pkts[queue]);
> +				printf("loop number: %d, 0 pkts loop:%d, <32 pkts loop:%d\n",
> +					stats[lcore_id].num_loop[queue], stats[lcore_id].none_loop[queue], stats[lcore_id].no_full_loop[queue]);
> +				printf("0 pkts loop percentage:%.2f%%, <32 pkts loop percentage:%.2f%%\n",
> +					stats[lcore_id].none_loop_per[queue], stats[lcore_id].no_full_loop_per[queue]);
> +				printf("------------------------------------\n\n");
> +
> +			}
> +		}
> +
> +		nic_xstats_display(0);
> +		nic_xstats_display(1);
> +
>  		RTE_ETH_FOREACH_DEV(portid) {
>  			if ((enabled_port_mask & (1 << portid)) == 0)
>  				continue;


  parent reply	other threads:[~2023-05-11 15:44 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-11  8:25 Feifei Wang
2023-05-11  8:51 ` Jerin Jacob
2023-05-11  8:57   ` Feifei Wang
2023-05-11 13:32   ` Honnappa Nagarahalli
2023-05-12  1:52     ` Feifei Wang
2023-05-12 12:02     ` Jerin Jacob
2023-05-15 22:50       ` Honnappa Nagarahalli
2023-05-11 15:44 ` Stephen Hemminger [this message]
2023-05-16  6:19   ` Feifei Wang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230511084447.0a93c411@hermes.local \
    --to=stephen@networkplumber.org \
    --cc=dev@dpdk.org \
    --cc=feifei.wang2@arm.com \
    --cc=honnappa.nagarahalli@arm.com \
    --cc=lijian.zhang@arm.com \
    --cc=nd@arm.com \
    --cc=ruifeng.wang@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).