From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtp.tuxdriver.com (charlotte.tuxdriver.com [70.61.120.58]) by dpdk.org (Postfix) with ESMTP id 6F7D880B7 for ; Fri, 10 Oct 2014 19:45:08 +0200 (CEST) Received: from hmsreliant.think-freely.org ([2001:470:8:a08:7aac:c0ff:fec2:933b] helo=localhost) by smtp.tuxdriver.com with esmtpsa (TLSv1:AES128-SHA:128) (Exim 4.63) (envelope-from ) id 1XceMe-0000O3-4o; Fri, 10 Oct 2014 13:52:34 -0400 Date: Fri, 10 Oct 2014 13:52:26 -0400 From: Neil Horman To: Cunming Liang Message-ID: <20141010175226.GG19499@hmsreliant.think-freely.org> References: <1408947174-11323-1-git-send-email-cunming.liang@intel.com> <1412944201-30703-1-git-send-email-cunming.liang@intel.com> <1412944201-30703-2-git-send-email-cunming.liang@intel.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <1412944201-30703-2-git-send-email-cunming.liang@intel.com> User-Agent: Mutt/1.5.23 (2014-03-12) X-Spam-Score: -2.9 (--) X-Spam-Status: No Cc: dev@dpdk.org Subject: Re: [dpdk-dev] [PATCH v2 1/4] app/test: unit test for rx and tx cycles/packet X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 10 Oct 2014 17:45:08 -0000 On Fri, Oct 10, 2014 at 08:29:58PM +0800, Cunming Liang wrote: > It provides unit test to measure cycles/packet in NIC loopback mode. > It simply gives the average cycles of IO used per packet without test equipment. > When doing the test, make sure the link is UP. > > Usage Example: > 1. Run unit test app in interactive mode > app/test -c f -n 4 -- -i > 2. Run and wait for the result > pmd_perf_autotest > > There's option to choose rx/tx pair, default is vector. 
> set_rxtx_mode [vector|scalar|full|hybrid] > Note: To get acurate scalar fast, please choose 'vector' or 'hybrid' without INC_VEC=y in config > > Signed-off-by: Cunming Liang > Acked-by: Bruce Richardson Notes inline > --- > app/test/Makefile | 1 + > app/test/commands.c | 38 +++ > app/test/packet_burst_generator.c | 4 +- > app/test/test.h | 4 + > app/test/test_pmd_perf.c | 626 +++++++++++++++++++++++++++++++++++ > lib/librte_pmd_ixgbe/ixgbe_ethdev.c | 6 + > 6 files changed, 677 insertions(+), 2 deletions(-) > create mode 100644 app/test/test_pmd_perf.c > > diff --git a/app/test/Makefile b/app/test/Makefile > index 6af6d76..ebfa0ba 100644 > --- a/app/test/Makefile > +++ b/app/test/Makefile > @@ -56,6 +56,7 @@ SRCS-y += test_memzone.c > > SRCS-y += test_ring.c > SRCS-y += test_ring_perf.c > +SRCS-y += test_pmd_perf.c > > ifeq ($(CONFIG_RTE_LIBRTE_TABLE),y) > SRCS-y += test_table.c > diff --git a/app/test/commands.c b/app/test/commands.c > index a9e36b1..f1e746e 100644 > --- a/app/test/commands.c > +++ b/app/test/commands.c > @@ -310,12 +310,50 @@ cmdline_parse_inst_t cmd_quit = { > > +#define NB_ETHPORTS_USED (1) > +#define NB_SOCKETS (2) > +#define MEMPOOL_CACHE_SIZE 250 > +#define MBUF_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM) Don't you want to size this in accordance with the amount of data you're sending (64 Bytes as noted above)? > +static void > +print_ethaddr(const char *name, const struct ether_addr *eth_addr) > +{ > + printf("%s%02X:%02X:%02X:%02X:%02X:%02X", name, > + eth_addr->addr_bytes[0], > + eth_addr->addr_bytes[1], > + eth_addr->addr_bytes[2], > + eth_addr->addr_bytes[3], > + eth_addr->addr_bytes[4], > + eth_addr->addr_bytes[5]); > +} > + This was copied from print_ethaddr. 
Seems like a good candidate for a common function in rte_ether.h > +} > + > +static void > +signal_handler(int signum) > +{ > + /* When we receive a USR1 signal, print stats */ I think you mean SIGUSR2; below, SIGUSR1 tears the test down and exits the program > + if (signum == SIGUSR1) { SIGINT instead. That's the common practice. > + printf("Force Stop!\n"); > + stop = 1; > + } > + if (signum == SIGUSR2) > + stats_display(0); > +} > +/* main processing loop */ > +static int > +main_loop(__rte_unused void *args) > +{ > +#define PACKET_SIZE 64 > +#define FRAME_GAP 12 > +#define MAC_PREAMBLE 8 > + struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; > + unsigned lcore_id; > + unsigned i, portid, nb_rx = 0, nb_tx = 0; > + struct lcore_conf *conf; > + uint64_t prev_tsc, cur_tsc; > + int pkt_per_port; > + uint64_t packets_per_second, total_packets; > + > + lcore_id = rte_lcore_id(); > + conf = &lcore_conf[lcore_id]; > + if (conf->status != LCORE_USED) > + return 0; > + > + pkt_per_port = MAX_TRAFIC_BURST / conf->nb_ports; > + > + int idx = 0; > + for (i = 0; i < conf->nb_ports; i++) { > + int num = pkt_per_port; > + portid = conf->portlist[i]; > + printf("inject %d packet to port %d\n", num, portid); > + while (num) { > + nb_tx = RTE_MIN(MAX_PKT_BURST, num); > + nb_tx = rte_eth_tx_burst(portid, 0, > + &tx_burst[idx], nb_tx); > + num -= nb_tx; > + idx += nb_tx; > + } > + } > + printf("Total packets inject to prime ports = %u\n", idx); > + > + packets_per_second = (link_mbps * 1000 * 1000) / > + +((PACKET_SIZE + FRAME_GAP + MAC_PREAMBLE) * CHAR_BIT); > + printf("Each port will do %"PRIu64" packets per second\n", > + +packets_per_second); > + > + total_packets = RTE_TEST_DURATION * conf->nb_ports * packets_per_second; > + printf("Test will stop after at least %"PRIu64" packets received\n", > + + total_packets); > + > + prev_tsc = rte_rdtsc(); > + > + while (likely(!stop)) { > + for (i = 0; i < conf->nb_ports; i++) { > + portid = conf->portlist[i]; > + nb_rx = 
rte_eth_rx_burst((uint8_t) portid, 0, > + pkts_burst, MAX_PKT_BURST); > + if (unlikely(nb_rx == 0)) { > + idle++; > + continue; > + } > + > + count += nb_rx; Doesn't take into consideration error conditions. rte_eth_rx_burst can return -ENOTSUP > + nb_tx = rte_eth_tx_burst(portid, 0, pkts_burst, nb_rx); Ditto with -ENOTSUP > + if (unlikely(nb_tx < nb_rx)) { What makes this unlikely? Seems like a perfectly reasonable condition to happen to me. If the network is busy, it's completely likely that you will receive more frames than you send, if you elect to receive all frames. > + drop += (nb_rx - nb_tx); > + do { > + rte_pktmbuf_free(pkts_burst[nb_tx]); Defer this; it skews your timing > + } while (++nb_tx < nb_rx); > + } > + } > + if (unlikely(count >= total_packets)) > + break; What's the reasoning here? Do you only ever expect to receive frames that you send? If so, seems like this should call for a big warning to get printed. > + } > + > + cur_tsc = rte_rdtsc(); > + > + for (i = 0; i < conf->nb_ports; i++) { > + portid = conf->portlist[i]; > + int nb_free = pkt_per_port; > + do { /* dry out */ > + nb_rx = rte_eth_rx_burst((uint8_t) portid, 0, > + pkts_burst, MAX_PKT_BURST); > + nb_tx = 0; > + while (nb_tx < nb_rx) > + rte_pktmbuf_free(pkts_burst[nb_tx++]); > + nb_free -= nb_rx; > + } while (nb_free != 0); > + printf("free %d mbuf left in port %u\n", pkt_per_port, portid); > + } > + What's the purpose of this? Are you trying to flush the device? Wouldn't it be enough just to stop the interface? > + if (count == 0) > + return -1; > + > + printf("%lu packet, %lu drop, %lu idle\n", count, drop, idle); > + printf("Result: %ld cycles per packet\n", (cur_tsc - prev_tsc) / count); > + Bad math here. There's no guarantee that the tsc hasn't wrapped (potentially more than once) depending on your test length. You need to check the tsc before and after each burst and record an average of deltas instead, accounting in each instance for the possibility of wrap. 
> + return 0; > +} > + > +static int > +test_pmd_perf(void) > +{ > + uint16_t nb_ports, num, nb_lcores, slave_id = (uint16_t)-1; > + uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; > + uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; > + uint16_t portid; > + uint16_t nb_rx_queue = 1, nb_tx_queue = 1; > + int socketid = -1; > + int ret; > + > + printf("Start PMD RXTX cycles cost test.\n"); > + > + signal(SIGUSR1, signal_handler); Again SIGINT here. > + signal(SIGUSR2, signal_handler); > + > + nb_ports = rte_eth_dev_count(); > + if (nb_ports < NB_ETHPORTS_USED) { > + printf("At least %u port(s) used for perf. test\n", > + NB_ETHPORTS_USED); > + return -1; > + } > + > + if (nb_ports > RTE_MAX_ETHPORTS) > + nb_ports = RTE_MAX_ETHPORTS; > + > + nb_lcores = rte_lcore_count(); > + > + memset(lcore_conf, 0, sizeof(lcore_conf)); > + init_lcores(); > + > + init_mbufpool(NB_MBUF); > + > + reset_count(); > + num = 0; > + for (portid = 0; portid < nb_ports; portid++) { > + if (socketid == -1) { > + socketid = rte_eth_dev_socket_id(portid); > + slave_id = alloc_lcore(socketid); > + if (slave_id == (uint16_t)-1) { > + printf("No avail lcore to run test\n"); > + return -1; > + } > + printf("Performance test runs on lcore %u socket %u\n", > + slave_id, socketid); > + } > + > + if (socketid != rte_eth_dev_socket_id(portid)) { > + printf("Skip port %d\n", portid); > + continue; > + } > + > + /* port configure */ > + ret = rte_eth_dev_configure(portid, nb_rx_queue, > + nb_tx_queue, &port_conf); > + if (ret < 0) > + rte_exit(EXIT_FAILURE, > + "Cannot configure device: err=%d, port=%d\n", > + ret, portid); > + > + rte_eth_macaddr_get(portid, &ports_eth_addr[portid]); > + printf("Port %u ", portid); > + print_ethaddr("Address:", &ports_eth_addr[portid]); > + printf("\n"); > + > + /* tx queue setup */ > + ret = rte_eth_tx_queue_setup(portid, 0, nb_txd, > + socketid, &tx_conf); > + if (ret < 0) > + rte_exit(EXIT_FAILURE, > + "rte_eth_tx_queue_setup: err=%d, " > + "port=%d\n", ret, portid); > + > + 
/* rx queue steup */ > + ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd, > + socketid, &rx_conf, > + mbufpool[socketid]); > + if (ret < 0) > + rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: err=%d," > + "port=%d\n", ret, portid); > + > + /* Start device */ > + stop = 0; > + ret = rte_eth_dev_start(portid); > + if (ret < 0) > + rte_exit(EXIT_FAILURE, > + "rte_eth_dev_start: err=%d, port=%d\n", > + ret, portid); > + > + /* always eanble promiscuous */ > + rte_eth_promiscuous_enable(portid); > + > + lcore_conf[slave_id].portlist[num++] = portid; > + lcore_conf[slave_id].nb_ports++; > + } > + check_all_ports_link_status(nb_ports, RTE_PORT_ALL); > + > + init_traffic(mbufpool[socketid], tx_burst, MAX_TRAFIC_BURST); > + > + rte_eal_remote_launch(main_loop, NULL, slave_id); > + if (rte_eal_wait_lcore(slave_id) < 0) > + return -1; > + > + /* port tear down */ > + for (portid = 0; portid < nb_ports; portid++) { > + if (socketid != rte_eth_dev_socket_id(portid)) > + continue; > + > + rte_eth_dev_stop(portid); > + } > + Clean up your allocated memory/lcores/etc? Neil