From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga03.intel.com (mga03.intel.com [134.134.136.65]) by dpdk.org (Postfix) with ESMTP id A468095D5 for ; Fri, 12 Feb 2016 15:57:31 +0100 (CET) Received: from fmsmga002.fm.intel.com ([10.253.24.26]) by orsmga103.jf.intel.com with ESMTP; 12 Feb 2016 06:57:31 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.22,436,1449561600"; d="scan'208";a="913876868" Received: from irvmail001.ir.intel.com ([163.33.26.43]) by fmsmga002.fm.intel.com with ESMTP; 12 Feb 2016 06:57:29 -0800 Received: from sivswdev02.ir.intel.com (sivswdev02.ir.intel.com [10.237.217.46]) by irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id u1CEvTbY010344; Fri, 12 Feb 2016 14:57:29 GMT Received: from sivswdev02.ir.intel.com (localhost [127.0.0.1]) by sivswdev02.ir.intel.com with ESMTP id u1CEvSVS025981; Fri, 12 Feb 2016 14:57:28 GMT Received: (from reshmapa@localhost) by sivswdev02.ir.intel.com with id u1CEvSKn025977; Fri, 12 Feb 2016 14:57:28 GMT From: Reshma Pattan To: dev@dpdk.org Date: Fri, 12 Feb 2016 14:57:23 +0000 Message-Id: <1455289045-25915-4-git-send-email-reshma.pattan@intel.com> X-Mailer: git-send-email 1.7.4.1 In-Reply-To: <1455289045-25915-1-git-send-email-reshma.pattan@intel.com> References: <1455289045-25915-1-git-send-email-reshma.pattan@intel.com> Subject: [dpdk-dev] [PATCH v2 3/5] app/proc_info: add tcpdump support in secondary process X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 12 Feb 2016 14:57:32 -0000 Added "--tcpdump" and "--src-ip-filter" command line options for tcpdump support. Added pcap device creation and writing of packets to pcap device for tcpdump. Added socket functionality to communicate with primary process. 
Signed-off-by: Reshma Pattan --- app/proc_info/main.c | 451 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 files changed, 447 insertions(+), 4 deletions(-) diff --git a/app/proc_info/main.c b/app/proc_info/main.c index 341176d..fe4d9a9 100644 --- a/app/proc_info/main.c +++ b/app/proc_info/main.c @@ -1,7 +1,7 @@ /* * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -38,8 +38,25 @@ #include #include #include +#include #include #include +#include +#include +#include +#include + +/* sys/un.h with __USE_MISC uses strlen, which is unsafe */ +#ifdef __USE_MISC +#define REMOVED_USE_MISC +#undef __USE_MISC +#endif +#include +/* make sure we redefine __USE_MISC only if it was previously undefined */ +#ifdef REMOVED_USE_MISC +#define __USE_MISC +#undef REMOVED_USE_MISC +#endif #include #include @@ -57,11 +74,25 @@ #include #include #include +#include + +#ifdef RTE_LIBRTE_PMD_PCAP +#include +#endif /* Maximum long option length for option parsing. */ #define MAX_LONG_OPT_SZ 64 #define RTE_LOGTYPE_APP RTE_LOGTYPE_USER1 - +#define APP_ARG_TCPDUMP_MAX_TUPLES 54 +#define TCPDUMP_SOCKET_PATH "%s/tcpdump_mp_socket" +#define CMSGLEN CMSG_LEN(sizeof(int)) +#define TX_DESC_PER_QUEUE 512 +#define RX_DESC_PER_QUEUE 128 +#define BURST_SIZE 32 +#define MBUF_PER_POOL 65535 +#define MBUF_POOL_CACHE_SIZE 250 + +static struct rte_eth_conf port_conf_default; /**< mask of enabled ports */ static uint32_t enabled_port_mask; /**< Enable stats. */ @@ -75,13 +106,59 @@ static uint32_t reset_xstats; /**< Enable memory info. */ static uint32_t mem_info; +enum tcpdump_msg_type { + REMOVE_RXTX_CBS = 1, + REGISTER_RXTX_CBS = 2 +}; + +enum rx_tx_type { + RX = 1, + TX = 2, + RX_TX_TYPES = 2 +}; + +/**< src ip filter for tcpdump. */ +static uint32_t src_ip_filter; +/**< socket for connecting to primary. 
*/ +static int socket_fd; +/**< vdev port ids. */ +static int pcap_vdev_port_id[RX_TX_TYPES]; +volatile uint8_t quit_signal; +/**< Enable tcpdump feature. */ +bool is_tcpdump_enabled; + +static volatile struct tcpdump_app_stats { + struct { + uint64_t dequeue_pkts; + uint64_t tx_pkts; + uint64_t freed_pkts; + } in __rte_cache_aligned; + struct { + uint64_t dequeue_pkts; + uint64_t tx_pkts; + uint64_t freed_pkts; + } out __rte_cache_aligned; +} tcpdump_app_stats __rte_cache_aligned; + +struct tcpdump_port_queue_tuples { + int num_pq_tuples; + uint8_t port_id[APP_ARG_TCPDUMP_MAX_TUPLES]; + uint8_t queue_id[APP_ARG_TCPDUMP_MAX_TUPLES]; +} __rte_cache_aligned; + +static struct tcpdump_port_queue_tuples tcpdump_pq_t; + /**< display usage */ + static void proc_info_usage(const char *prgname) { printf("%s [EAL options] -- -p PORTMASK\n" " -m to display DPDK memory zones, segments and TAILQ information\n" " -p PORTMASK: hexadecimal bitmask of ports to retrieve stats for\n" + " --tcpdump (port,queue): port and queue info for capturing packets " + "for tcpdump\n" + " --src-ip-filter \"A.B.C.D\": src ip for tcpdump filtering\n" " --stats: to display port statistics, enabled by default\n" " --xstats: to display extended port statistics, disabled by " "default\n" @@ -116,14 +193,79 @@ parse_portmask(const char *portmask) } +static int +parse_tcpdump(const char *q_arg) +{ + char s[256]; + const char *p, *p0 = q_arg; + char *end; + + enum fieldnames { + FLD_PORT = 0, + FLD_QUEUE, + _NUM_FLD + }; + + unsigned long int_fld[_NUM_FLD]; + char *str_fld[_NUM_FLD]; + int i; + unsigned size; + uint32_t nb_tcpdump_params; + + nb_tcpdump_params = 0; + + while ((p = strchr(p0, '(')) != NULL) { + ++p; + p0 = strchr(p, ')'); + if (p0 == NULL) + return -1; + + size = p0 - p; + if (size >= sizeof(s)) + return -1; + + snprintf(s, sizeof(s), "%.*s", size, p); + if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != _NUM_FLD) + return -1; + for (i = 0; i < _NUM_FLD; i++) { + errno = 0; + 
int_fld[i] = strtoul(str_fld[i], &end, 0); + if (errno != 0 || end == str_fld[i] || int_fld[i] > 255) + return -1; + } + if (nb_tcpdump_params >= APP_ARG_TCPDUMP_MAX_TUPLES) { + printf("exceeded max number of port params: %"PRIu32"\n", + nb_tcpdump_params); + return -1; + } + tcpdump_pq_t.port_id[tcpdump_pq_t.num_pq_tuples] = + (uint8_t)int_fld[FLD_PORT]; + tcpdump_pq_t.queue_id[tcpdump_pq_t.num_pq_tuples] = + (uint8_t)int_fld[FLD_QUEUE]; + tcpdump_pq_t.num_pq_tuples++; + } + return 0; +} + +static int +parse_ip(const char *q_arg) +{ + if (!inet_pton(AF_INET, q_arg, &src_ip_filter)) + return 1; + + return 0; +} + /* Parse the argument given in the command line of the application */ static int proc_info_parse_args(int argc, char **argv) { - int opt; + int opt, ret; int option_index; char *prgname = argv[0]; static struct option long_option[] = { + {"tcpdump", 1, 0, 0}, + {"src-ip-filter", 1, 0, 0}, {"stats", 0, NULL, 0}, {"stats-reset", 0, NULL, 0}, {"xstats", 0, NULL, 0}, @@ -151,6 +293,27 @@ proc_info_parse_args(int argc, char **argv) mem_info = 1; break; case 0: + if (!strncmp(long_option[option_index].name, "tcpdump", + MAX_LONG_OPT_SZ)) { + ret = parse_tcpdump(optarg); + if (ret) { + printf("invalid tcpdump\n"); + proc_info_usage(prgname); + return -1; + } + is_tcpdump_enabled = true; + } + + if (!strncmp(long_option[option_index].name, "src-ip-filter", + MAX_LONG_OPT_SZ)) { + ret = parse_ip(optarg); + if (ret) { + printf("invalid src-ip-filter\n"); + proc_info_usage(prgname); + return -1; + } + } + /* Print stats */ if (!strncmp(long_option[option_index].name, "stats", MAX_LONG_OPT_SZ)) @@ -285,6 +448,202 @@ nic_xstats_clear(uint8_t port_id) printf("\n NIC extended statistics for port %d cleared\n", port_id); } +/* get socket path (/var/run if root, $HOME otherwise) */ +static void +tcpdump_get_socket_path(char *buffer, int bufsz) +{ + const char *dir = "/var/run/tcpdump_socket"; + const char *home_dir = getenv("HOME/tcpdump_socket"); + + if (getuid() != 0 && 
home_dir != NULL) + dir = home_dir; + /* use current prefix as file path */ + snprintf(buffer, bufsz, TCPDUMP_SOCKET_PATH, dir); +} + +static int +tcpdump_connect_to_primary(void) +{ + struct sockaddr_un addr; + socklen_t sockaddr_len; + + /* set up a socket */ + socket_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0); + if (socket_fd < 0) { + RTE_LOG(ERR, EAL, "Failed to create socket!\n"); + return -1; + } + + tcpdump_get_socket_path(addr.sun_path, sizeof(addr.sun_path)); + addr.sun_family = AF_UNIX; + + sockaddr_len = sizeof(struct sockaddr_un); + + if (connect(socket_fd, (struct sockaddr *) &addr, sockaddr_len) == 0) + return socket_fd; + + /* if connect failed */ + close(socket_fd); + return -1; +} + +/* send a request, return -1 on error */ +static int +tcpdump_send_request(int socket, enum tcpdump_msg_type type) +{ + char buffer[256]; + struct msghdr reg_cb_msg; + struct iovec msg[3]; + int ret, wc, buf, i, n = 0; + + buf = type; + for (i = 0; i < tcpdump_pq_t.num_pq_tuples; i++) { + wc = snprintf(buffer + n, sizeof(buffer) - n, "(%d,%d)", + tcpdump_pq_t.port_id[i], tcpdump_pq_t.queue_id[i]); + n += wc; + } + + memset(msg, 0, sizeof(msg)); + msg[0].iov_base = (char *) &buf; + msg[0].iov_len = 1; + msg[1].iov_base = (char *)buffer; + msg[1].iov_len = sizeof(buffer); + msg[2].iov_base = (char *) &src_ip_filter; + msg[2].iov_len = sizeof(src_ip_filter); + + memset(®_cb_msg, 0, sizeof(reg_cb_msg)); + reg_cb_msg.msg_iov = msg; + reg_cb_msg.msg_iovlen = 3; + + ret = sendmsg(socket, ®_cb_msg, 0); + if (ret < 0) + return -1; + return 0; +} + +static void +int_handler(int sig_num) +{ + /* connect to primary process using AF_UNIX socket */ + socket_fd = tcpdump_connect_to_primary(); + if (socket_fd < 0) + printf("cannot connect to primary process for RX/TX CBs removal!\n"); + + /* send request to remove rx/tx callbacks */ + if (tcpdump_send_request(socket_fd, REMOVE_RXTX_CBS) < 0) { + printf("cannot send tcpdump remove rxtx cbs eequest!\n"); + close(socket_fd); + } + + /* 
close tcpdump socket fd */ + close(socket_fd); + printf("Exiting on signal %d\n", sig_num); + quit_signal = 1; +} + +static inline int +configure_pcap_vdev(uint8_t port_id) +{ + struct ether_addr addr; + const uint16_t rxRings = 0, txRings = 1; + const uint8_t nb_ports = rte_eth_dev_count(); + int ret; + uint16_t q; + + if (port_id > nb_ports) + return -1; + + ret = rte_eth_dev_configure(port_id, rxRings, txRings, &port_conf_default); + if (ret != 0) + return ret; + + for (q = 0; q < txRings; q++) { + ret = rte_eth_tx_queue_setup(port_id, q, TX_DESC_PER_QUEUE, + rte_eth_dev_socket_id(port_id), NULL); + if (ret < 0) { + rte_exit(EXIT_FAILURE, "queue setup failed\n"); + return ret; + } + } + + ret = rte_eth_dev_start(port_id); + if (ret < 0) + return ret; + + rte_eth_macaddr_get(port_id, &addr); + printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8 + " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n", + (unsigned)port_id, + addr.addr_bytes[0], addr.addr_bytes[1], + addr.addr_bytes[2], addr.addr_bytes[3], + addr.addr_bytes[4], addr.addr_bytes[5]); + + rte_eth_promiscuous_enable(port_id); + + return 0; +} + +static int +create_pcap_pmd_vdev(enum rx_tx_type type) { + char pcap_vdev_name[32]; + char pcap_filename[32]; +#ifdef RTE_LIBRTE_PMD_PCAP + struct rx_pcaps rxpcap; + struct tx_pcaps txpcap; +#endif + int port_id; + + if (type == RX) { + snprintf(pcap_vdev_name, sizeof(pcap_vdev_name), + "eth_pcap_tcpdump_%s", "RX"); + snprintf(pcap_filename, sizeof(pcap_filename), + "/tmp/%s_pcap.pcap", "RX"); + } else if (type == TX) { + snprintf(pcap_vdev_name, sizeof(pcap_vdev_name), + "eth_pcap_tcpdump_%s", "TX"); + snprintf(pcap_filename, sizeof(pcap_filename), + "/tmp/%s_pcap.pcap", "TX"); + } + +#ifdef RTE_LIBRTE_PMD_PCAP + rxpcap.names[0] = ""; + rxpcap.types[0] = ""; + rxpcap.num_of_rx = 0; + txpcap.names[0] = pcap_filename; + txpcap.types[0] = "tx_pcap"; + txpcap.num_of_tx = 1; + + port_id = rte_eth_from_pcapsndumpers(pcap_vdev_name, + &rxpcap, rxpcap.num_of_rx, + &txpcap, 
txpcap.num_of_tx, rte_socket_id()); +#else + port_id = -1; +#endif + if (port_id < 0) + rte_exit(EXIT_FAILURE, "Failed to create pcap_vdev\n"); + + return port_id; +} + +static void +print_tcpdump_stats(void) +{ + printf("##### TCPDUMP DEBUG STATS #####\n"); + printf(" - Input packets dequeued: %"PRIu64"\n", + tcpdump_app_stats.in.dequeue_pkts); + printf(" - Input packets transmitted to pcap: %"PRIu64"\n", + tcpdump_app_stats.in.tx_pkts); + printf(" - Input packets freed: %"PRIu64"\n", + tcpdump_app_stats.in.freed_pkts); + printf(" - Output packets dequeued: %"PRIu64"\n", + tcpdump_app_stats.out.dequeue_pkts); + printf(" - Output packets transmitted to pcap: %"PRIu64"\n", + tcpdump_app_stats.out.tx_pkts); + printf(" - Output packets freed: %"PRIu64"\n", + tcpdump_app_stats.out.freed_pkts); + printf("################################\n"); +} + int main(int argc, char **argv) { @@ -295,6 +654,10 @@ main(int argc, char **argv) char mp_flag[] = "--proc-type=secondary"; char *argp[argc + 3]; uint8_t nb_ports; + struct rte_ring *rx_ring, *tx_ring; + + /* catch ctrl-c so we can print on exit */ + signal(SIGINT, int_handler); argp[0] = argv[0]; argp[1] = c_flag; @@ -327,7 +690,6 @@ main(int argc, char **argv) if (nb_ports == 0) rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n"); - if (nb_ports > RTE_MAX_ETHPORTS) nb_ports = RTE_MAX_ETHPORTS; @@ -348,5 +710,86 @@ main(int argc, char **argv) } } + if (is_tcpdump_enabled == true) { + + /* create pcap virtual devices for rx and tx */ + pcap_vdev_port_id[0] = create_pcap_pmd_vdev(RX); + configure_pcap_vdev(pcap_vdev_port_id[0]); + + pcap_vdev_port_id[1] = create_pcap_pmd_vdev(TX); + configure_pcap_vdev(pcap_vdev_port_id[1]); + + /* connect to primary process using AF_UNIX socket */ + socket_fd = tcpdump_connect_to_primary(); + if (socket_fd < 0) { + printf("cannot connect to primary process!\n"); + return -1; + } + + if (tcpdump_send_request(socket_fd, REGISTER_RXTX_CBS) < 0) { + printf("cannot send tcpdump register rxtx cbs 
request!\n"); + close(socket_fd); + return -1; + } + + while (1) { + rx_ring = rte_ring_lookup("prim_to_sec_rx"); + tx_ring = rte_ring_lookup("prim_to_sec_tx"); + if (rx_ring != NULL && tx_ring != NULL) + break; + } + + while (!quit_signal) { + /* write input packets of port to pcap file for tcpdump */ + struct rte_mbuf *rx_bufs[BURST_SIZE]; + + /* first dequeue packets from ring of primary process */ + const uint16_t nb_in_deq = rte_ring_dequeue_burst(rx_ring, + (void *)rx_bufs, BURST_SIZE); + tcpdump_app_stats.in.dequeue_pkts += nb_in_deq; + + if (nb_in_deq) { + /* then sent on pcap file */ + uint16_t nb_in_txd = rte_eth_tx_burst( + pcap_vdev_port_id[0], + 0, rx_bufs, nb_in_deq); + tcpdump_app_stats.in.tx_pkts += nb_in_txd; + + if (unlikely(nb_in_txd < nb_in_deq)) { + do { + rte_pktmbuf_free(rx_bufs[nb_in_txd]); + tcpdump_app_stats.in.freed_pkts++; + } while (++nb_in_txd < nb_in_deq); + } + + } + + /* write output packets of port to pcap file for tcpdump */ + struct rte_mbuf *tx_bufs[BURST_SIZE]; + + /* first dequeue from ring of primary process */ + const uint16_t nb_out_deq = rte_ring_dequeue_burst(tx_ring, + (void *)tx_bufs, BURST_SIZE); + tcpdump_app_stats.out.dequeue_pkts += nb_out_deq; + + if (nb_out_deq) { + /* then sent on pcap file */ + uint16_t nb_out_txd = rte_eth_tx_burst( + pcap_vdev_port_id[1], + 0, tx_bufs, nb_out_deq); + tcpdump_app_stats.out.tx_pkts += nb_out_txd; + if (unlikely(nb_out_txd < nb_out_deq)) { + do { + rte_pktmbuf_free(tx_bufs[nb_out_txd]); + tcpdump_app_stats.out.freed_pkts++; + } while (++nb_out_txd < nb_out_deq); + + } + } + } + + print_tcpdump_stats(); + + } return 0; } -- 1.7.4.1