From: <eagostini@nvidia.com>
To: <dev@dpdk.org>
Cc: Elena Agostini <eagostini@nvidia.com>
Subject: [PATCH v2 1/1] app/test-gpudev: introduce ethdev to rx/tx packets using GPU memory
Date: Thu, 18 Nov 2021 18:56:13 +0000 [thread overview]
Message-ID: <20211118185613.3246-2-eagostini@nvidia.com> (raw)
In-Reply-To: <20211118185613.3246-1-eagostini@nvidia.com>
From: Elena Agostini <eagostini@nvidia.com>
This patch introduces ethdev in test-gpudev app to provide:
- an example to show how GPU memory can be used to send and receive packets
- a useful tool to measure network metrics when using GPU memory with
I/O forwarding
With this feature test-gpudev can:
- RX packets in CPU or GPU memory
- Store packets in the gpudev communication list
- TX received packets from the communication list
It's a simulation of a multi-core application.
Signed-off-by: Elena Agostini <eagostini@nvidia.com>
---
app/test-gpudev/main.c | 477 +++++++++++++++++++++++++++++++++++++++--
1 file changed, 458 insertions(+), 19 deletions(-)
diff --git a/app/test-gpudev/main.c b/app/test-gpudev/main.c
index 250fba6427..18de023208 100644
--- a/app/test-gpudev/main.c
+++ b/app/test-gpudev/main.c
@@ -10,6 +10,8 @@
#include <stdarg.h>
#include <errno.h>
#include <getopt.h>
+#include <stdbool.h>
+#include <signal.h>
#include <rte_common.h>
#include <rte_malloc.h>
@@ -19,22 +21,90 @@
#include <rte_ethdev.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>
+#include <rte_launch.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
#include <rte_gpudev.h>
+#define GPU_PAGE_SHIFT 16
+#define GPU_PAGE_SIZE (1UL << GPU_PAGE_SHIFT)
+#define GPU_PAGE_OFFSET (GPU_PAGE_SIZE-1)
+#define GPU_PAGE_MASK (~GPU_PAGE_OFFSET)
+
+#define MAX_QUEUES 16
+#define NUM_COMM_ITEMS 2048
+#define PKT_GAP 4
+
+// #define DEBUG_PRINT 1
+
enum app_args {
ARG_HELP,
- ARG_MEMPOOL
+ ARG_BURST,
+ ARG_GPU,
+ ARG_MBUFD,
+ ARG_MEMORY,
+ ARG_QUEUES,
+ ARG_TESTAPI,
+};
+
+enum mem_type {
+ MEMORY_CPU,
+ MEMORY_GPU
+};
+
+/* Options configurable from cmd line */
+static uint32_t conf_burst = 64;
+static uint16_t conf_gpu_id = 0;
+static enum mem_type conf_mtype = MEMORY_CPU;
+static uint32_t conf_mbuf_dataroom = 2048;
+static uint32_t conf_queues = 1;
+static bool conf_testapi = false;
+static uint16_t conf_nb_descriptors = 2048;
+
+/* Options statically defined */
+static uint32_t conf_nb_mbuf = 16384;
+static uint16_t conf_port_id = 0;
+
+/* Other variables */
+static volatile bool force_quit;
+static struct rte_mempool *mpool;
+static struct rte_pktmbuf_extmem ext_mem;
+struct rte_gpu_comm_list *comm_list_fwd[MAX_QUEUES];
+struct rte_ether_addr port_eth_addr;
+static struct rte_eth_conf port_conf = {
+ .rxmode = {
+ .mq_mode = ETH_MQ_RX_RSS,
+ .split_hdr_size = 0,
+ .offloads = 0,
+ },
+ .txmode = {
+ .mq_mode = ETH_MQ_TX_NONE,
+ .offloads = 0,
+ },
+ .rx_adv_conf = {
+ .rss_conf = {
+ .rss_key = NULL,
+ .rss_hf = ETH_RSS_IP
+ },
+ },
};
static void
usage(const char *prog_name)
{
- printf("%s [EAL options] --\n",
+ printf("%s [EAL options] --\n"
+ " --help\n"
+ " --burst N: number of packets per rx burst\n"
+ " --gpu N: GPU ID to use\n"
+ " --memory N: external mempool memory type, 0 CPU, 1 GPU\n"
+ " --mbufd N: mbuf dataroom size\n"
+ " --testapi: test gpudev function\n"
+ " --queues N: number of RX queues\n",
prog_name);
}
-static void
+static int
args_parse(int argc, char **argv)
{
char **argvopt;
@@ -42,7 +112,19 @@ args_parse(int argc, char **argv)
int opt_idx;
static struct option lgopts[] = {
- { "help", 0, 0, ARG_HELP},
+ { "help", 0, 0, ARG_HELP},
+ /* Packets per burst. */
+ { "burst", 1, 0, ARG_BURST},
+ /* GPU to use. */
+ { "gpu", 1, 0, ARG_GPU},
+ /* Type of memory for the mempool. */
+ { "memory", 1, 0, ARG_MEMORY},
+ /* Size of mbufs dataroom */
+ { "mbufd", 1, 0, ARG_MBUFD},
+ /* Number of RX queues */
+ { "queues", 1, 0, ARG_QUEUES},
+ /* Test only gpudev functions */
+ { "testapi", 0, 0, ARG_TESTAPI},
/* End of options */
{ 0, 0, 0, 0 }
};
@@ -51,6 +133,24 @@ args_parse(int argc, char **argv)
while ((opt = getopt_long(argc, argvopt, "",
lgopts, &opt_idx)) != EOF) {
switch (opt) {
+ case ARG_BURST:
+ conf_burst = (uint32_t) atoi(optarg);
+ break;
+ case ARG_GPU:
+ conf_gpu_id = (uint16_t) atoi(optarg);
+ break;
+ case ARG_MEMORY:
+ conf_mtype = (atoi(optarg) == 1 ? MEMORY_GPU : MEMORY_CPU);
+ break;
+ case ARG_MBUFD:
+ conf_mbuf_dataroom = (uint32_t) atoi(optarg);
+ break;
+ case ARG_QUEUES:
+ conf_queues = (uint32_t) atoi(optarg);
+ break;
+ case ARG_TESTAPI:
+ conf_testapi = (atoi(optarg) == 1 ? true : false);
+ break;
case ARG_HELP:
usage(argv[0]);
break;
@@ -60,6 +160,19 @@ args_parse(int argc, char **argv)
break;
}
}
+
+ if (conf_queues > MAX_QUEUES) {
+ fprintf(stderr, "Can't support more than %d queues\n", MAX_QUEUES);
+ return -1;
+ }
+
+ if (conf_queues * 2 > rte_lcore_count()) {
+ fprintf(stderr, "Need to use at least %d cores to support %d RX/TX queues (EAL cores %d)\n",
+ conf_queues * 2, conf_queues, rte_lcore_count());
+ return -1;
+ }
+
+ return 0;
}
static int
@@ -342,13 +455,130 @@ create_update_comm_list(uint16_t gpu_id)
return -1;
}
+static void
+signal_handler(int signum)
+{
+ if (signum == SIGINT || signum == SIGTERM) {
+ printf("\n\nSignal %d received, preparing to exit...\n",
+ signum);
+ force_quit = true;
+ }
+}
+
+static int
+rx_core(__rte_unused void *arg)
+{
+ uint32_t queue_id;
+ uint32_t nb_rx = 0;
+ int ret = 0;
+ int comm_list_item = 0;
+ struct rte_mbuf *rx_mbufs[RTE_GPU_COMM_LIST_PKTS_MAX];
+
+ queue_id = (rte_lcore_index(rte_lcore_id()) - 1) / 2;
+
+ if (queue_id > conf_queues) {
+ fprintf(stderr, "Please specify the right list of cores (%d cores) in EAL params to support %d queues.\n",
+ conf_queues*2, conf_queues);
+ RTE_GPU_VOLATILE(force_quit) = true;
+ return -1;
+ }
+
+ printf("RX core started on queue %d.\n", queue_id);
+
+ while (force_quit == false) {
+
+ nb_rx = 0;
+ while (nb_rx < RTE_GPU_COMM_LIST_PKTS_MAX &&
+ nb_rx < (conf_burst - PKT_GAP) &&
+ force_quit == false) {
+ nb_rx += rte_eth_rx_burst(conf_port_id, queue_id,
+ &(rx_mbufs[nb_rx]),
+ (conf_burst - nb_rx));
+ }
+
+ ret = rte_gpu_comm_populate_list_pkts(
+ &(comm_list_fwd[queue_id][comm_list_item]), rx_mbufs, nb_rx);
+ if (ret) {
+ fprintf(stderr, "rte_gpu_comm_populate_list_pkts error %d.\n", ret);
+ return -1;
+ }
+
+#ifdef DEBUG_PRINT
+ printf("RX %d pkts from item %d\n",
+ comm_list_fwd[queue_id][comm_list_item].num_pkts,
+ comm_list_item);
+#endif
+
+ RTE_GPU_VOLATILE(comm_list_fwd[queue_id][comm_list_item].status) = RTE_GPU_COMM_LIST_DONE;
+
+ comm_list_item = (comm_list_item+1) % NUM_COMM_ITEMS;
+ }
+
+ return 0;
+}
+
+static int
+tx_core(__rte_unused void *arg)
+{
+ uint32_t queue_id = 0;
+ uint32_t nb_tx = 0;
+ int ret = 0;
+ int comm_list_item = 0;
+
+ queue_id = (rte_lcore_index(rte_lcore_id()) - 1) / 2;
+ if (queue_id > conf_queues) {
+ fprintf(stderr, "Please specify the right list of cores (%d cores) in EAL params to support %d queues.\n",
+ conf_queues*2, conf_queues);
+ RTE_GPU_VOLATILE(force_quit) = true;
+ return -1;
+ }
+ printf("TX core started on queue %d.\n", queue_id);
+
+ while (force_quit == false) {
+
+#ifdef DEBUG_PRINT
+ printf("Waiting on item %d\n", comm_list_item);
+#endif
+ while (RTE_GPU_VOLATILE(comm_list_fwd[queue_id][comm_list_item].status) !=
+ RTE_GPU_COMM_LIST_DONE && force_quit == false);
+
+ nb_tx = 0;
+ while (nb_tx < comm_list_fwd[queue_id][comm_list_item].num_pkts) {
+ nb_tx += rte_eth_tx_burst(conf_port_id, queue_id,
+ &(comm_list_fwd[queue_id][comm_list_item].mbufs[nb_tx]),
+ comm_list_fwd[queue_id][comm_list_item].num_pkts - nb_tx);
+ }
+ rte_wmb();
+
+#ifdef DEBUG_PRINT
+ printf("TX %d/%d pkts from item %d\n",
+ nb_tx, comm_list_fwd[queue_id][comm_list_item].num_pkts,
+ comm_list_item);
+#endif
+ ret = rte_gpu_comm_cleanup_list(&(comm_list_fwd[queue_id][comm_list_item]));
+ if (ret) {
+ fprintf(stderr, "rte_gpu_comm_cleanup_list error %d.\n", ret);
+ return -1;
+ }
+
+ rte_mb();
+
+ comm_list_item = (comm_list_item+1) % NUM_COMM_ITEMS;
+ }
+
+ return 0;
+}
+
int
main(int argc, char **argv)
{
- int ret;
+ int ret, core_id;
int nb_gpus = 0;
+ int nb_ports = 0;
int16_t gpu_id = 0;
+ uint32_t idx_q = 0;
struct rte_gpu_info ginfo;
+ struct rte_eth_dev_info dev_info;
/* Init EAL. */
ret = rte_eal_init(argc, argv);
@@ -356,8 +586,14 @@ main(int argc, char **argv)
rte_exit(EXIT_FAILURE, "EAL init failed\n");
argc -= ret;
argv += ret;
- if (argc > 1)
- args_parse(argc, argv);
+ if (argc > 1) {
+ ret = args_parse(argc, argv);
+ if (ret) {
+ fprintf(stderr, "Input args error.\n");
+ goto exit;
+ }
+ }
+
argc -= ret;
argv += ret;
@@ -381,25 +617,228 @@ main(int argc, char **argv)
if (nb_gpus == 0) {
fprintf(stderr, "Need at least one GPU on the system to run the example\n");
- return EXIT_FAILURE;
+ goto exit;
}
- gpu_id = 0;
+ if (nb_gpus < conf_gpu_id) {
+ fprintf(stderr, "Not enough GPUs in the system (%d / %d).\n", nb_gpus, conf_gpu_id);
+ goto exit;
+ }
- /**
- * Memory tests
- */
- alloc_gpu_memory(gpu_id);
- register_cpu_memory(gpu_id);
+ if (conf_testapi == true) {
+ /* Memory tests */
+ alloc_gpu_memory(gpu_id);
+ register_cpu_memory(gpu_id);
- /**
- * Communication items test
- */
- create_update_comm_flag(gpu_id);
- create_update_comm_list(gpu_id);
+ /* Communication items test */
+ create_update_comm_flag(gpu_id);
+ create_update_comm_list(gpu_id);
+
+ goto exit;
+ }
+
+ force_quit = false;
+ signal(SIGINT, signal_handler);
+ signal(SIGTERM, signal_handler);
+
+ nb_ports = rte_eth_dev_count_avail();
+ if (nb_ports == 0)
+ rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n");
+
+ ret = rte_eth_dev_info_get(conf_port_id, &dev_info);
+ if (ret) {
+ fprintf(stderr, "rte_eth_dev_info_get failed with %d.\n", ret);
+ goto exit;
+ }
+
+ /* Create external memory mempool. */
+ ext_mem.elt_size = conf_mbuf_dataroom + RTE_PKTMBUF_HEADROOM;
+ ext_mem.buf_len = RTE_ALIGN_CEIL(conf_nb_mbuf * ext_mem.elt_size, GPU_PAGE_SIZE);
+
+ if (conf_mtype == MEMORY_CPU) {
+ ext_mem.buf_ptr = rte_malloc("extmem", ext_mem.buf_len, 0);
+ if (ext_mem.buf_ptr == NULL) {
+ fprintf(stderr, "Could not allocate CPU DPDK memory.\n");
+ goto exit;
+ }
+
+ ret = rte_gpu_mem_register(conf_gpu_id, ext_mem.buf_len, ext_mem.buf_ptr);
+ if (ret < 0) {
+ fprintf(stderr,
+ "rte_gpu_mem_register CPU memory returned error %d.\n", ret);
+ return -1;
+ }
+ } else {
+ ext_mem.buf_iova = RTE_BAD_IOVA;
+
+ ext_mem.buf_ptr = rte_gpu_mem_alloc(conf_gpu_id, ext_mem.buf_len);
+ if (ext_mem.buf_ptr == NULL) {
+ fprintf(stderr, "Could not allocate GPU device memory.\n");
+ goto exit;
+ }
+
+ ret = rte_extmem_register(ext_mem.buf_ptr, ext_mem.buf_len,
+ NULL, ext_mem.buf_iova, GPU_PAGE_SIZE);
+ if (ret) {
+ fprintf(stderr, "Unable to register addr 0x%p, ret %d.\n", ext_mem.buf_ptr, ret);
+ goto exit;
+ }
+ }
+
+ /* DMA map the external memory. */
+ ret = rte_dev_dma_map(dev_info.device, ext_mem.buf_ptr,
+ ext_mem.buf_iova, ext_mem.buf_len);
+ if (ret) {
+ fprintf(stderr, "Could not DMA map EXT memory.\n");
+ goto exit;
+ }
+
+ /* Create external memory mempool. */
+ mpool = rte_pktmbuf_pool_create_extbuf("payload_mpool", conf_nb_mbuf,
+ 0, 0, ext_mem.elt_size,
+ rte_socket_id(), &ext_mem, 1);
+ if (mpool == NULL) {
+ fprintf(stderr, "Could not create EXT memory mempool.\n");
+ goto exit;
+ }
+
+ /* Queues configuration. */
+ ret = rte_eth_dev_configure(conf_port_id, conf_queues,
+ conf_queues, &port_conf);
+ if (ret < 0) {
+ fprintf(stderr,
+ "Cannot configure device: err=%d, port=%u queues=%u\n",
+ ret, conf_port_id, conf_queues);
+ goto exit;
+ }
+
+ ret = rte_eth_dev_adjust_nb_rx_tx_desc(conf_port_id,
+ &conf_nb_descriptors, &conf_nb_descriptors);
+ if (ret) {
+ fprintf(stderr,
+ "Cannot adjust number of descriptors: err=%d, port=%u\n",
+ ret, conf_port_id);
+ goto exit;
+ }
+
+ for (idx_q = 0; idx_q < conf_queues; idx_q++) {
+
+ ret = rte_eth_rx_queue_setup(conf_port_id, idx_q,
+ conf_nb_descriptors, rte_lcore_to_socket_id(idx_q),
+ NULL, mpool);
+
+ if (ret) {
+ fprintf(stderr, "rte_eth_rx_queue_setup: err=%d, port=%u\n",
+ ret, conf_port_id);
+ goto exit;
+ }
+
+ ret = rte_eth_tx_queue_setup(conf_port_id, idx_q,
+ conf_nb_descriptors, rte_lcore_to_socket_id(idx_q), NULL);
+ if (ret) {
+ fprintf(stderr, "rte_eth_tx_queue_setup: err=%d, port=%u\n",
+ ret, conf_port_id);
+ goto exit;
+ }
+ }
+
+ rte_eth_macaddr_get(conf_port_id, &port_eth_addr);
+
+ ret = rte_eth_dev_start(conf_port_id);
+ if (ret) {
+ fprintf(stderr, "rte_eth_dev_start: err=%d, port=%u\n",
+ ret, conf_port_id);
+ goto exit;
+ }
+
+ printf("Port %d: %02x:%02x:%02x:%02x:%02x:%02x started!\n",
+ conf_port_id,
+ (uint8_t)port_eth_addr.addr_bytes[0],
+ (uint8_t)port_eth_addr.addr_bytes[1],
+ port_eth_addr.addr_bytes[2],
+ port_eth_addr.addr_bytes[3],
+ port_eth_addr.addr_bytes[4],
+ port_eth_addr.addr_bytes[5]);
+
+ rte_eth_promiscuous_enable(conf_port_id);
+
+ /* Create communication lists, one per queue. */
+ for (idx_q = 0; idx_q < MAX_QUEUES; idx_q++) {
+ comm_list_fwd[idx_q] = NULL;
+
+ if (idx_q < conf_queues) {
+ comm_list_fwd[idx_q] = rte_gpu_comm_create_list(conf_gpu_id,
+ NUM_COMM_ITEMS);
+ if (comm_list_fwd[idx_q] == NULL) {
+ fprintf(stderr, "comm_create_list returned error %d\n",
+ ret);
+ goto exit;
+ }
+ ret = rte_gpu_comm_cleanup_list(&(comm_list_fwd[idx_q][0]));
+ if (ret < 0) {
+ fprintf(stderr, "comm_cleanup_list returned error %d\n",
+ ret);
+ goto exit;
+ }
+ }
+ }
+
+ core_id = 0;
+ for (idx_q = 0; idx_q < conf_queues; idx_q++) {
+ core_id = rte_get_next_lcore(core_id, 1, 0);
+ rte_eal_remote_launch(tx_core, NULL, core_id);
+
+ core_id = rte_get_next_lcore(core_id, 1, 0);
+ rte_eal_remote_launch(rx_core, NULL, core_id);
+ }
+
+ core_id = 0;
+ RTE_LCORE_FOREACH_WORKER(core_id) {
+ if (rte_eal_wait_lcore(core_id) < 0) {
+ fprintf(stderr, "bad exit for core %d.\n",
+ core_id);
+ break;
+ }
+ }
+
+ force_quit = true;
+
+ ret = rte_dev_dma_unmap(dev_info.device, (void *)ext_mem.buf_ptr,
+ RTE_BAD_IOVA, ext_mem.buf_len);
+ if (ret) {
+ fprintf(stderr,
+ "rte_dev_dma_unmap 0x%p -> %d (rte_errno = %d)\n",
+ (uint8_t *)ext_mem.buf_ptr, ret, rte_errno);
+ goto exit;
+ }
+
+ if (conf_mtype == MEMORY_CPU) {
+ ret = rte_gpu_mem_unregister(conf_gpu_id, ext_mem.buf_ptr);
+ if (ret < 0) {
+ fprintf(stderr, "rte_gpu_mem_unregister returned error %d\n", ret);
+ goto exit;
+ }
+
+ rte_free(ext_mem.buf_ptr);
+
+ } else {
+
+ ret = rte_extmem_unregister(ext_mem.buf_ptr, ext_mem.buf_len);
+ if (ret) {
+ fprintf(stderr, "rte_extmem_unregister failed with %d.\n", ret);
+ goto exit;
+ }
+
+ rte_gpu_mem_free(conf_gpu_id, (void *)ext_mem.buf_ptr);
+ }
+
+ rte_eth_dev_stop(conf_port_id);
+ rte_eth_dev_close(conf_port_id);
+exit:
/* clean up the EAL */
rte_eal_cleanup();
+ printf("Bye...\n");
return EXIT_SUCCESS;
}
--
2.17.1
next prev parent reply other threads:[~2021-11-18 10:45 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-11-18 1:52 [PATCH v1 0/1] " eagostini
2021-11-18 1:52 ` [PATCH v1 1/1] " eagostini
2021-11-18 6:17 ` Jerin Jacob
2021-11-18 10:16 ` Elena Agostini
2021-11-18 18:56 ` [PATCH v2 0/1] " eagostini
2021-11-18 18:56 ` eagostini [this message]
2023-07-06 18:58 ` [PATCH v2 1/1] " Stephen Hemminger
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20211118185613.3246-2-eagostini@nvidia.com \
--to=eagostini@nvidia.com \
--cc=dev@dpdk.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).