* [PATCH v1 0/1] app/test-gpudev: introduce ethdev to rx/tx packets using GPU memory
@ 2021-11-18 1:52 eagostini
  2021-11-18 1:52 ` [PATCH v1 1/1] " eagostini
  2021-11-18 18:56 ` [PATCH v2 0/1] " eagostini
  0 siblings, 2 replies; 7+ messages in thread
From: eagostini @ 2021-11-18 1:52 UTC (permalink / raw)
  To: dev; +Cc: Elena Agostini

From: Elena Agostini <eagostini@nvidia.com>

This patch introduces ethdev in the test-gpudev app to provide:
- an example of how GPU memory can be used to send and receive packets
- a useful tool to measure network metrics when using GPU memory with
  I/O forwarding

With this feature test-gpudev can:
- RX packets into CPU or GPU memory
- Store packets in the gpudev communication list
- TX the received packets from the communication list

It is a simulation of a multi-core application.

Elena Agostini (1):
  app/test-gpudev: introduce ethdev to rx/tx packets using GPU memory

 app/test-gpudev/main.c | 471 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 452 insertions(+), 19 deletions(-)

--
2.17.1

^ permalink raw reply	[flat|nested] 7+ messages in thread
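As a rough usage illustration of the options introduced in the patch below, an invocation along these lines should exercise the I/O forwarding path with a GPU-memory mempool. The binary name, core list and PCI addresses are placeholders for the local setup and are not taken from the patch; the app needs the main lcore plus one RX and one TX lcore per queue:

  ./dpdk-test-gpudev -l 0-2 -a <NIC PCI address> -a <GPU PCI address> -- \
        --gpu 0 --memory 1 --mbufd 2048 --queues 1 --burst 64

Passing --testapi instead restricts the run to the pre-existing gpudev API tests.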
* [PATCH v1 1/1] app/test-gpudev: introduce ethdev to rx/tx packets using GPU memory 2021-11-18 1:52 [PATCH v1 0/1] app/test-gpudev: introduce ethdev to rx/tx packets using GPU memory eagostini @ 2021-11-18 1:52 ` eagostini 2021-11-18 6:17 ` Jerin Jacob 2021-11-18 18:56 ` [PATCH v2 0/1] " eagostini 1 sibling, 1 reply; 7+ messages in thread From: eagostini @ 2021-11-18 1:52 UTC (permalink / raw) To: dev; +Cc: Elena Agostini From: Elena Agostini <eagostini@nvidia.com> This patch introduces ethdev in test-gpudev app to provide: - an example to show how GPU memory can be used to send and receive packets - an useful tool to measure network metrics when using GPU memory with io forwarding With this feature test-gpudev can: - RX packets in CPU or GPU memory - Store packets in the gpudev communication list - TX receive packets from the communication list It's a simulation of a multi-core application. Signed-off-by: Elena Agostini <eagostini@nvidia.com> --- app/test-gpudev/main.c | 471 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 452 insertions(+), 19 deletions(-) diff --git a/app/test-gpudev/main.c b/app/test-gpudev/main.c index 250fba6427..daa586c64e 100644 --- a/app/test-gpudev/main.c +++ b/app/test-gpudev/main.c @@ -10,6 +10,8 @@ #include <stdarg.h> #include <errno.h> #include <getopt.h> +#include <stdbool.h> +#include <signal.h> #include <rte_common.h> #include <rte_malloc.h> @@ -19,22 +21,98 @@ #include <rte_ethdev.h> #include <rte_mempool.h> #include <rte_mbuf.h> +#include <rte_launch.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> #include <rte_gpudev.h> +#ifndef ACCESS_ONCE +#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&x) +#endif + +#ifndef WRITE_ONCE +#define WRITE_ONCE(x, v) (ACCESS_ONCE(x) = (v)) +#endif + +#define GPU_PAGE_SHIFT 16 +#define GPU_PAGE_SIZE (1UL << GPU_PAGE_SHIFT) +#define GPU_PAGE_OFFSET (GPU_PAGE_SIZE-1) +#define GPU_PAGE_MASK (~GPU_PAGE_OFFSET) + +#define MAX_QUEUES 16 +#define NUM_COMM_ITEMS 2048 +#define PKT_GAP 4 + +// #define DEBUG_PRINT 1 + enum app_args { ARG_HELP, - ARG_MEMPOOL + ARG_BURST, + ARG_GPU, + ARG_MBUFD, + ARG_MEMORY, + ARG_QUEUES, + ARG_TESTAPI, +}; + +enum mem_type { + MEMORY_CPU, + MEMORY_GPU +}; + +/* Options configurable from cmd line */ +static uint32_t conf_burst = 64; +static uint16_t conf_gpu_id = 0; +static enum mem_type conf_mtype = MEMORY_CPU; +static uint32_t conf_mbuf_dataroom = 2048; +static uint32_t conf_queues = 1; +static bool conf_testapi = false; +static uint16_t conf_nb_descriptors = 2048; + +/* Options statically defined */ +static uint32_t conf_nb_mbuf = 16384; +static uint16_t conf_port_id = 0; + +/* Other variables */ +static volatile bool force_quit; +static struct rte_mempool *mpool; +static struct rte_pktmbuf_extmem ext_mem; +struct rte_gpu_comm_list *comm_list_fwd[MAX_QUEUES]; +struct rte_ether_addr port_eth_addr; +static struct rte_eth_conf port_conf = { + .rxmode = { + .mq_mode = ETH_MQ_RX_RSS, + .split_hdr_size = 0, + .offloads = 0, + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + .offloads = 0, + }, + .rx_adv_conf = { + .rss_conf = { + .rss_key = NULL, + .rss_hf = ETH_RSS_IP + }, + }, }; static void usage(const char *prog_name) { - printf("%s [EAL options] --\n", + printf("%s [EAL options] --\n" + " --help\n" + " --burst N: number of packets per rx burst\n" + " --gpu N: GPU ID to use\n" + " --memory N: external mempool memory type, 0 CPU, 1 GPU\n" + " --mbufd N: mbuf dataroom size\n" + " --testapi: test gpudev function\n" + " --queues N: number of RX queues\n", prog_name); } -static void +static 
int args_parse(int argc, char **argv) { char **argvopt; @@ -42,7 +120,19 @@ args_parse(int argc, char **argv) int opt_idx; static struct option lgopts[] = { - { "help", 0, 0, ARG_HELP}, + { "help", 0, 0, ARG_HELP}, + /* Packets per burst. */ + { "burst", 1, 0, ARG_BURST}, + /* GPU to use. */ + { "gpu", 1, 0, ARG_GPU}, + /* Type of memory for the mempool. */ + { "memory", 1, 0, ARG_MEMORY}, + /* Size of mbufs dataroom */ + { "mbufd", 1, 0, ARG_MBUFD}, + /* Number of RX queues */ + { "queues", 1, 0, ARG_QUEUES}, + /* Test only gpudev functions */ + { "testapi", 0, 0, ARG_TESTAPI}, /* End of options */ { 0, 0, 0, 0 } }; @@ -51,6 +141,24 @@ args_parse(int argc, char **argv) while ((opt = getopt_long(argc, argvopt, "", lgopts, &opt_idx)) != EOF) { switch (opt) { + case ARG_BURST: + conf_burst = (uint32_t) atoi(optarg); + break; + case ARG_GPU: + conf_gpu_id = (uint16_t) atoi(optarg); + break; + case ARG_MEMORY: + conf_mtype = (atoi(optarg) == 1 ? MEMORY_GPU : MEMORY_CPU); + break; + case ARG_MBUFD: + conf_mbuf_dataroom = (uint32_t) atoi(optarg); + break; + case ARG_QUEUES: + conf_queues = (uint32_t) atoi(optarg); + break; + case ARG_TESTAPI: + conf_testapi = (atoi(optarg) == 1 ? true : false); + break; case ARG_HELP: usage(argv[0]); break; @@ -60,6 +168,20 @@ args_parse(int argc, char **argv) break; } } + + if (conf_queues > MAX_QUEUES) { + fprintf(stderr, "Can't support more than %d queues\n", MAX_QUEUES); + return -1; + } + + if (conf_queues * 2 > rte_lcore_count()) { + fprintf(stderr, + "Need to use at least %d cores to support %d RX/TX queues (EAL cores %d)\n", + conf_queues * 2, conf_queues, rte_lcore_count()); + return -1; + } + + return 0; } static int @@ -342,13 +464,118 @@ create_update_comm_list(uint16_t gpu_id) return -1; } +static void +signal_handler(int signum) +{ + if (signum == SIGINT || signum == SIGTERM) { + printf("\n\nSignal %d received, preparing to exit...\n", + signum); + force_quit = true; + } +} + +static int +rx_core(__rte_unused void *arg) +{ + uint32_t queue_id; + uint32_t nb_rx = 0; + int ret = 0; + int comm_list_item = 0; + struct rte_mbuf *rx_mbufs[RTE_GPU_COMM_LIST_PKTS_MAX]; + + queue_id = (rte_lcore_index(rte_lcore_id()) - 1) / 2; + + printf("RX core started on queue %d.\n", queue_id); + + while (force_quit == false) { + + nb_rx = 0; + while (nb_rx < RTE_GPU_COMM_LIST_PKTS_MAX && + nb_rx < (conf_burst - PKT_GAP) && + force_quit == false) { + nb_rx += rte_eth_rx_burst(conf_port_id, queue_id, + &(rx_mbufs[nb_rx]), + (conf_burst - nb_rx)); + } + + ret = rte_gpu_comm_populate_list_pkts( + &(comm_list_fwd[queue_id][comm_list_item]), rx_mbufs, nb_rx); + if (ret) { + fprintf(stderr, + "rte_gpu_comm_populate_list_pkts error %d.\n", ret); + return -1; + } + +#ifdef DEBUG_PRINT + printf("RX %d pkts from item %d\n", + comm_list_fwd[queue_id][comm_list_item].num_pkts, + comm_list_item); +#endif + + WRITE_ONCE(comm_list_fwd[queue_id][comm_list_item].status, RTE_GPU_COMM_LIST_DONE); + + comm_list_item = (comm_list_item+1) % NUM_COMM_ITEMS; + } + + return 0; +} + +static int +tx_core(__rte_unused void *arg) +{ + uint32_t queue_id = 0; + uint32_t nb_tx = 0; + int ret = 0; + int comm_list_item = 0; + + queue_id = (rte_lcore_index(rte_lcore_id()) - 1) / 2; + printf("TX core started on queue %d.\n", queue_id); + + while (force_quit == false) { + +#ifdef DEBUG_PRINT + printf("Waiting on item %d\n", comm_list_item); +#endif + while (ACCESS_ONCE(comm_list_fwd[queue_id][comm_list_item].status) + != RTE_GPU_COMM_LIST_DONE && force_quit == false); + + nb_tx = 0; + while (nb_tx < 
comm_list_fwd[queue_id][comm_list_item].num_pkts) { + nb_tx += rte_eth_tx_burst(conf_port_id, queue_id, + &(comm_list_fwd[queue_id][comm_list_item].mbufs[nb_tx]), + comm_list_fwd[queue_id][comm_list_item].num_pkts - nb_tx); + } + rte_wmb(); + +#ifdef DEBUG_PRINT + printf("TX %d/%d pkts from item %d\n", + nb_tx, comm_list_fwd[queue_id][comm_list_item].num_pkts, + comm_list_item); +#endif + ret = rte_gpu_comm_cleanup_list(&(comm_list_fwd[queue_id][comm_list_item])); + if (ret) { + fprintf(stderr, "rte_gpu_comm_cleanup_list error %d.\n", ret); + return -1; + } + + rte_mb(); + + comm_list_item = (comm_list_item+1) % NUM_COMM_ITEMS; + } + + return 0; +} + int main(int argc, char **argv) { - int ret; + int ret, core_id; int nb_gpus = 0; + int nb_ports = 0; int16_t gpu_id = 0; + uint32_t idx_q = 0; struct rte_gpu_info ginfo; + struct rte_eth_dev_info dev_info; /* Init EAL. */ ret = rte_eal_init(argc, argv); @@ -356,8 +583,14 @@ main(int argc, char **argv) rte_exit(EXIT_FAILURE, "EAL init failed\n"); argc -= ret; argv += ret; - if (argc > 1) - args_parse(argc, argv); + if (argc > 1) { + ret = args_parse(argc, argv); + if (ret) { + fprintf(stderr, "Input args error.\n"); + goto exit; + } + } + argc -= ret; argv += ret; @@ -381,25 +614,225 @@ main(int argc, char **argv) if (nb_gpus == 0) { fprintf(stderr, "Need at least one GPU on the system to run the example\n"); - return EXIT_FAILURE; + goto exit; } - gpu_id = 0; + if (nb_gpus < conf_gpu_id) { + fprintf(stderr, "Not enough GPUs in the system (%d / %d).\n", nb_gpus, conf_gpu_id); + goto exit; + } - /** - * Memory tests - */ - alloc_gpu_memory(gpu_id); - register_cpu_memory(gpu_id); + if (conf_testapi == true) { + /* Memory tests */ + alloc_gpu_memory(gpu_id); + register_cpu_memory(gpu_id); - /** - * Communication items test - */ - create_update_comm_flag(gpu_id); - create_update_comm_list(gpu_id); + /* Communication items test */ + create_update_comm_flag(gpu_id); + create_update_comm_list(gpu_id); + + goto exit; + } + + force_quit = false; + signal(SIGINT, signal_handler); + signal(SIGTERM, signal_handler); + + nb_ports = rte_eth_dev_count_avail(); + if (nb_ports == 0) + rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n"); + + ret = rte_eth_dev_info_get(conf_port_id, &dev_info); + if (ret) { + fprintf(stderr, "rte_eth_dev_info_get failed with %d.\n", ret); + goto exit; + } + + /* Create external memory mempool. */ + ext_mem.elt_size = conf_mbuf_dataroom + RTE_PKTMBUF_HEADROOM; + ext_mem.buf_len = RTE_ALIGN_CEIL(conf_nb_mbuf * ext_mem.elt_size, GPU_PAGE_SIZE); + + if (conf_mtype == MEMORY_CPU) { + ext_mem.buf_ptr = rte_malloc("extmem", ext_mem.buf_len, 0); + if (ext_mem.buf_ptr == NULL) { + fprintf(stderr, "Could not allocate CPU DPDK memory.\n"); + goto exit; + } + + ret = rte_gpu_mem_register(conf_gpu_id, ext_mem.buf_len, ext_mem.buf_ptr); + if (ret < 0) { + fprintf(stderr, + "rte_gpu_mem_register CPU memory returned error %d.\n", ret); + return -1; + } + } else { + ext_mem.buf_iova = RTE_BAD_IOVA; + + ext_mem.buf_ptr = rte_gpu_mem_alloc(conf_gpu_id, ext_mem.buf_len); + if (ext_mem.buf_ptr == NULL) { + fprintf(stderr, "Could not allocate GPU device memory.\n"); + goto exit; + } + + ret = rte_extmem_register(ext_mem.buf_ptr, ext_mem.buf_len, + NULL, ext_mem.buf_iova, GPU_PAGE_SIZE); + if (ret) { + fprintf(stderr, "Unable to register addr 0x%p, ret %d.\n", ext_mem.buf_ptr, ret); + goto exit; + } + } + + /* DMA map the external memory. 
*/ + ret = rte_dev_dma_map(dev_info.device, ext_mem.buf_ptr, + ext_mem.buf_iova, ext_mem.buf_len); + if (ret) { + fprintf(stderr, "Could not DMA map EXT memory.\n"); + goto exit; + } + + /* Create external memory mempool. */ + mpool = rte_pktmbuf_pool_create_extbuf("payload_mpool", conf_nb_mbuf, + 0, 0, ext_mem.elt_size, + rte_socket_id(), &ext_mem, 1); + if (mpool == NULL) { + fprintf(stderr, "Could not create EXT memory mempool.\n"); + goto exit; + } + + /* Queues configuration. */ + ret = rte_eth_dev_configure(conf_port_id, conf_queues, + conf_queues, &port_conf); + if (ret < 0) { + fprintf(stderr, + "Cannot configure device: err=%d, port=%u queues=%u\n", + ret, conf_port_id, conf_queues); + goto exit; + } + + ret = rte_eth_dev_adjust_nb_rx_tx_desc(conf_port_id, + &conf_nb_descriptors, &conf_nb_descriptors); + if (ret) { + fprintf(stderr, + "Cannot adjust number of descriptors: err=%d, port=%u\n", + ret, conf_port_id); + goto exit; + } + + for (idx_q = 0; idx_q < conf_queues; idx_q++) { + + ret = rte_eth_rx_queue_setup(conf_port_id, idx_q, + conf_nb_descriptors, rte_lcore_to_socket_id(idx_q), + NULL, mpool); + + if (ret) { + fprintf(stderr, "rte_eth_rx_queue_setup: err=%d, port=%u\n", + ret, conf_port_id); + goto exit; + } + + ret = rte_eth_tx_queue_setup(conf_port_id, idx_q, + conf_nb_descriptors, rte_lcore_to_socket_id(idx_q), NULL); + if (ret) { + fprintf(stderr, "rte_eth_tx_queue_setup: err=%d, port=%u\n", + ret, conf_port_id); + goto exit; + } + } + + rte_eth_macaddr_get(conf_port_id, &port_eth_addr); + + ret = rte_eth_dev_start(conf_port_id); + if (ret) { + fprintf(stderr, "rte_eth_dev_start: err=%d, port=%u\n", + ret, conf_port_id); + goto exit; + } + + printf("Port %d: %02x:%02x:%02x:%02x:%02x:%02x started!\n", + conf_port_id, + (uint8_t)port_eth_addr.addr_bytes[0], + (uint8_t)port_eth_addr.addr_bytes[1], + port_eth_addr.addr_bytes[2], + port_eth_addr.addr_bytes[3], + port_eth_addr.addr_bytes[4], + port_eth_addr.addr_bytes[5]); + + rte_eth_promiscuous_enable(conf_port_id); + + /* Create communication lists, one per queue. 
*/ + for (idx_q = 0; idx_q < MAX_QUEUES; idx_q++) { + comm_list_fwd[idx_q] = NULL; + + if (idx_q < conf_queues) { + comm_list_fwd[idx_q] = rte_gpu_comm_create_list(conf_gpu_id, NUM_COMM_ITEMS); + if (comm_list_fwd[idx_q] == NULL) { + fprintf(stderr, "rte_gpu_comm_create_list returned error %d\n", ret); + goto exit; + } + ret = rte_gpu_comm_cleanup_list(&(comm_list_fwd[idx_q][0])); + if (ret < 0) { + fprintf(stderr, "rte_gpu_comm_cleanup_list returned error %d\n", ret); + goto exit; + } + } + } + + core_id = 0; + for (idx_q = 0; idx_q < conf_queues; idx_q++) { + core_id = rte_get_next_lcore(core_id, 1, 0); + rte_eal_remote_launch(tx_core, NULL, core_id); + + core_id = rte_get_next_lcore(core_id, 1, 0); + rte_eal_remote_launch(rx_core, NULL, core_id); + } + + core_id = 0; + RTE_LCORE_FOREACH_WORKER(core_id) { + if (rte_eal_wait_lcore(core_id) < 0) { + fprintf(stderr, "bad exit for core %d.\n", + core_id); + break; + } + } + + force_quit = true; + + ret = rte_dev_dma_unmap(dev_info.device, (void *)ext_mem.buf_ptr, + RTE_BAD_IOVA, ext_mem.buf_len); + if (ret) { + fprintf(stderr, + "rte_dev_dma_unmap 0x%p -> %d (rte_errno = %d)\n", + (uint8_t *)ext_mem.buf_ptr, ret, rte_errno); + goto exit; + } + + if (conf_mtype == MEMORY_CPU) { + ret = rte_gpu_mem_unregister(conf_gpu_id, ext_mem.buf_ptr); + if (ret < 0) { + fprintf(stderr, "rte_gpu_mem_unregister returned error %d\n", ret); + goto exit; + } + + rte_free(ext_mem.buf_ptr); + + } else { + + ret = rte_extmem_unregister(ext_mem.buf_ptr, ext_mem.buf_len); + if (ret) { + fprintf(stderr, "rte_extmem_unregister failed with %d.\n", ret); + goto exit; + } + + rte_gpu_mem_free(conf_gpu_id, (void *)ext_mem.buf_ptr); + } + + rte_eth_dev_stop(conf_port_id); + rte_eth_dev_close(conf_port_id); +exit: /* clean up the EAL */ rte_eal_cleanup(); + printf("Bye...\n"); return EXIT_SUCCESS; } -- 2.17.1 ^ permalink raw reply [flat|nested] 7+ messages in thread
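For readers skimming the large diff above, the part that backs the mbuf payload area with GPU device memory (the --memory 1 case) reduces to four calls that all appear in the patch. The condensed sketch below is only a reading aid: it reuses the patch's own definitions (GPU_PAGE_SIZE, the default dataroom and mbuf count) and drops the per-step error handling the patch performs; it is not additional code proposed for the app.

static struct rte_pktmbuf_extmem ext_mem;

static struct rte_mempool *
gpu_payload_pool_create(int16_t gpu_id, const struct rte_eth_dev_info *dev_info)
{
	ext_mem.elt_size = 2048 + RTE_PKTMBUF_HEADROOM;
	ext_mem.buf_len  = RTE_ALIGN_CEIL(16384 * ext_mem.elt_size, GPU_PAGE_SIZE);
	ext_mem.buf_iova = RTE_BAD_IOVA;

	/* 1. Allocate device memory through gpudev. */
	ext_mem.buf_ptr = rte_gpu_mem_alloc(gpu_id, ext_mem.buf_len);

	/* 2. Register the buffer with DPDK as external memory. */
	rte_extmem_register(ext_mem.buf_ptr, ext_mem.buf_len, NULL,
			ext_mem.buf_iova, GPU_PAGE_SIZE);

	/* 3. DMA-map it so the NIC can receive directly into it. */
	rte_dev_dma_map(dev_info->device, ext_mem.buf_ptr,
			ext_mem.buf_iova, ext_mem.buf_len);

	/* 4. Create the pktmbuf pool on top of that buffer. */
	return rte_pktmbuf_pool_create_extbuf("payload_mpool", 16384, 0, 0,
			ext_mem.elt_size, rte_socket_id(), &ext_mem, 1);
}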
* Re: [PATCH v1 1/1] app/test-gpudev: introduce ethdev to rx/tx packets using GPU memory 2021-11-18 1:52 ` [PATCH v1 1/1] " eagostini @ 2021-11-18 6:17 ` Jerin Jacob 2021-11-18 10:16 ` Elena Agostini 0 siblings, 1 reply; 7+ messages in thread From: Jerin Jacob @ 2021-11-18 6:17 UTC (permalink / raw) To: Elena Agostini; +Cc: dpdk-dev On Thu, Nov 18, 2021 at 12:28 AM <eagostini@nvidia.com> wrote: > > From: Elena Agostini <eagostini@nvidia.com> > > This patch introduces ethdev in test-gpudev app to provide: > - an example to show how GPU memory can be used to send and receive packets > - an useful tool to measure network metrics when using GPU memory with > io forwarding > > With this feature test-gpudev can: > - RX packets in CPU or GPU memory > - Store packets in the gpudev communication list > - TX receive packets from the communication list > > It's a simulation of a multi-core application. > > Signed-off-by: Elena Agostini <eagostini@nvidia.com> > --- > app/test-gpudev/main.c | 471 +++++++++++++++++++++++++++++++++++++++-- > 1 file changed, 452 insertions(+), 19 deletions(-) > > diff --git a/app/test-gpudev/main.c b/app/test-gpudev/main.c > index 250fba6427..daa586c64e 100644 > --- a/app/test-gpudev/main.c > +++ b/app/test-gpudev/main.c > @@ -10,6 +10,8 @@ > #include <stdarg.h> > #include <errno.h> > #include <getopt.h> > +#include <stdbool.h> > +#include <signal.h> > > #include <rte_common.h> > #include <rte_malloc.h> > @@ -19,22 +21,98 @@ > #include <rte_ethdev.h> > #include <rte_mempool.h> > #include <rte_mbuf.h> > +#include <rte_launch.h> > +#include <rte_lcore.h> > +#include <rte_per_lcore.h> > > #include <rte_gpudev.h> > > +#ifndef ACCESS_ONCE > +#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&x) > +#endif > + > +#ifndef WRITE_ONCE > +#define WRITE_ONCE(x, v) (ACCESS_ONCE(x) = (v)) > +#endif Better to have a public version of this macro as it uses just in this test application. ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH v1 1/1] app/test-gpudev: introduce ethdev to rx/tx packets using GPU memory 2021-11-18 6:17 ` Jerin Jacob @ 2021-11-18 10:16 ` Elena Agostini 0 siblings, 0 replies; 7+ messages in thread From: Elena Agostini @ 2021-11-18 10:16 UTC (permalink / raw) To: Jerin Jacob; +Cc: dpdk-dev [-- Attachment #1: Type: text/plain, Size: 2239 bytes --] > From: Jerin Jacob <jerinjacobk@gmail.com> > Date: Thursday, 18 November 2021 at 07:17 > To: Elena Agostini <eagostini@nvidia.com> > Cc: dpdk-dev <dev@dpdk.org> > Subject: Re: [PATCH v1 1/1] app/test-gpudev: introduce ethdev to rx/tx packets using GPU memory > External email: Use caution opening links or attachments> > > On Thu, Nov 18, 2021 at 12:28 AM <eagostini@nvidia.com> wrote: > > > > From: Elena Agostini <eagostini@nvidia.com> > > > > This patch introduces ethdev in test-gpudev app to provide: > > - an example to show how GPU memory can be used to send and receive packets > > - an useful tool to measure network metrics when using GPU memory with > > io forwarding > > > > With this feature test-gpudev can: > > - RX packets in CPU or GPU memory > > - Store packets in the gpudev communication list > > - TX receive packets from the communication list > > > > It's a simulation of a multi-core application. > > > > Signed-off-by: Elena Agostini <eagostini@nvidia.com> > > --- > > app/test-gpudev/main.c | 471 +++++++++++++++++++++++++++++++++++++++-- > > 1 file changed, 452 insertions(+), 19 deletions(-) > > > > diff --git a/app/test-gpudev/main.c b/app/test-gpudev/main.c > > index 250fba6427..daa586c64e 100644 > > --- a/app/test-gpudev/main.c > > +++ b/app/test-gpudev/main.c > > @@ -10,6 +10,8 @@ > > #include <stdarg.h> > > #include <errno.h> > > #include <getopt.h> > > +#include <stdbool.h> > > +#include <signal.h> > > > > #include <rte_common.h> > > #include <rte_malloc.h> > > @@ -19,22 +21,98 @@ > > #include <rte_ethdev.h> > > #include <rte_mempool.h> > > #include <rte_mbuf.h> > > +#include <rte_launch.h> > > +#include <rte_lcore.h> > > +#include <rte_per_lcore.h> > > > > #include <rte_gpudev.h> > > > > +#ifndef ACCESS_ONCE > > +#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&x) > > +#endif > > + > > +#ifndef WRITE_ONCE > > +#define WRITE_ONCE(x, v) (ACCESS_ONCE(x) = (v)) > > +#endif> > Better to have a public version of this macro as it uses just in this > test application. Thanks for taking time to review this patch. I can actually use the RTE_GPU_VOLATILE macro exposed in the gpudev library to replace both of them. [-- Attachment #2: Type: text/html, Size: 8813 bytes --] ^ permalink raw reply [flat|nested] 7+ messages in thread
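Concretely, the change discussed here shows up in v2 below as a one-for-one swap of the local macros for the gpudev helper; for example, the RX-side status update

	WRITE_ONCE(comm_list_fwd[queue_id][comm_list_item].status, RTE_GPU_COMM_LIST_DONE);

becomes

	RTE_GPU_VOLATILE(comm_list_fwd[queue_id][comm_list_item].status) = RTE_GPU_COMM_LIST_DONE;

and the ACCESS_ONCE() busy-wait on the TX side is rewritten the same way.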
* [PATCH v2 0/1] app/test-gpudev: introduce ethdev to rx/tx packets using GPU memory
  2021-11-18 1:52 [PATCH v1 0/1] app/test-gpudev: introduce ethdev to rx/tx packets using GPU memory eagostini
  2021-11-18 1:52 ` [PATCH v1 1/1] " eagostini
@ 2021-11-18 18:56 ` eagostini
  2021-11-18 18:56 ` [PATCH v2 1/1] " eagostini
  1 sibling, 1 reply; 7+ messages in thread
From: eagostini @ 2021-11-18 18:56 UTC (permalink / raw)
  To: dev; +Cc: Elena Agostini

From: Elena Agostini <eagostini@nvidia.com>

This patch introduces ethdev in the test-gpudev app to provide:
- an example of how GPU memory can be used to send and receive packets
- a useful tool to measure network metrics when using GPU memory with
  I/O forwarding

With this feature test-gpudev can:
- RX packets into CPU or GPU memory
- Store packets in the gpudev communication list
- TX the received packets from the communication list

It is a simulation of a multi-core application.

Changelog:
- Address review comments
- Minor improvements

Elena Agostini (1):
  app/test-gpudev: introduce ethdev to rx/tx packets using GPU memory

 app/test-gpudev/main.c | 477 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 458 insertions(+), 19 deletions(-)

--
2.17.1

^ permalink raw reply	[flat|nested] 7+ messages in thread
* [PATCH v2 1/1] app/test-gpudev: introduce ethdev to rx/tx packets using GPU memory 2021-11-18 18:56 ` [PATCH v2 0/1] " eagostini @ 2021-11-18 18:56 ` eagostini 2023-07-06 18:58 ` Stephen Hemminger 0 siblings, 1 reply; 7+ messages in thread From: eagostini @ 2021-11-18 18:56 UTC (permalink / raw) To: dev; +Cc: Elena Agostini From: Elena Agostini <eagostini@nvidia.com> This patch introduces ethdev in test-gpudev app to provide: - an example to show how GPU memory can be used to send and receive packets - an useful tool to measure network metrics when using GPU memory with io forwarding With this feature test-gpudev can: - RX packets in CPU or GPU memory - Store packets in the gpudev communication list - TX receive packets from the communication list It's a simulation of a multi-core application. Signed-off-by: Elena Agostini <eagostini@nvidia.com> --- app/test-gpudev/main.c | 477 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 458 insertions(+), 19 deletions(-) diff --git a/app/test-gpudev/main.c b/app/test-gpudev/main.c index 250fba6427..18de023208 100644 --- a/app/test-gpudev/main.c +++ b/app/test-gpudev/main.c @@ -10,6 +10,8 @@ #include <stdarg.h> #include <errno.h> #include <getopt.h> +#include <stdbool.h> +#include <signal.h> #include <rte_common.h> #include <rte_malloc.h> @@ -19,22 +21,90 @@ #include <rte_ethdev.h> #include <rte_mempool.h> #include <rte_mbuf.h> +#include <rte_launch.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> #include <rte_gpudev.h> +#define GPU_PAGE_SHIFT 16 +#define GPU_PAGE_SIZE (1UL << GPU_PAGE_SHIFT) +#define GPU_PAGE_OFFSET (GPU_PAGE_SIZE-1) +#define GPU_PAGE_MASK (~GPU_PAGE_OFFSET) + +#define MAX_QUEUES 16 +#define NUM_COMM_ITEMS 2048 +#define PKT_GAP 4 + +// #define DEBUG_PRINT 1 + enum app_args { ARG_HELP, - ARG_MEMPOOL + ARG_BURST, + ARG_GPU, + ARG_MBUFD, + ARG_MEMORY, + ARG_QUEUES, + ARG_TESTAPI, +}; + +enum mem_type { + MEMORY_CPU, + MEMORY_GPU +}; + +/* Options configurable from cmd line */ +static uint32_t conf_burst = 64; +static uint16_t conf_gpu_id = 0; +static enum mem_type conf_mtype = MEMORY_CPU; +static uint32_t conf_mbuf_dataroom = 2048; +static uint32_t conf_queues = 1; +static bool conf_testapi = false; +static uint16_t conf_nb_descriptors = 2048; + +/* Options statically defined */ +static uint32_t conf_nb_mbuf = 16384; +static uint16_t conf_port_id = 0; + +/* Other variables */ +static volatile bool force_quit; +static struct rte_mempool *mpool; +static struct rte_pktmbuf_extmem ext_mem; +struct rte_gpu_comm_list *comm_list_fwd[MAX_QUEUES]; +struct rte_ether_addr port_eth_addr; +static struct rte_eth_conf port_conf = { + .rxmode = { + .mq_mode = ETH_MQ_RX_RSS, + .split_hdr_size = 0, + .offloads = 0, + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + .offloads = 0, + }, + .rx_adv_conf = { + .rss_conf = { + .rss_key = NULL, + .rss_hf = ETH_RSS_IP + }, + }, }; static void usage(const char *prog_name) { - printf("%s [EAL options] --\n", + printf("%s [EAL options] --\n" + " --help\n" + " --burst N: number of packets per rx burst\n" + " --gpu N: GPU ID to use\n" + " --memory N: external mempool memory type, 0 CPU, 1 GPU\n" + " --mbufd N: mbuf dataroom size\n" + " --testapi: test gpudev function\n" + " --queues N: number of RX queues\n", prog_name); } -static void +static int args_parse(int argc, char **argv) { char **argvopt; @@ -42,7 +112,19 @@ args_parse(int argc, char **argv) int opt_idx; static struct option lgopts[] = { - { "help", 0, 0, ARG_HELP}, + { "help", 0, 0, ARG_HELP}, + /* Packets per burst. 
*/ + { "burst", 1, 0, ARG_BURST}, + /* GPU to use. */ + { "gpu", 1, 0, ARG_GPU}, + /* Type of memory for the mempool. */ + { "memory", 1, 0, ARG_MEMORY}, + /* Size of mbufs dataroom */ + { "mbufd", 1, 0, ARG_MBUFD}, + /* Number of RX queues */ + { "queues", 1, 0, ARG_QUEUES}, + /* Test only gpudev functions */ + { "testapi", 0, 0, ARG_TESTAPI}, /* End of options */ { 0, 0, 0, 0 } }; @@ -51,6 +133,24 @@ args_parse(int argc, char **argv) while ((opt = getopt_long(argc, argvopt, "", lgopts, &opt_idx)) != EOF) { switch (opt) { + case ARG_BURST: + conf_burst = (uint32_t) atoi(optarg); + break; + case ARG_GPU: + conf_gpu_id = (uint16_t) atoi(optarg); + break; + case ARG_MEMORY: + conf_mtype = (atoi(optarg) == 1 ? MEMORY_GPU : MEMORY_CPU); + break; + case ARG_MBUFD: + conf_mbuf_dataroom = (uint32_t) atoi(optarg); + break; + case ARG_QUEUES: + conf_queues = (uint32_t) atoi(optarg); + break; + case ARG_TESTAPI: + conf_testapi = (atoi(optarg) == 1 ? true : false); + break; case ARG_HELP: usage(argv[0]); break; @@ -60,6 +160,19 @@ args_parse(int argc, char **argv) break; } } + + if (conf_queues > MAX_QUEUES) { + fprintf(stderr, "Can't support more than %d queues\n", MAX_QUEUES); + return -1; + } + + if (conf_queues * 2 > rte_lcore_count()) { + fprintf(stderr, "Need to use at least %d cores to support %d RX/TX queues (EAL cores %d)\n", + conf_queues * 2, conf_queues, rte_lcore_count()); + return -1; + } + + return 0; } static int @@ -342,13 +455,130 @@ create_update_comm_list(uint16_t gpu_id) return -1; } +static void +signal_handler(int signum) +{ + if (signum == SIGINT || signum == SIGTERM) { + printf("\n\nSignal %d received, preparing to exit...\n", + signum); + force_quit = true; + } +} + +static int +rx_core(__rte_unused void *arg) +{ + uint32_t queue_id; + uint32_t nb_rx = 0; + int ret = 0; + int comm_list_item = 0; + struct rte_mbuf *rx_mbufs[RTE_GPU_COMM_LIST_PKTS_MAX]; + + queue_id = (rte_lcore_index(rte_lcore_id()) - 1) / 2; + + if (queue_id > conf_queues) { + fprintf(stderr, "Please specify the right list of cores (%d cores) in EAL params to support %d queues.\n", + conf_queues*2, conf_queues); + RTE_GPU_VOLATILE(force_quit) = true; + return -1; + } + + printf("RX core started on queue %d.\n", queue_id); + + while (force_quit == false) { + + nb_rx = 0; + while (nb_rx < RTE_GPU_COMM_LIST_PKTS_MAX && + nb_rx < (conf_burst - PKT_GAP) && + force_quit == false) { + nb_rx += rte_eth_rx_burst(conf_port_id, queue_id, + &(rx_mbufs[nb_rx]), + (conf_burst - nb_rx)); + } + + ret = rte_gpu_comm_populate_list_pkts( + &(comm_list_fwd[queue_id][comm_list_item]), rx_mbufs, nb_rx); + if (ret) { + fprintf(stderr, "rte_gpu_comm_populate_list_pkts error %d.\n", ret); + return -1; + } + +#ifdef DEBUG_PRINT + printf("RX %d pkts from item %d\n", + comm_list_fwd[queue_id][comm_list_item].num_pkts, + comm_list_item); +#endif + + RTE_GPU_VOLATILE(comm_list_fwd[queue_id][comm_list_item].status) = RTE_GPU_COMM_LIST_DONE; + + comm_list_item = (comm_list_item+1) % NUM_COMM_ITEMS; + } + + return 0; +} + +static int +tx_core(__rte_unused void *arg) +{ + uint32_t queue_id = 0; + uint32_t nb_tx = 0; + int ret = 0; + int comm_list_item = 0; + + queue_id = (rte_lcore_index(rte_lcore_id()) - 1) / 2; + if (queue_id > conf_queues) { + fprintf(stderr, "Please specify the right list of cores (%d cores) in EAL params to support %d queues.\n", + conf_queues*2, conf_queues); + RTE_GPU_VOLATILE(force_quit) = true; + return -1; + } + printf("TX core started on queue %d.\n", queue_id); + + while (force_quit == false) { + +#ifdef 
DEBUG_PRINT + printf("Waiting on item %d\n", comm_list_item); +#endif + while (RTE_GPU_VOLATILE(comm_list_fwd[queue_id][comm_list_item].status) != + RTE_GPU_COMM_LIST_DONE && force_quit == false); + + nb_tx = 0; + while (nb_tx < comm_list_fwd[queue_id][comm_list_item].num_pkts) { + nb_tx += rte_eth_tx_burst(conf_port_id, queue_id, + &(comm_list_fwd[queue_id][comm_list_item].mbufs[nb_tx]), + comm_list_fwd[queue_id][comm_list_item].num_pkts - nb_tx); + } + rte_wmb(); + +#ifdef DEBUG_PRINT + printf("TX %d/%d pkts from item %d\n", + nb_tx, comm_list_fwd[queue_id][comm_list_item].num_pkts, + comm_list_item); +#endif + ret = rte_gpu_comm_cleanup_list(&(comm_list_fwd[queue_id][comm_list_item])); + if (ret) { + fprintf(stderr, "rte_gpu_comm_cleanup_list error %d.\n", ret); + return -1; + } + + rte_mb(); + + comm_list_item = (comm_list_item+1) % NUM_COMM_ITEMS; + } + + return 0; +} + int main(int argc, char **argv) { - int ret; + int ret, core_id; int nb_gpus = 0; + int nb_ports = 0; int16_t gpu_id = 0; + uint32_t idx_q = 0; struct rte_gpu_info ginfo; + struct rte_eth_dev_info dev_info; /* Init EAL. */ ret = rte_eal_init(argc, argv); @@ -356,8 +586,14 @@ main(int argc, char **argv) rte_exit(EXIT_FAILURE, "EAL init failed\n"); argc -= ret; argv += ret; - if (argc > 1) - args_parse(argc, argv); + if (argc > 1) { + ret = args_parse(argc, argv); + if (ret) { + fprintf(stderr, "Input args error.\n"); + goto exit; + } + } + argc -= ret; argv += ret; @@ -381,25 +617,228 @@ main(int argc, char **argv) if (nb_gpus == 0) { fprintf(stderr, "Need at least one GPU on the system to run the example\n"); - return EXIT_FAILURE; + goto exit; } - gpu_id = 0; + if (nb_gpus < conf_gpu_id) { + fprintf(stderr, "Not enough GPUs in the system (%d / %d).\n", nb_gpus, conf_gpu_id); + goto exit; + } - /** - * Memory tests - */ - alloc_gpu_memory(gpu_id); - register_cpu_memory(gpu_id); + if (conf_testapi == true) { + /* Memory tests */ + alloc_gpu_memory(gpu_id); + register_cpu_memory(gpu_id); - /** - * Communication items test - */ - create_update_comm_flag(gpu_id); - create_update_comm_list(gpu_id); + /* Communication items test */ + create_update_comm_flag(gpu_id); + create_update_comm_list(gpu_id); + + goto exit; + } + + force_quit = false; + signal(SIGINT, signal_handler); + signal(SIGTERM, signal_handler); + + nb_ports = rte_eth_dev_count_avail(); + if (nb_ports == 0) + rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n"); + + ret = rte_eth_dev_info_get(conf_port_id, &dev_info); + if (ret) { + fprintf(stderr, "rte_eth_dev_info_get failed with %d.\n", ret); + goto exit; + } + + /* Create external memory mempool. 
*/ + ext_mem.elt_size = conf_mbuf_dataroom + RTE_PKTMBUF_HEADROOM; + ext_mem.buf_len = RTE_ALIGN_CEIL(conf_nb_mbuf * ext_mem.elt_size, GPU_PAGE_SIZE); + + if (conf_mtype == MEMORY_CPU) { + ext_mem.buf_ptr = rte_malloc("extmem", ext_mem.buf_len, 0); + if (ext_mem.buf_ptr == NULL) { + fprintf(stderr, "Could not allocate CPU DPDK memory.\n"); + goto exit; + } + + ret = rte_gpu_mem_register(conf_gpu_id, ext_mem.buf_len, ext_mem.buf_ptr); + if (ret < 0) { + fprintf(stderr, + "rte_gpu_mem_register CPU memory returned error %d.\n", ret); + return -1; + } + } else { + ext_mem.buf_iova = RTE_BAD_IOVA; + + ext_mem.buf_ptr = rte_gpu_mem_alloc(conf_gpu_id, ext_mem.buf_len); + if (ext_mem.buf_ptr == NULL) { + fprintf(stderr, "Could not allocate GPU device memory.\n"); + goto exit; + } + + ret = rte_extmem_register(ext_mem.buf_ptr, ext_mem.buf_len, + NULL, ext_mem.buf_iova, GPU_PAGE_SIZE); + if (ret) { + fprintf(stderr, "Unable to register addr 0x%p, ret %d.\n", ext_mem.buf_ptr, ret); + goto exit; + } + } + + /* DMA map the external memory. */ + ret = rte_dev_dma_map(dev_info.device, ext_mem.buf_ptr, + ext_mem.buf_iova, ext_mem.buf_len); + if (ret) { + fprintf(stderr, "Could not DMA map EXT memory.\n"); + goto exit; + } + + /* Create external memory mempool. */ + mpool = rte_pktmbuf_pool_create_extbuf("payload_mpool", conf_nb_mbuf, + 0, 0, ext_mem.elt_size, + rte_socket_id(), &ext_mem, 1); + if (mpool == NULL) { + fprintf(stderr, "Could not create EXT memory mempool.\n"); + goto exit; + } + + /* Queues configuration. */ + ret = rte_eth_dev_configure(conf_port_id, conf_queues, + conf_queues, &port_conf); + if (ret < 0) { + fprintf(stderr, + "Cannot configure device: err=%d, port=%u queues=%u\n", + ret, conf_port_id, conf_queues); + goto exit; + } + + ret = rte_eth_dev_adjust_nb_rx_tx_desc(conf_port_id, + &conf_nb_descriptors, &conf_nb_descriptors); + if (ret) { + fprintf(stderr, + "Cannot adjust number of descriptors: err=%d, port=%u\n", + ret, conf_port_id); + goto exit; + } + + for (idx_q = 0; idx_q < conf_queues; idx_q++) { + + ret = rte_eth_rx_queue_setup(conf_port_id, idx_q, + conf_nb_descriptors, rte_lcore_to_socket_id(idx_q), + NULL, mpool); + + if (ret) { + fprintf(stderr, "rte_eth_rx_queue_setup: err=%d, port=%u\n", + ret, conf_port_id); + goto exit; + } + + ret = rte_eth_tx_queue_setup(conf_port_id, idx_q, + conf_nb_descriptors, rte_lcore_to_socket_id(idx_q), NULL); + if (ret) { + fprintf(stderr, "rte_eth_tx_queue_setup: err=%d, port=%u\n", + ret, conf_port_id); + goto exit; + } + } + + rte_eth_macaddr_get(conf_port_id, &port_eth_addr); + + ret = rte_eth_dev_start(conf_port_id); + if (ret) { + fprintf(stderr, "rte_eth_dev_start: err=%d, port=%u\n", + ret, conf_port_id); + goto exit; + } + + printf("Port %d: %02x:%02x:%02x:%02x:%02x:%02x started!\n", + conf_port_id, + (uint8_t)port_eth_addr.addr_bytes[0], + (uint8_t)port_eth_addr.addr_bytes[1], + port_eth_addr.addr_bytes[2], + port_eth_addr.addr_bytes[3], + port_eth_addr.addr_bytes[4], + port_eth_addr.addr_bytes[5]); + + rte_eth_promiscuous_enable(conf_port_id); + + /* Create communication lists, one per queue. 
*/ + for (idx_q = 0; idx_q < MAX_QUEUES; idx_q++) { + comm_list_fwd[idx_q] = NULL; + + if (idx_q < conf_queues) { + comm_list_fwd[idx_q] = rte_gpu_comm_create_list(conf_gpu_id, + NUM_COMM_ITEMS); + if (comm_list_fwd[idx_q] == NULL) { + fprintf(stderr, "comm_create_list returned error %d\n", + ret); + goto exit; + } + ret = rte_gpu_comm_cleanup_list(&(comm_list_fwd[idx_q][0])); + if (ret < 0) { + fprintf(stderr, "comm_cleanup_list returned error %d\n", + ret); + goto exit; + } + } + } + + core_id = 0; + for (idx_q = 0; idx_q < conf_queues; idx_q++) { + core_id = rte_get_next_lcore(core_id, 1, 0); + rte_eal_remote_launch(tx_core, NULL, core_id); + + core_id = rte_get_next_lcore(core_id, 1, 0); + rte_eal_remote_launch(rx_core, NULL, core_id); + } + + core_id = 0; + RTE_LCORE_FOREACH_WORKER(core_id) { + if (rte_eal_wait_lcore(core_id) < 0) { + fprintf(stderr, "bad exit for core %d.\n", + core_id); + break; + } + } + + force_quit = true; + + ret = rte_dev_dma_unmap(dev_info.device, (void *)ext_mem.buf_ptr, + RTE_BAD_IOVA, ext_mem.buf_len); + if (ret) { + fprintf(stderr, + "rte_dev_dma_unmap 0x%p -> %d (rte_errno = %d)\n", + (uint8_t *)ext_mem.buf_ptr, ret, rte_errno); + goto exit; + } + + if (conf_mtype == MEMORY_CPU) { + ret = rte_gpu_mem_unregister(conf_gpu_id, ext_mem.buf_ptr); + if (ret < 0) { + fprintf(stderr, "rte_gpu_mem_unregister returned error %d\n", ret); + goto exit; + } + + rte_free(ext_mem.buf_ptr); + + } else { + + ret = rte_extmem_unregister(ext_mem.buf_ptr, ext_mem.buf_len); + if (ret) { + fprintf(stderr, "rte_extmem_unregister failed with %d.\n", ret); + goto exit; + } + + rte_gpu_mem_free(conf_gpu_id, (void *)ext_mem.buf_ptr); + } + + rte_eth_dev_stop(conf_port_id); + rte_eth_dev_close(conf_port_id); +exit: /* clean up the EAL */ rte_eal_cleanup(); + printf("Bye...\n"); return EXIT_SUCCESS; } -- 2.17.1 ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH v2 1/1] app/test-gpudev: introduce ethdev to rx/tx packets using GPU memory
  2021-11-18 18:56 ` [PATCH v2 1/1] " eagostini
@ 2023-07-06 18:58   ` Stephen Hemminger
  0 siblings, 0 replies; 7+ messages in thread
From: Stephen Hemminger @ 2023-07-06 18:58 UTC (permalink / raw)
  To: eagostini; +Cc: dev

On Thu, 18 Nov 2021 18:56:13 +0000
<eagostini@nvidia.com> wrote:

More tests are always good, but this seems to have stalled, probably because
most people don't have an NVIDIA GPU set up with DPDK.

> +static void
> +signal_handler(int signum)
> +{
> +	if (signum == SIGINT || signum == SIGTERM) {
> +		printf("\n\nSignal %d received, preparing to exit...\n",
> +			signum);

printf() in a signal handler is unsafe; don't do it.
We just removed this in other tests.

> +exit:
>  	/* clean up the EAL */
>  	rte_eal_cleanup();
>
> +	printf("Bye...\n");

You don't need this. It is just noise; why add it?

^ permalink raw reply	[flat|nested] 7+ messages in thread
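A minimal handler along the lines suggested here would only set the flag the app already polls and leave any logging to the main lcore once the workers return; a sketch, assuming the existing volatile force_quit:

	static void
	signal_handler(int signum)
	{
		if (signum == SIGINT || signum == SIGTERM)
			force_quit = true;	/* no printf(): not async-signal-safe */
	}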