> > Could you, please, set "Author" correctly - "Elena Agostini"?
> > Otherwise, we see in the git log:
> >
> > "Author: eagostini"
> >
> > Compare with:
> > "Author: Bing Zhao"
> > "Author: Viacheslav Ovsiienko"
> >
> > Also, please, check the coding style issues - too many code lines go far beyond 100 chars.
> > Lines like this:
> >   + if (mbuf_mem_types[idx] == MBUF_MEM_GPU && strcmp(cur_fwd_eng->fwd_mode_name, "io") != 0) {
> > can easily be split.
> >
> > >> app/testpmd: add GPU memory option in iofwd
> >
> > As I see from the patch, it adds a new mbuf pool type (residing in GPU memory).
> > Maybe we should reword the title?
> > "app/testpmd: add GPU memory option for mbuf pools"
> >
> > > This patch introduces GPU memory in testpmd through the gpudev library.
> > > Testpmd can be used for network benchmarks when using GPU memory
> > > instead of regular CPU memory to send and receive packets.
> > > This option is currently limited to iofwd engine.
> >
> > Why? Because iofwd is the only mode not touching the mbuf data?
> > Is it critical for functionality? Is the GPU mbuf pool memory accessible from the CPU side?
> > I would explain the reasons (for supporting iofwd mode only) in the commit message.
> >
> > > Signed-off-by: Elena Agostini
> > >
> > > Depends-on: series-19465 ("GPU library")
> > > Depends-on: series-20422 ("common/mlx5: fix external memory pool
> > > registration")
> > > ---
> > >  app/test-pmd/cmdline.c                |  14 +++
> > >  app/test-pmd/meson.build              |   2 +-
> > >  app/test-pmd/parameters.c             |  13 ++-
> > >  app/test-pmd/testpmd.c                | 133 +++++++++++++++++++++++---
> > >  app/test-pmd/testpmd.h                |  16 +++-
> > >  doc/guides/testpmd_app_ug/run_app.rst |   3 +
> > >  6 files changed, 164 insertions(+), 17 deletions(-)
> > >
> > > diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
> > > index 4f51b259fe..36193bc566 100644
> > > --- a/app/test-pmd/cmdline.c
> > > +++ b/app/test-pmd/cmdline.c
> > > @@ -3614,6 +3614,7 @@ parse_item_list(const char *str, const char *item_name, unsigned int max_items,
> >
> > The parse_item_list() function is used to parse lists looking like "number0,number1,...,numberN"
> > and is invoked for "core", "port", "txtimes", etc. Not sure all of these parameters need to handle
> > the "g" suffix. We should allow "g" processing only for "mbuf-size". We have the "item_name"
> > argument to check whether we are invoked on "mbuf-size".
> >
> > >      unsigned int j;
> > >      int value_ok;
> > >      char c;
> > > +    int gpu_mbuf = 0;
> > >
> > >      /*
> > >       * First parse all items in the list and store their value.
> > > @@ -3628,6 +3629,14 @@ parse_item_list(const char *str, const char *item_name, unsigned int max_items,
> > >              value_ok = 1;
> > >              continue;
> > >          }
> > > +        if (c == 'g') {
> >
> > We should check that "g" is a single-char suffix (the last char).
> > Otherwise, "20g48" and "g20gggg48" would also be valid values.
> >
> > > +            /*
> > > +             * When this flag is set, mbufs for this segment
> > > +             * will be created on GPU memory.
> > > +             */
> > > +            gpu_mbuf = 1;
> > > +            continue;
> > > +        }
> > >          if (c != ',') {
> > >              fprintf(stderr, "character %c is not a decimal digit\n",
> > >                  c);
> > >              return 0;
> > > @@ -3640,6 +3649,8 @@ parse_item_list(const char *str, const char *item_name, unsigned int max_items,
> > >              parsed_items[nb_item] = value;
> > >              value_ok = 0;
> > >              value = 0;
> > > +            mbuf_mem_types[nb_item] = gpu_mbuf ? MBUF_MEM_GPU : MBUF_MEM_CPU;
> > > +            gpu_mbuf = 0;
> > >          }
> > >          nb_item++;
> > >      }
> > > @@ -3648,6 +3659,9 @@ parse_item_list(const char *str, const char *item_name, unsigned int max_items,
> > >              item_name, nb_item + 1, max_items);
> > >          return 0;
> > >      }
> > > +
> > >      mbuf_mem_types[nb_item] = gpu_mbuf ? MBUF_MEM_GPU : MBUF_MEM_CPU;
> > > +
> > >      parsed_items[nb_item++] = value;
> > >      if (! check_unique_values)
> > >          return nb_item;
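
Good points on the suffix handling. Something along these lines could restrict 'g' to a single
trailing character and to the "mbuf-size" list only. This is an untested sketch, not part of the
patch; the helper name and how it would plug into parse_item_list() are just for illustration:

#include <stdlib.h>
#include <string.h>

/* Illustrative helper: parse one "mbuf-size" token of the form "<size>[g]".
 * Only a single trailing 'g' is accepted, and only when item_name is
 * "mbuf-size"; anything else is rejected so values like "20g48" or "g2048"
 * abort the launch instead of being silently accepted. */
static int
parse_mbuf_size_token(const char *item_name, const char *token,
                      unsigned int *value, int *gpu_mbuf)
{
        char *end = NULL;
        unsigned long v;

        *gpu_mbuf = 0;
        v = strtoul(token, &end, 10);
        if (end == token)
                return -1;      /* no digits at all, e.g. "g2048" */
        if (*end == 'g') {
                if (strcmp(item_name, "mbuf-size") != 0)
                        return -1;      /* 'g' is meaningful for mbuf-size only */
                *gpu_mbuf = 1;
                end++;
        }
        if (*end != '\0')
                return -1;      /* reject trailing characters, e.g. "20g48" */
        *value = (unsigned int)v;
        return 0;
}
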
> > > diff --git a/app/test-pmd/meson.build b/app/test-pmd/meson.build
> > > index d5df52c470..5c8ca68c9d 100644
> > > --- a/app/test-pmd/meson.build
> > > +++ b/app/test-pmd/meson.build
> > > @@ -32,7 +32,7 @@ if dpdk_conf.has('RTE_HAS_JANSSON')
> > >      ext_deps += jansson_dep
> > >  endif
> > >
> > > -deps += ['ethdev', 'gro', 'gso', 'cmdline', 'metrics', 'bus_pci']
> > > +deps += ['ethdev', 'gro', 'gso', 'cmdline', 'metrics', 'bus_pci', 'gpudev']
> > >  if dpdk_conf.has('RTE_CRYPTO_SCHEDULER')
> > >      deps += 'crypto_scheduler'
> > >  endif
> > > diff --git a/app/test-pmd/parameters.c b/app/test-pmd/parameters.c
> > > index 0974b0a38f..d41f7f220b 100644
> > > --- a/app/test-pmd/parameters.c
> > > +++ b/app/test-pmd/parameters.c
> > > @@ -87,7 +87,10 @@ usage(char* progname)
> > >             "in NUMA mode.\n");
> > >      printf("  --mbuf-size=N,[N1[,..Nn]: set the data size of mbuf to "
> > >             "N bytes. If multiple numbers are specified the extra pools "
> > > -           "will be created to receive with packet split features\n");
> > > +           "will be created to receive with packet split features\n"
> > > +           "Use 'g' suffix for GPU memory.\n"
> > > +           "If no or an unrecognized suffix is provided, CPU is assumed\n");
> >
> > Unrecognized suffix? I would emit an error and abort the launch.
> >
> > > +
> > >      printf("  --total-num-mbufs=N: set the number of mbufs to be allocated "
> > >             "in mbuf pools.\n");
> > >      printf("  --max-pkt-len=N: set the maximum size of packet to N bytes.\n");
> > > @@ -595,6 +598,7 @@ launch_args_parse(int argc, char** argv)
> > >      struct rte_eth_dev_info dev_info;
> > >      uint16_t rec_nb_pkts;
> > >      int ret;
> > > +    uint32_t idx = 0;
> > >
> > >      static struct option lgopts[] = {
> > >          { "help",           0, 0, 0 },
> > > @@ -1538,4 +1542,11 @@ launch_args_parse(int argc, char** argv)
> > >              "ignored\n");
> > >          mempool_flags = 0;
> > >      }
> > > +
> > > +    for (idx = 0; idx < mbuf_data_size_n; idx++) {
> > > +        if (mbuf_mem_types[idx] == MBUF_MEM_GPU && strcmp(cur_fwd_eng->fwd_mode_name, "io") != 0) {
> > > +            fprintf(stderr, "GPU memory mbufs can be used with iofwd engine only\n");
> > > +            rte_exit(EXIT_FAILURE, "Command line is incorrect\n");
> > > +        }
> > > +    }
> >
> > Please note, the forwarding mode can be changed from the interactive prompt with the "set fwd" command.
> > If iofwd mode is crucial for GPU functionality, we should prevent switching to other modes while GPU pools are engaged.
> >
> > >  }
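
Right, I missed the interactive case. A small guard could be shared between the command-line check
above and the "set fwd" handler. Rough, untested sketch; it only relies on the mbuf_mem_types[]
and mbuf_data_size_n globals added by this patch:

/* Rough sketch: report whether any configured mbuf pool lives in GPU memory. */
static int
gpu_mbuf_pools_in_use(void)
{
        uint32_t i;

        for (i = 0; i < mbuf_data_size_n; i++)
                if (mbuf_mem_types[i] == MBUF_MEM_GPU)
                        return 1;
        return 0;
}

The "set fwd" path would then refuse any mode other than "io" while gpu_mbuf_pools_in_use()
returns non-zero, instead of checking only at launch time.
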
> > > diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
> > > index a66dfb297c..1af235c4d8 100644
> > > --- a/app/test-pmd/testpmd.c
> > > +++ b/app/test-pmd/testpmd.c
> > > @@ -205,6 +205,12 @@ uint32_t mbuf_data_size_n = 1; /* Number of specified mbuf sizes. */
> > >  uint16_t mbuf_data_size[MAX_SEGS_BUFFER_SPLIT] = {
> > >      DEFAULT_MBUF_DATA_SIZE
> > >  }; /**< Mbuf data space size. */
> > > +
> > > +/* Mbuf memory types. */
> > > +enum mbuf_mem_type mbuf_mem_types[MAX_SEGS_BUFFER_SPLIT];
> > > +/* Pointers to external memory allocated for mempools. */
> > > +uintptr_t mempools_ext_ptr[MAX_SEGS_BUFFER_SPLIT];
> > > +
> > >  uint32_t param_total_num_mbufs = 0;  /**< number of mbufs in all pools - if
> > >                                        * specified on command-line. */
> > >  uint16_t stats_period; /**< Period to show statistics (disabled by default) */
> > > @@ -543,6 +549,12 @@ int proc_id;
> > >   */
> > >  unsigned int num_procs = 1;
> > >
> > > +/*
> > > + * In case of GPU memory external mbufs use, for simplicity,
> > > + * the first GPU device in the list.
> > > + */
> > > +int gpu_id = 0;
> >
> > It is assigned zero and never changes - do we support only the first GPU?
> > This limitation should be mentioned in the documentation.
> >
> > > +
> > >  static void
> > >  eth_rx_metadata_negotiate_mp(uint16_t port_id)
> > >  {
> > > @@ -1103,6 +1115,79 @@ setup_extbuf(uint32_t nb_mbufs, uint16_t mbuf_sz, unsigned int socket_id,
> > >      return ext_num;
> > >  }
> > >
> > > +static struct rte_mempool *
> > > +gpu_mbuf_pool_create(uint16_t mbuf_seg_size, unsigned int nb_mbuf,
> > > +             unsigned int socket_id, uint16_t port_id,
> > > +             int gpu_id, uintptr_t *mp_addr)
> > > +{
> > > +    int ret = 0;
> > > +    char pool_name[RTE_MEMPOOL_NAMESIZE];
> > > +    struct rte_eth_dev_info dev_info;
> > > +    struct rte_mempool *rte_mp = NULL;
> > > +    struct rte_pktmbuf_extmem gpu_mem;
> > > +    struct rte_gpu_info ginfo;
> > > +    uint8_t gpu_page_shift = 16;
> > > +    uint32_t gpu_page_size = (1UL << gpu_page_shift);
> > > +
> > > +    ret = eth_dev_info_get_print_err(port_id, &dev_info);
> > > +    if (ret != 0)
> > > +        rte_exit(EXIT_FAILURE,
> > > +            "Failed to get device info for port %d\n",
> > > +            port_id);
> > > +
> > > +    mbuf_poolname_build(socket_id, pool_name, sizeof(pool_name), port_id, MBUF_MEM_GPU);
> > > +    if (!is_proc_primary()) {
> > > +        rte_mp = rte_mempool_lookup(pool_name);
> > > +        if (rte_mp == NULL)
> > > +            rte_exit(EXIT_FAILURE,
> > > +                "Get mbuf pool for socket %u failed: %s\n",
> > > +                socket_id, rte_strerror(rte_errno));
> > > +        return rte_mp;
> > > +    }
> > > +
> > > +    if (rte_gpu_info_get(gpu_id, &ginfo))
> > > +        rte_exit(EXIT_FAILURE, "Can't retrieve info about GPU %d - bye\n", gpu_id);
> > > +
> > > +    TESTPMD_LOG(INFO,
> > > +        "create a new mbuf pool <%s>: n=%u, size=%u, socket=%u GPU device=%s\n",
> > > +        pool_name, nb_mbuf, mbuf_seg_size, socket_id, ginfo.name);
> > > +
> > > +    /* Create an external memory mempool using memory allocated on the GPU. */
> > > +
> > > +    gpu_mem.elt_size = RTE_MBUF_DEFAULT_BUF_SIZE;
> > > +    gpu_mem.buf_len = RTE_ALIGN_CEIL(nb_mbuf * gpu_mem.elt_size, gpu_page_size);
> > > +    gpu_mem.buf_iova = RTE_BAD_IOVA;
> > > +
> > > +    gpu_mem.buf_ptr = rte_gpu_mem_alloc(gpu_id, gpu_mem.buf_len);
> > > +    if (gpu_mem.buf_ptr == NULL)
> > > +        rte_exit(EXIT_FAILURE, "Could not allocate GPU device memory\n");
> > > +
> > > +    ret = rte_extmem_register(gpu_mem.buf_ptr, gpu_mem.buf_len, NULL, gpu_mem.buf_iova, gpu_page_size);
> > > +    if (ret)
> > > +        rte_exit(EXIT_FAILURE, "Unable to register addr 0x%p, ret %d\n", gpu_mem.buf_ptr, ret);
> > > +
> > > +    uint16_t pid = 0;
> > > +
> > > +    RTE_ETH_FOREACH_DEV(pid)
> > > +    {
> > > +        ret = rte_dev_dma_map(dev_info.device, gpu_mem.buf_ptr,
> > > +                      gpu_mem.buf_iova, gpu_mem.buf_len);
> > > +        if (ret) {
> > > +            rte_exit(EXIT_FAILURE, "Unable to DMA map addr 0x%p for device %s\n",
> > > +                 gpu_mem.buf_ptr, dev_info.device->name);
> > > +        }
> > > +    }
> > > +
> > > +    rte_mp = rte_pktmbuf_pool_create_extbuf(pool_name, nb_mbuf, mb_mempool_cache, 0, mbuf_seg_size, socket_id, &gpu_mem, 1);
> > > +    if (rte_mp == NULL) {
> > > +        rte_exit(EXIT_FAILURE, "Creation of GPU mempool <%s> failed\n", pool_name);
> > > +    }
> > > +
> > > +    *mp_addr = (uintptr_t) gpu_mem.buf_ptr;
> > > +
> > > +    return rte_mp;
> > > +}
> > > +
> > >  /*
> > >   * Configuration initialisation done once at init time.
> > >   */
> > > @@ -1117,7 +1202,7 @@ mbuf_pool_create(uint16_t mbuf_seg_size, unsigned nb_mbuf,
> > >
> > >      mb_size = sizeof(struct rte_mbuf) + mbuf_seg_size;
> > >  #endif
> > > -    mbuf_poolname_build(socket_id, pool_name, sizeof(pool_name), size_idx);
> > > +    mbuf_poolname_build(socket_id, pool_name, sizeof(pool_name), size_idx, MBUF_MEM_CPU);
> > >      if (!is_proc_primary()) {
> > >          rte_mp = rte_mempool_lookup(pool_name);
> > >          if (rte_mp == NULL)
> > > @@ -1700,19 +1785,42 @@ init_config(void)
> > >
> > >          for (i = 0; i < num_sockets; i++)
> > >              for (j = 0; j < mbuf_data_size_n; j++)
> > > -                mempools[i * MAX_SEGS_BUFFER_SPLIT + j] =
> > > -                    mbuf_pool_create(mbuf_data_size[j],
> > > -                             nb_mbuf_per_pool,
> > > -                             socket_ids[i], j);
> > > +            {
> > > +                if (mbuf_mem_types[j] == MBUF_MEM_GPU) {
> > > +                    if (rte_gpu_count_avail() == 0)
> > > +                        rte_exit(EXIT_FAILURE, "No GPU device available.\n");
> > > +
> > > +                    mempools[i * MAX_SEGS_BUFFER_SPLIT + j] =
> > > +                        gpu_mbuf_pool_create(mbuf_data_size[j],
> >
> > What about GPU/CPU adherence? Do we create one GPU pool per CPU socket,
> > disregarding the hardware topology at all?
> > We can mitigate the issue with the "--socket-mem"/"--socket-num" parameters though.
> >
> > > +                                     nb_mbuf_per_pool,
> > > +                                     socket_ids[i], j, gpu_id,
> > > +                                     &(mempools_ext_ptr[j]));
> > > +                } else {
> > > +                    mempools[i * MAX_SEGS_BUFFER_SPLIT + j] =
> > > +                        mbuf_pool_create(mbuf_data_size[j],
> > > +                                 nb_mbuf_per_pool,
> > > +                                 socket_ids[i], j);
> > > +                }
> > > +            }
> > >      } else {
> > >          uint8_t i;
> > >
> > >          for (i = 0; i < mbuf_data_size_n; i++)
> > > -            mempools[i] = mbuf_pool_create
> > > -                    (mbuf_data_size[i],
> > > -                     nb_mbuf_per_pool,
> > > -                     socket_num == UMA_NO_CONFIG ?
> > > -                     0 : socket_num, i);
> > > +        {
> > > +            if (mbuf_mem_types[i] == MBUF_MEM_GPU) {
> > > +                mempools[i] = gpu_mbuf_pool_create(mbuf_data_size[i],
> > > +                                   nb_mbuf_per_pool,
> > > +                                   socket_num == UMA_NO_CONFIG ? 0 : socket_num,
> > > +                                   i, gpu_id,
> > > +                                   &(mempools_ext_ptr[i]));
> > > +            } else {
> > > +                mempools[i] = mbuf_pool_create(mbuf_data_size[i],
> > > +                                   nb_mbuf_per_pool,
> > > +                                   socket_num == UMA_NO_CONFIG ?
> > > +                                   0 : socket_num, i);
> > > +            }
> > > +        }
> > > +
> > >      }
> > >
> > >      init_port_config();
> > > @@ -3415,8 +3523,11 @@ pmd_test_exit(void)
> > >          }
> > >      }
> > >      for (i = 0 ; i < RTE_DIM(mempools) ; i++) {
> > > -        if (mempools[i])
> > > +        if (mempools[i]) {
> > >              mempool_free_mp(mempools[i]);
> > > +            if (mbuf_mem_types[i] == MBUF_MEM_GPU)
> > > +                rte_gpu_mem_free(gpu_id, (void *)mempools_ext_ptr[i]);
> > > +        }
> > >      }
> > >      free(xstats_display);
> > >
> > > diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
> > > index 669ce1e87d..9919044372 100644
> > > --- a/app/test-pmd/testpmd.h
> > > +++ b/app/test-pmd/testpmd.h
> > > @@ -12,6 +12,7 @@
> > >  #include
> > >  #include
> > >  #include
> > > +#include
> > >  #include
> > >  #include
> > >  #ifdef RTE_HAS_JANSSON
> > > @@ -474,6 +475,11 @@ extern uint8_t dcb_config;
> > >  extern uint32_t mbuf_data_size_n;
> > >  extern uint16_t mbuf_data_size[MAX_SEGS_BUFFER_SPLIT];
> > >  /**< Mbuf data space size. */
> > > +enum mbuf_mem_type {
> > > +    MBUF_MEM_CPU,
> > > +    MBUF_MEM_GPU
> > > +};
> > > +extern enum mbuf_mem_type mbuf_mem_types[MAX_SEGS_BUFFER_SPLIT];
> > >  extern uint32_t param_total_num_mbufs;
> > >
> > >  extern uint16_t stats_period;
> > > @@ -717,14 +723,16 @@ current_fwd_lcore(void)
> > >  /* Mbuf Pools */
> > >  static inline void
> > >  mbuf_poolname_build(unsigned int sock_id, char *mp_name,
> > > -            int name_size, uint16_t idx)
> > > +            int name_size, uint16_t idx, enum mbuf_mem_type mem_type)
> > >  {
> > > +    const char *suffix = mem_type == MBUF_MEM_GPU ? "_gpu" : "";
> > > +
> > >      if (!idx)
> > >          snprintf(mp_name, name_size,
> > > -             MBUF_POOL_NAME_PFX "_%u", sock_id);
> > > +             MBUF_POOL_NAME_PFX "_%u%s", sock_id, suffix);
> > >      else
> > >          snprintf(mp_name, name_size,
> > > -             MBUF_POOL_NAME_PFX "_%hu_%hu", (uint16_t)sock_id, idx);
> > > +             MBUF_POOL_NAME_PFX "_%hu_%hu%s", (uint16_t)sock_id, idx, suffix);
> > >  }
> > >
> > >  static inline struct rte_mempool *
> > > @@ -732,7 +740,7 @@ mbuf_pool_find(unsigned int sock_id, uint16_t idx)
> > >  {
> > >      char pool_name[RTE_MEMPOOL_NAMESIZE];
> > >
> > > -    mbuf_poolname_build(sock_id, pool_name, sizeof(pool_name), idx);
> > > +    mbuf_poolname_build(sock_id, pool_name, sizeof(pool_name), idx, mbuf_mem_types[idx]);
> > >      return rte_mempool_lookup((const char *)pool_name);
> > >  }
> > >
> > > diff --git a/doc/guides/testpmd_app_ug/run_app.rst b/doc/guides/testpmd_app_ug/run_app.rst
> > > index 30edef07ea..ede7b79abb 100644
> > > --- a/doc/guides/testpmd_app_ug/run_app.rst
> > > +++ b/doc/guides/testpmd_app_ug/run_app.rst
> > > @@ -119,6 +119,9 @@ The command line options are:
> > >      The default value is 2048. If multiple mbuf-size values are specified the
> > >      extra memory pools will be created for allocating mbufs to receive packets
> > >      with buffer splitting features.
> > > +    Providing mbuf size with a 'g' suffix (e.g. ``--mbuf-size=2048g``),
> > > +    will cause the mempool to be created on a GPU memory area allocated.
> > > +    This option is currently limited to iofwd engine with the first GPU.
> >
> > Mmm, if we have multiple GPUs, we have no way to specify on which one the pool should be allocated.
> > The syntax is not complete. ☹ Possibly we should specify the GPU port id after the suffix? Like "2048g2"?
> > If the port index is omitted, we should assume there is only one GPU and check this.
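
Extending the syntax with an optional GPU index is doable. A rough companion to the earlier sketch
(illustrative only, not claiming this is the final syntax; the defaults are my assumptions) could
parse whatever follows the 'g':

#include <stdlib.h>

/* Illustrative: parse the optional GPU device index after the 'g' suffix,
 * e.g. "2048g2" -> 2. Returns the index, 0 when the index is omitted
 * (single-GPU case), or -1 on garbage. */
static int
parse_gpu_index(const char *suffix)
{
        char *end = NULL;
        unsigned long v;

        if (*suffix == '\0')
                return 0;       /* bare "2048g": default to the only/first GPU */
        v = strtoul(suffix, &end, 10);
        if (end == suffix || *end != '\0')
                return -1;      /* not a pure number after 'g' */
        return (int)v;
}

The caller would still validate the returned index against rte_gpu_count_avail() and reject -1.
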
>
> Do we need to overload the --mbuf-size parameter here?
> Why not add a 'gpu' option to the --mp-alloc parameter?

--mbuf-size is preferred to --mp-alloc because with --mbuf-size you can enable the buffer split
feature, allocating multiple mempools with different mbuf sizes on different types of memory.
An example would be --mbuf-size=2048g,1024, which creates two mempools to split packets into:
one in GPU memory and another in CPU memory.
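
For example, an invocation like the following (illustrative only; EAL options are abridged and the
Rx buffer-split configuration is omitted) gives one 2048-byte mempool in GPU memory and one
1024-byte mempool in CPU memory, with forwarding set to io:

./dpdk-testpmd -l 0-3 -a <NIC_PCI_BDF> -- -i --forward-mode=io --mbuf-size=2048g,1024
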