Date: Wed, 17 Nov 2021 14:04:03 +0000
From: Bruce Richardson
To: eagostini@nvidia.com
Cc: dev@dpdk.org
Subject: Re: [PATCH v4 1/1] app/testpmd: add GPU memory option for mbuf pools
References: <20211029204909.21318-1-eagostini@nvidia.com>
 <20211117214923.15655-1-eagostini@nvidia.com>
 <20211117214923.15655-2-eagostini@nvidia.com>
In-Reply-To: <20211117214923.15655-2-eagostini@nvidia.com>

On Wed, Nov 17, 2021 at 09:49:23PM +0000, eagostini@nvidia.com wrote:
> From: Elena Agostini
>
> This patch introduces GPU memory in testpmd through the gpudev library.
> Testpmd can be used for network benchmarks when using GPU memory
> instead of regular CPU memory to send and receive packets.
> This option is currently limited to iofwd engine to ensure
> no workload is applied on packets not accessible from the CPU.
>
> The options chose is --mbuf-size so buffer split feature across
> different mempools can be enabled.
>
> Signed-off-by: Elena Agostini

Ignoring for now the discussion about whether this patch should be
accepted or not (and I would share many of the concerns about it going
into testpmd) - a few comments on the code changes themselves below.

/Bruce

> ---
>  app/test-pmd/cmdline.c    |  36 +++++++-
>  app/test-pmd/config.c     |   4 +-
>  app/test-pmd/icmpecho.c   |   2 +-
>  app/test-pmd/meson.build  |   3 +
>  app/test-pmd/parameters.c |  15 +++-
>  app/test-pmd/testpmd.c    | 182 +++++++++++++++++++++++++++++++++++---
>  app/test-pmd/testpmd.h    |  18 +++-
>  7 files changed, 240 insertions(+), 20 deletions(-)
>
> diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
> index 3faa37db6d..400aa07da8 100644
> --- a/app/test-pmd/cmdline.c
> +++ b/app/test-pmd/cmdline.c
> @@ -3619,6 +3619,7 @@ parse_item_list(const char *str, const char *item_name, unsigned int max_items,
>  	unsigned int j;
>  	int value_ok;
>  	char c;
> +	int gpu_mbuf = 0;
>
>  	/*
>  	 * First parse all items in the list and store their value.
> @@ -3628,11 +3629,29 @@ parse_item_list(const char *str, const char *item_name, unsigned int max_items,
>  	value_ok = 0;
>  	for (i = 0; i < strnlen(str, STR_TOKEN_SIZE); i++) {
>  		c = str[i];
> +

Whitespace change unneeded for this patch, drop from diff.

>  		if ((c >= '0') && (c <= '9')) {
>  			value = (unsigned int) (value * 10 + (c - '0'));
>  			value_ok = 1;
>  			continue;
>  		}
> +		if (c == 'g') {
> +			/*
> +			 * When this flag is set, mbufs for this segment
> +			 * will be created on GPU memory.
> +			 */
> +			if (i < strnlen(str, STR_TOKEN_SIZE) - 1 && str[i+1] != ',') {
> +				fprintf(stderr, "input param list is not well formatted\n");
> +				return 0;
> +			}
> +			#ifdef RTE_LIB_GPUDEV
> +				gpu_mbuf = 1;
> +			#else
> +				fprintf(stderr, "gpudev library not built. Can't create mempools in GPU memory.\n");
> +				return 0;
> +			#endif

While I suppose it makes things more readable, the alignment here with
the ifdefs is unusual. The standard in DPDK is that #ifdefs always
start at the margin. If you want things inline like this, I suggest you
convert the macro to a global variable at the top of the file and use
that in regular C "if" conditions - see the sketch at the end of this
mail.

> +			continue;
> +		}
>  		if (c != ',') {
>  			fprintf(stderr, "character %c is not a decimal digit\n", c);
>  			return 0;
> @@ -3645,6 +3664,8 @@ parse_item_list(const char *str, const char *item_name, unsigned int max_items,
>  			parsed_items[nb_item] = value;
>  			value_ok = 0;
>  			value = 0;
> +			mbuf_mem_types[nb_item] = gpu_mbuf ? MBUF_MEM_GPU : MBUF_MEM_CPU;
> +			gpu_mbuf = 0;
>  		}
>  		nb_item++;
>  	}
> @@ -3653,12 +3674,15 @@ parse_item_list(const char *str, const char *item_name, unsigned int max_items,
>  			item_name, nb_item + 1, max_items);
>  		return 0;
>  	}
> +
> +	mbuf_mem_types[nb_item] = gpu_mbuf ? MBUF_MEM_GPU : MBUF_MEM_CPU;
> +
>  	parsed_items[nb_item++] = value;
>  	if (! check_unique_values)
>  		return nb_item;
>
>  	/*
> -	 * Then, check that all values in the list are different.
> +	 * Then, check that all values in the list are differents.

Original comment is correct. Remove this change from diff.

>  	 * No optimization here...
>  	 */
>  	for (i = 0; i < nb_item; i++) {
> @@ -6874,6 +6898,16 @@ static void cmd_set_fwd_mode_parsed(void *parsed_result,
>  				__rte_unused void *data)
>  {
>  	struct cmd_set_fwd_mode_result *res = parsed_result;
> +	int idx;
> +
> +	for (idx = 0; idx < MAX_SEGS_BUFFER_SPLIT; idx++) {
> +		if (mbuf_mem_types[idx] == MBUF_MEM_GPU &&
> +				strcmp(res->mode, "io") != 0) {
> +			TESTPMD_LOG(ERR,
> +				"GPU memory mbufs can be used with iofwd engine only\n");

No point in wrapping like this, as you end up at the same column anyway.

> +			return;
> +		}
> +	}
>
>  	retry_enabled = 0;
>  	set_pkt_forwarding_mode(res->mode);
> diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
> index 6fca09527b..9e8f8f1462 100644
> --- a/app/test-pmd/config.c
> +++ b/app/test-pmd/config.c
> @@ -2966,7 +2966,7 @@ port_rss_reta_info(portid_t port_id,
>  }
>
>  /*
> - * Displays the RSS hash functions of a port, and, optionally, the RSS hash
> + * Displays the RSS hash functions of a port, and, optionaly, the RSS hash

Original comment spelling is correct. Drop from diff.

>   * key of the port.
>   */
>  void
> @@ -5255,7 +5255,7 @@ mcast_addr_pool_remove(struct rte_port *port, uint32_t addr_idx)
>  {
>  	port->mc_addr_nb--;
>  	if (addr_idx == port->mc_addr_nb) {
> -		/* No need to recompact the set of multicast addresses. */
> +		/* No need to recompact the set of multicast addressses. */
>  		if (port->mc_addr_nb == 0) {
>  			/* free the pool of multicast addresses. */
>  			free(port->mc_addr_pool);
> diff --git a/app/test-pmd/icmpecho.c b/app/test-pmd/icmpecho.c
> index 99c94cb282..3a85ec3dd1 100644
> --- a/app/test-pmd/icmpecho.c
> +++ b/app/test-pmd/icmpecho.c
> @@ -53,7 +53,7 @@ arp_op_name(uint16_t arp_op)
>  	default:
>  		break;
>  	}
> -	return "Unknown ARP op";
> +	return "Unkwown ARP op";

Introducing a typo here too. Drop from diff.

>  }
>
>  static const char *
> diff --git a/app/test-pmd/meson.build b/app/test-pmd/meson.build
> index 43130c8856..568660e18f 100644
> --- a/app/test-pmd/meson.build
> +++ b/app/test-pmd/meson.build
> @@ -43,6 +43,9 @@ if dpdk_conf.has('RTE_LIB_BPF')
>      sources += files('bpf_cmd.c')
>      deps += 'bpf'
>  endif
> +if dpdk_conf.has('RTE_LIB_GPUDEV')
> +    deps += 'gpudev'
> +endif
>  if dpdk_conf.has('RTE_LIB_GRO')
>      deps += 'gro'
>  endif
> diff --git a/app/test-pmd/parameters.c b/app/test-pmd/parameters.c
> index f9185065af..dde5ef6590 100644
> --- a/app/test-pmd/parameters.c
> +++ b/app/test-pmd/parameters.c
> @@ -86,7 +86,10 @@ usage(char* progname)
>  	       "in NUMA mode.\n");
>  	printf("  --mbuf-size=N,[N1[,..Nn]: set the data size of mbuf to "
>  	       "N bytes. If multiple numbers are specified the extra pools "
> -	       "will be created to receive with packet split features\n");
> +	       "will be created to receive with packet split features\n"
> +	       "Use 'g' suffix for GPU memory.\n"
> +	       "If no or an unrecognized suffix is provided, CPU is assumed\n");

Indentation looks wrong. Also, "g" is a bad suffix to append to a size,
since it would indicate "gigabyte" in most size-based values.

> +
>  	printf("  --total-num-mbufs=N: set the number of mbufs to be allocated "
>  	       "in mbuf pools.\n");
>  	printf("  --max-pkt-len=N: set the maximum size of packet to N bytes.\n");
> @@ -594,6 +597,7 @@ launch_args_parse(int argc, char** argv)
>  	struct rte_eth_dev_info dev_info;
>  	uint16_t rec_nb_pkts;
>  	int ret;
> +	uint32_t idx = 0;
>
>  	static struct option lgopts[] = {
>  		{ "help",			0, 0, 0 },
> @@ -1543,4 +1547,13 @@ launch_args_parse(int argc, char** argv)
>  			   "ignored\n");
>  		mempool_flags = 0;
>  	}
> +
> +	for (idx = 0; idx < mbuf_data_size_n; idx++) {
> +		if (mbuf_mem_types[idx] == MBUF_MEM_GPU &&
> +				strcmp(cur_fwd_eng->fwd_mode_name, "io") != 0) {
> +			TESTPMD_LOG(ERR,
> +				"GPU memory mbufs can be used with iofwd engine only\n");

Again, no need to wrap here.

> +			rte_exit(EXIT_FAILURE, "Command line is incorrect\n");
> +		}
> +	}
>  }
> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
> index ba65342b6d..b6c55e61b1 100644
> --- a/app/test-pmd/testpmd.c
> +++ b/app/test-pmd/testpmd.c
> @@ -206,6 +206,13 @@ uint32_t mbuf_data_size_n = 1; /* Number of specified mbuf sizes. */
>  uint16_t mbuf_data_size[MAX_SEGS_BUFFER_SPLIT] = {
>  	DEFAULT_MBUF_DATA_SIZE
>  }; /**< Mbuf data space size. */
> +
> +/* Mbuf memory types. */
> +enum mbuf_mem_type mbuf_mem_types[MAX_SEGS_BUFFER_SPLIT];
> +/* Pointers to external memory allocated for mempools. */
> +uintptr_t mempools_ext_ptr[MAX_SEGS_BUFFER_SPLIT];
> +size_t mempools_ext_size[MAX_SEGS_BUFFER_SPLIT];
> +
>  uint32_t param_total_num_mbufs = 0;  /**< number of mbufs in all pools - if
>  				      * specified on command-line. */
>  uint16_t stats_period; /**< Period to show statistics (disabled by default) */
> @@ -546,6 +553,12 @@ int proc_id;
>   */
>  unsigned int num_procs = 1;
>
> +/*
> + * In case of GPU memory external mbufs use, for simplicity,
> + * the first GPU device in the list.
> + */
> +int gpu_id = 0;
> +
>  static void
>  eth_rx_metadata_negotiate_mp(uint16_t port_id)
>  {
> @@ -1108,6 +1121,94 @@ setup_extbuf(uint32_t nb_mbufs, uint16_t mbuf_sz, unsigned int socket_id,
>  	return ext_num;
>  }
>
> +static struct rte_mempool *
> +gpu_mbuf_pool_create(__rte_unused uint16_t mbuf_seg_size,
> +		__rte_unused unsigned int nb_mbuf,
> +		__rte_unused unsigned int socket_id,
> +		__rte_unused uint16_t port_id,
> +		__rte_unused int gpu_id,
> +		__rte_unused uintptr_t *mp_addr,
> +		__rte_unused size_t *mp_size)
> +{
> +#ifdef RTE_LIB_GPUDEV
> +	int ret = 0;
> +	char pool_name[RTE_MEMPOOL_NAMESIZE];
> +	struct rte_eth_dev_info dev_info;
> +	struct rte_mempool *rte_mp = NULL;
> +	struct rte_pktmbuf_extmem gpu_mem;
> +	struct rte_gpu_info ginfo;
> +	uint8_t gpu_page_shift = 16;
> +	uint32_t gpu_page_size = (1UL << gpu_page_shift);
> +
> +	if (rte_gpu_count_avail() == 0)
> +		rte_exit(EXIT_FAILURE, "No GPU device available.\n");
> +
> +	if (rte_gpu_info_get(gpu_id, &ginfo))
> +		rte_exit(EXIT_FAILURE,
> +			"Can't retrieve info about GPU %d - bye\n", gpu_id);
> +
> +	ret = eth_dev_info_get_print_err(port_id, &dev_info);
> +	if (ret != 0)
> +		rte_exit(EXIT_FAILURE,
> +			"Failed to get device info for port %d\n",
> +			port_id);
> +
> +	mbuf_poolname_build(socket_id, pool_name, sizeof(pool_name), port_id, MBUF_MEM_GPU);
> +	if (!is_proc_primary()) {
> +		rte_mp = rte_mempool_lookup(pool_name);
> +		if (rte_mp == NULL)
> +			rte_exit(EXIT_FAILURE,
> +				"Get mbuf pool for socket %u failed: %s\n",
> +				socket_id, rte_strerror(rte_errno));
> +		return rte_mp;
> +	}
> +
> +	TESTPMD_LOG(INFO,
> +		"create a new mbuf pool <%s>: n=%u, size=%u, socket=%u GPU device=%s\n",
> +		pool_name, nb_mbuf, mbuf_seg_size, socket_id, ginfo.name);
> +
> +	/* Create an external memory mempool using memory allocated on the GPU. */
> +
> +	gpu_mem.elt_size = RTE_MBUF_DEFAULT_BUF_SIZE;
> +	gpu_mem.buf_len = RTE_ALIGN_CEIL(nb_mbuf * gpu_mem.elt_size, gpu_page_size);
> +	gpu_mem.buf_iova = RTE_BAD_IOVA;
> +
> +	gpu_mem.buf_ptr = rte_gpu_mem_alloc(gpu_id, gpu_mem.buf_len);
> +	if (gpu_mem.buf_ptr == NULL)
> +		rte_exit(EXIT_FAILURE, "Could not allocate GPU device memory\n");
> +
> +	ret = rte_extmem_register(gpu_mem.buf_ptr, gpu_mem.buf_len,
> +			NULL, gpu_mem.buf_iova, gpu_page_size);
> +	if (ret)
> +		rte_exit(EXIT_FAILURE, "Unable to register addr 0x%p, ret %d\n", gpu_mem.buf_ptr, ret);
> +
> +	uint16_t pid = 0;
> +
> +	RTE_ETH_FOREACH_DEV(pid)
> +	{
> +		ret = rte_dev_dma_map(dev_info.device, gpu_mem.buf_ptr,
> +				gpu_mem.buf_iova, gpu_mem.buf_len);
> +		if (ret)
> +			rte_exit(EXIT_FAILURE, "Unable to DMA map addr 0x%p for device %s\n",
> +				gpu_mem.buf_ptr, dev_info.device->name);
> +	}
> +
> +	rte_mp = rte_pktmbuf_pool_create_extbuf(pool_name, nb_mbuf,
> +			mb_mempool_cache, 0, mbuf_seg_size, socket_id, &gpu_mem, 1);
> +	if (rte_mp == NULL)
> +		rte_exit(EXIT_FAILURE,
> +			"Creation of GPU mempool <%s> failed\n", pool_name);
> +
> +	*mp_addr = (uintptr_t) gpu_mem.buf_ptr;
> +	*mp_size = (size_t) gpu_mem.buf_len;
> +
> +	return rte_mp;
> +#else
> +	rte_exit(EXIT_FAILURE,
> +		"gpudev library not built. Can't create mempools in GPU memory.\n");
> +#endif
> +}
> +
>  /*
>   * Configuration initialisation done once at init time.
>   */
> @@ -1122,7 +1223,7 @@ mbuf_pool_create(uint16_t mbuf_seg_size, unsigned nb_mbuf,
>
>  	mb_size = sizeof(struct rte_mbuf) + mbuf_seg_size;
>  #endif
> -	mbuf_poolname_build(socket_id, pool_name, sizeof(pool_name), size_idx);
> +	mbuf_poolname_build(socket_id, pool_name, sizeof(pool_name), size_idx, MBUF_MEM_CPU);
>  	if (!is_proc_primary()) {
>  		rte_mp = rte_mempool_lookup(pool_name);
>  		if (rte_mp == NULL)
> @@ -1709,19 +1810,42 @@ init_config(void)
>
>  		for (i = 0; i < num_sockets; i++)
>  			for (j = 0; j < mbuf_data_size_n; j++)
> -				mempools[i * MAX_SEGS_BUFFER_SPLIT + j] =
> -					mbuf_pool_create(mbuf_data_size[j],
> -							nb_mbuf_per_pool,
> -							socket_ids[i], j);
> +			{

The brace should be on the same line as the "for". Given that "if -
else" counts as one statement, the braces may not need to be added at
all.

> +				if (mbuf_mem_types[j] == MBUF_MEM_GPU) {
> +					mempools[i * MAX_SEGS_BUFFER_SPLIT + j] =
> +						gpu_mbuf_pool_create(mbuf_data_size[j],
> +							nb_mbuf_per_pool,
> +							socket_ids[i], j, gpu_id,
> +							&(mempools_ext_ptr[i]),
> +							&(mempools_ext_size[i]));
> +				} else {
> +					mempools[i * MAX_SEGS_BUFFER_SPLIT + j] =
> +						mbuf_pool_create(mbuf_data_size[j],
> +							nb_mbuf_per_pool,
> +							socket_ids[i], j);
> +				}
> +			}
>  	} else {
>  		uint8_t i;
>
>  		for (i = 0; i < mbuf_data_size_n; i++)
> -			mempools[i] = mbuf_pool_create
> -					(mbuf_data_size[i],
> -					 nb_mbuf_per_pool,
> -					 socket_num == UMA_NO_CONFIG ?
> -					 0 : socket_num, i);
> +		{
> +			if (mbuf_mem_types[i] == MBUF_MEM_GPU) {
> +				mempools[i] =
> +					gpu_mbuf_pool_create(mbuf_data_size[i],
> +						nb_mbuf_per_pool,
> +						socket_num == UMA_NO_CONFIG ? 0 : socket_num,
> +						i, gpu_id,
> +						&(mempools_ext_ptr[i]),
> +						&(mempools_ext_size[i]));
> +			} else {
> +				mempools[i] = mbuf_pool_create(mbuf_data_size[i],
> +					nb_mbuf_per_pool,
> +					socket_num == UMA_NO_CONFIG ?
> +					0 : socket_num, i);
> +			}
> +		}
> +
>  	}
>
>  	init_port_config();
> @@ -3434,9 +3558,45 @@ pmd_test_exit(void)
>  			return;
>  		}
>  	}
> +
>  	for (i = 0 ; i < RTE_DIM(mempools) ; i++) {
> -		if (mempools[i])
> +		if (mempools[i]) {
>  			mempool_free_mp(mempools[i]);
> +#ifdef RTE_LIB_GPUDEV
> +			if (mbuf_mem_types[i] == MBUF_MEM_GPU) {
> +				if (mempools_ext_ptr[i] != 0) {
> +					ret = rte_extmem_unregister(
> +							(void *)mempools_ext_ptr[i],
> +							mempools_ext_size[i]);
> +
> +					if (ret)
> +						RTE_LOG(ERR, EAL,
> +							"rte_extmem_unregister 0x%p -> %d (rte_errno = %d)\n",
> +							(uint8_t *)mempools_ext_ptr[i], ret, rte_errno);
> +
> +					RTE_ETH_FOREACH_DEV(pt_id) {
> +						struct rte_eth_dev_info dev_info;
> +						ret = eth_dev_info_get_print_err(pt_id, &dev_info);
> +						if (ret != 0)
> +							rte_exit(EXIT_FAILURE,
> +								"Failed to get device info for port %d\n",
> +								pt_id);
> +
> +						ret = rte_dev_dma_unmap(dev_info.device,
> +								(void *)mempools_ext_ptr[i], RTE_BAD_IOVA,
> +								mempools_ext_size[i]);
> +
> +						if (ret)
> +							RTE_LOG(ERR, EAL,
> +								"rte_dev_dma_unmap 0x%p -> %d (rte_errno = %d)\n",
> +								(uint8_t *)mempools_ext_ptr[i], ret, rte_errno);
> +					}
> +
> +					rte_gpu_mem_free(gpu_id, (void *)mempools_ext_ptr[i]);
> +				}
> +			}
> +#endif

This block has very large amounts of indentation and nesting and should
be reworked to reduce this. Aim for 4 or 5 levels of indent max.
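
For example - a rough, untested sketch, where "gpu_mempool_cleanup" is
just a placeholder name - the GPU-specific teardown could move into a
small helper so the loop body stays flat:

#ifdef RTE_LIB_GPUDEV
/* Unregister, DMA-unmap and free the GPU memory backing mempool "idx". */
static void
gpu_mempool_cleanup(unsigned int idx)
{
	struct rte_eth_dev_info dev_info;
	portid_t pt_id;
	int ret;

	if (mbuf_mem_types[idx] != MBUF_MEM_GPU || mempools_ext_ptr[idx] == 0)
		return;

	ret = rte_extmem_unregister((void *)mempools_ext_ptr[idx],
			mempools_ext_size[idx]);
	if (ret)
		TESTPMD_LOG(ERR, "rte_extmem_unregister 0x%p -> %d\n",
			(void *)mempools_ext_ptr[idx], ret);

	RTE_ETH_FOREACH_DEV(pt_id) {
		if (eth_dev_info_get_print_err(pt_id, &dev_info) != 0)
			continue;
		ret = rte_dev_dma_unmap(dev_info.device,
				(void *)mempools_ext_ptr[idx], RTE_BAD_IOVA,
				mempools_ext_size[idx]);
		if (ret)
			TESTPMD_LOG(ERR, "rte_dev_dma_unmap 0x%p -> %d\n",
				(void *)mempools_ext_ptr[idx], ret);
	}

	rte_gpu_mem_free(gpu_id, (void *)mempools_ext_ptr[idx]);
}
#endif

with the loop in pmd_test_exit() then reducing to:

	for (i = 0; i < RTE_DIM(mempools); i++) {
		if (mempools[i] == NULL)
			continue;
		mempool_free_mp(mempools[i]);
#ifdef RTE_LIB_GPUDEV
		gpu_mempool_cleanup(i);
#endif
	}
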
> +		}
>  	}
>  	free(xstats_display);
>
> diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
> index b1dfd097c7..eca5b041d3 100644
> --- a/app/test-pmd/testpmd.h
> +++ b/app/test-pmd/testpmd.h
> @@ -9,6 +9,9 @@
>
>  #include
>  #include
> +#ifdef RTE_LIB_GPUDEV
> +#include <rte_gpudev.h>
> +#endif
>  #ifdef RTE_LIB_GRO
>  #include <rte_gro.h>
>  #endif
> @@ -484,6 +487,11 @@ extern uint8_t dcb_config;
>  extern uint32_t mbuf_data_size_n;
>  extern uint16_t mbuf_data_size[MAX_SEGS_BUFFER_SPLIT];
>  /**< Mbuf data space size. */
> +enum mbuf_mem_type {
> +	MBUF_MEM_CPU,
> +	MBUF_MEM_GPU
> +};
> +extern enum mbuf_mem_type mbuf_mem_types[MAX_SEGS_BUFFER_SPLIT];
>  extern uint32_t param_total_num_mbufs;
>
>  extern uint16_t stats_period;
> @@ -731,14 +739,16 @@ current_fwd_lcore(void)
>  /* Mbuf Pools */
>  static inline void
>  mbuf_poolname_build(unsigned int sock_id, char *mp_name,
> -		    int name_size, uint16_t idx)
> +		    int name_size, uint16_t idx, enum mbuf_mem_type mem_type)
>  {
> +	const char *suffix = mem_type == MBUF_MEM_GPU ? "_gpu" : "";
> +
>  	if (!idx)
>  		snprintf(mp_name, name_size,
> -			 MBUF_POOL_NAME_PFX "_%u", sock_id);
> +			 MBUF_POOL_NAME_PFX "_%u%s", sock_id, suffix);
>  	else
>  		snprintf(mp_name, name_size,
> -			 MBUF_POOL_NAME_PFX "_%hu_%hu", (uint16_t)sock_id, idx);
> +			 MBUF_POOL_NAME_PFX "_%hu_%hu%s", (uint16_t)sock_id, idx, suffix);
>  }
>
>  static inline struct rte_mempool *
> @@ -746,7 +756,7 @@ mbuf_pool_find(unsigned int sock_id, uint16_t idx)
>  {
>  	char pool_name[RTE_MEMPOOL_NAMESIZE];
>
> -	mbuf_poolname_build(sock_id, pool_name, sizeof(pool_name), idx);
> +	mbuf_poolname_build(sock_id, pool_name, sizeof(pool_name), idx, mbuf_mem_types[idx]);
>  	return rte_mempool_lookup((const char *)pool_name);
>  }
>
> --
> 2.17.1
>
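
P.S. To illustrate the global-variable suggestion for parse_item_list()
above - a rough, untested sketch; "have_gpudev" is just a name I made
up here:

/* at the top of cmdline.c, with the #ifdef kept at the margin */
#ifdef RTE_LIB_GPUDEV
static const int have_gpudev = 1;
#else
static const int have_gpudev = 0;
#endif

and then in the parsing loop:

	if (c == 'g') {
		/*
		 * When this flag is set, mbufs for this segment
		 * will be created in GPU memory.
		 */
		if (!have_gpudev) {
			fprintf(stderr, "gpudev library not built. Can't create mempools in GPU memory.\n");
			return 0;
		}
		if (i < strnlen(str, STR_TOKEN_SIZE) - 1 && str[i + 1] != ',') {
			fprintf(stderr, "input param list is not well formatted\n");
			return 0;
		}
		gpu_mbuf = 1;
		continue;
	}

Since this function makes no gpudev calls, a plain "if" compiles in
both configurations, and the compiler drops the dead branch when gpudev
is not built.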