From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 26F98A04DB; Fri, 4 Sep 2020 20:35:44 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 018ACE07; Fri, 4 Sep 2020 20:35:44 +0200 (CEST) Received: from mail-pl1-f171.google.com (mail-pl1-f171.google.com [209.85.214.171]) by dpdk.org (Postfix) with ESMTP id 06D06DE0 for ; Fri, 4 Sep 2020 20:35:43 +0200 (CEST) Received: by mail-pl1-f171.google.com with SMTP id s10so1566756plp.1 for ; Fri, 04 Sep 2020 11:35:42 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=networkplumber-org.20150623.gappssmtp.com; s=20150623; h=from:to:cc:subject:date:message-id:in-reply-to:references :mime-version:content-transfer-encoding; bh=qB9HJLwxH8VdLqrWew+j9CoQNm4quXPBhtY1Yk/nHjw=; b=umU4qTnYNc4vU14gevdHtZWyuh4lqr/yj4H+TfXkf4yrrcRaOHyasA+jJratxGbLcP DQoLYFzHunJ5qft54PpQV4MGOeEU6eabZimDXRZQGbp10Mj65LjJy9+twUTZdnQJiSO3 Mf3jwAmRlLnnc6/DqL0LiMSlvydPkO825WHfFAWq001KbQe7xO4Ar7e88l/Jrrimf9qj dvYuB6bUi4bcVoXCBj4E7o8o0KH/lLxroRK16rtSNxU8D7+Z15uWDSg/6HhrS+Wdk2lZ Q596X7ZXj1I0icv1fAwAqEViU2rCWS8Hd5tTdqcu3C4rbXPHadWd7COh8toF1YhQ77Zd 74IQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; bh=qB9HJLwxH8VdLqrWew+j9CoQNm4quXPBhtY1Yk/nHjw=; b=EO90FUxbt+lCc+5Wm9gaw9Rp0U9/haR1ocMk+V0lNseiEMiKLZmHDYlO82d17C/WHw Rl4IUtKMn+wyx1hgCV1LmMKMQOSxc8xWkFKPxpVy9QlTlYO/4y84GPi7//q1F/OKl0K/ rJnUPASOKvSCZ9xYh+kzURPE4IPnQwhBvMgSHiL2A90139WK4JZu0LtWQn0ANqE0znEQ dUKrFqFuoQqsd7KDCglPjlkcNbp4fxTdt4QTgYag0R54irlce9R2PP3Ix4e2JX7p/mJz 5hCl1z0fQICDcQNizhu70/4Q4b+68MTq2srha85KaCun+DtGSrmci0uZXwXkJgF727VD Fb/g== X-Gm-Message-State: AOAM5314yOGRM4qAMK+vERq+XHfkuokhu4YCfQqNweZB/A6wTOJwdw5x geP5YafRUHF0XsqCt7mGx9TRpGERzTR1kg== X-Google-Smtp-Source: 
ABdhPJzCG6HTPYwKfp8LqRa0iEZGVOVgjaEYf/W0dLOfhGXm7awWt5+5LpBAnnm/gas7JU0Xq29uUg== X-Received: by 2002:a17:902:8646:: with SMTP id y6mr10147328plt.19.1599244541370; Fri, 04 Sep 2020 11:35:41 -0700 (PDT) Received: from hermes.lan (204-195-22-127.wavecable.com. [204.195.22.127]) by smtp.gmail.com with ESMTPSA id 67sm7657833pfv.173.2020.09.04.11.35.40 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Fri, 04 Sep 2020 11:35:40 -0700 (PDT) From: Stephen Hemminger To: dev@dpdk.org Cc: Stephen Hemminger Date: Fri, 4 Sep 2020 11:35:33 -0700 Message-Id: <20200904183533.19509-1-stephen@networkplumber.org> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20200901165643.15668-1-stephen@networkplumber.org> References: <20200901165643.15668-1-stephen@networkplumber.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [dpdk-dev] [PATCH] usertools: add huge page setup script X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" This is an improved version of the setup of huge pages based on the earlier DPDK setup. Differences are: * it autodetects NUMA vs non-NUMA * it allows setting different page sizes, since recent kernels support multiple sizes. * it accepts a parameter in bytes (not pages). If necessary, the steps of clearing old settings and mounting/umounting can be done individually. 
Signed-off-by: Stephen Hemminger
---
v3 -- incorporate review feedback
      add missing SPDX and env header
      overengineer the memory prefix string code
      add numa node argument
      fix some pylint warnings

v2 -- convert to python3

 usertools/hugepage-setup.py | 359 ++++++++++++++++++++++++++++++++++++
 1 file changed, 359 insertions(+)
 create mode 100755 usertools/hugepage-setup.py

diff --git a/usertools/hugepage-setup.py b/usertools/hugepage-setup.py
new file mode 100755
index 000000000000..9fe1422c5a68
--- /dev/null
+++ b/usertools/hugepage-setup.py
@@ -0,0 +1,359 @@
+#! /usr/bin/env python3
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2020 Microsoft Corporation
+#
+# Script to query and setup huge pages for DPDK applications.
+
+import sys
+import os
+import re
+import getopt
+import glob
+from os.path import exists, basename
+from math import log2
+
+# systemd mount point for huge pages
+HUGEDIR = '/dev/hugepages'
+
+# Standard binary prefix
+BINARY_PREFIX = "KMG"
+
+# command-line flags
+show_flag = None
+reserve_kb = None
+clear_flag = None
+hugepagesize_kb = None
+mount_flag = None
+unmount_flag = None
+numa_node = None
+
+
+def usage():
+    '''Print usage information for the program'''
+    mnt = HUGEDIR
+    argv0 = basename(sys.argv[0])
+    print("""
+Usage:
+------
+    %(argv0)s [options]
+
+Options:
+    --help, --usage:
+        Display usage information and quit
+
+    -s, --show:
+        Print the current huge page configuration.
+
+    --setup:
+        Simplified version of clear, umount, reserve, mount operations
+
+    -c, --clear:
+        Remove all huge pages
+
+    -r, --reserve:
+        Reserve huge pages. The size specified is in bytes, with
+        optional K, M or G suffix. The size must be a multiple
+        of the page size.
+
+    -p, --pagesize
+        Choose page size to use. If not specified, the default
+        system page size will be used.
+
+    -n, --node
+        Select numa node to reserve pages on.
+        If not specified, pages will be reserved on all nodes.
+
+    -m, --mount
+        Mount the system huge page directory %(mnt)s
+
+    -u, --umount
+        Unmount the system huge page directory %(mnt)s
+
+
+Examples:
+---------
+
+To display current huge page settings:
+    %(argv0)s -s
+
+To do a complete setup with 2 Gigabytes of 1G huge pages:
+    %(argv0)s -p 1G --setup 2G
+
+Equivalent to:
+    %(argv0)s -p 1G -c -u -r 2G -m
+
+To clear existing huge page settings and umount %(mnt)s
+    %(argv0)s -c -u
+
+    """ % locals())
+
+
+def fmt_memsize(sz_k):
+    '''Format memory size in kB into conventional format
+
+    Returns a string such as "64Kb", "2Mb" or "1Gb"; the value is
+    truncated to a whole number of the chosen unit.
+    '''
+    if sz_k < 1024:
+        # below 1 MB: still report the unit instead of a bare number
+        return '{}Kb'.format(sz_k)
+    # clamp so that sizes of 1 TB and above are reported in Gb
+    # instead of indexing past the end of BINARY_PREFIX
+    idx = min(int(log2(sz_k) / 10), len(BINARY_PREFIX) - 1)
+    return '{}{}b'.format(sz_k >> (idx * 10), BINARY_PREFIX[idx])
+
+
+def get_memsize(arg):
+    '''Convert memory size with suffix to kB
+
+    arg is a decimal number of bytes with an optional K, M or G
+    suffix (case insensitive). A value without suffix is rounded
+    down to whole kB. Exits with an error on malformed input.
+    '''
+    m = re.match(r'(\d+)([' + BINARY_PREFIX + r']?)$', arg.upper())
+    if m is None:
+        sys.exit('{} is not a valid size'.format(arg))
+    num = int(m.group(1))
+    suffix = m.group(2)
+    if suffix == "":
+        return num // 1024
+    idx = BINARY_PREFIX.find(suffix)
+    return num << (idx * 10)
+
+
+def is_numa():
+    '''Test if NUMA is necessary on this system'''
+    # per-node sysfs entries live under /sys/devices/system/node, the
+    # same tree the show/clear/set helpers below operate on
+    return exists('/sys/devices/system/node')
+
+
+def get_hugepages(path):
+    '''Read number of reserved pages'''
+    with open(path + '/nr_hugepages') as f:
+        return int(f.read())
+
+
+def show_numa_pages():
+    '''Show reserved huge pages for each NUMA node and page size'''
+    print('Node Pages Size')
+    for n in glob.glob('/sys/devices/system/node/node*'):
+        path = n + '/hugepages'
+        node = n[29:]  # slice after /sys/devices/system/node/node
+        for d in os.listdir(path):
+            sz = int(d[10:-2])  # slice out of hugepages-NNNkB
+            nr_pages = get_hugepages(path + '/' + d)
+            if nr_pages > 0:
+                pg_sz = fmt_memsize(sz)
+                print('{:<4} {:<5} {}'.format(node, nr_pages, pg_sz))
+
+
+def show_non_numa_pages():
+    '''Show reserved huge pages for each page size (system wide)'''
+    print('Pages Size')
+    path = '/sys/kernel/mm/hugepages'
+    for d in os.listdir(path):
+        sz = int(d[10:-2])  # slice out of hugepages-NNNkB
+        nr_pages = get_hugepages(path + '/' + d)
+        if nr_pages > 0:
+            pg_sz = fmt_memsize(sz)
+            print('{:<5} {}'.format(nr_pages, pg_sz))
+
+
+def show_pages():
+    '''Show existing huge page settings'''
+    if is_numa():
+        show_numa_pages()
+    else:
+        show_non_numa_pages()
+
+
+def clear_numa_pages():
+    '''Release the huge pages of every size on every NUMA node'''
+    for path in glob.glob(
+            '/sys/devices/system/node/node*/hugepages/hugepages-*'):
+        with open(path + '/nr_hugepages', 'w') as f:
+            f.write('0\n')
+
+
+def clear_non_numa_pages():
+    '''Release the huge pages of every size (system wide)'''
+    for path in glob.glob('/sys/kernel/mm/hugepages/hugepages-*'):
+        with open(path + '/nr_hugepages', 'w') as f:
+            f.write('0\n')
+
+
+def clear_pages():
+    '''Clear all existing huge page mappings'''
+    if is_numa():
+        clear_numa_pages()
+    else:
+        clear_non_numa_pages()
+
+
+def default_size():
+    '''Get default huge page size from /proc/meminfo'''
+    with open('/proc/meminfo') as f:
+        for line in f:
+            if line.startswith('Hugepagesize:'):
+                return int(line.split()[1])
+    return None
+
+
+def set_numa_pages(nr_pages, hugepgsz):
+    '''Reserve nr_pages huge pages of hugepgsz kB on the selected node,
+    or on every node when no node was selected'''
+    if numa_node is not None:
+        nodes = ['/sys/devices/system/node/node{}/hugepages'.format(numa_node)]
+    else:
+        nodes = glob.glob('/sys/devices/system/node/node*/hugepages')
+
+    for n in nodes:
+        path = '{}/hugepages-{}kB/nr_hugepages'.format(n, hugepgsz)
+        if not exists(path):
+            sys.exit(
+                '{}Kb is not a valid system huge page size'.format(hugepgsz))
+        with open(path, 'w') as f:
+            f.write('{}\n'.format(nr_pages))
+
+
+def set_non_numa_pages(nr_pages, hugepgsz):
+    '''Reserve nr_pages huge pages of hugepgsz kB (system wide)'''
+    path = '/sys/kernel/mm/hugepages/hugepages-{}kB/nr_hugepages'.format(
+        hugepgsz)
+    if not exists(path):
+        sys.exit('{}Kb is not a valid system huge page size'.format(hugepgsz))
+
+    with open(path, 'w') as f:
+        f.write('{}\n'.format(nr_pages))
+
+
+def set_pages(pages, hugepgsz):
+    '''Sets the number of huge pages to be reserved'''
+    if is_numa():
+        set_numa_pages(pages, hugepgsz)
+    else:
+        set_non_numa_pages(pages, hugepgsz)
+
+
+def mount_huge(pagesize):
+    '''Mount hugetlbfs on HUGEDIR; pagesize is in bytes, or None to
+    use the default huge page size'''
+    cmd = "mount -t hugetlbfs"
+    if pagesize:
+        cmd += ' -o pagesize={}'.format(pagesize)
+    cmd += ' nodev {}'.format(HUGEDIR)
+    os.system(cmd)
+
+
+def show_mount():
+    '''Show the mount points of all mounted hugetlbfs file systems'''
+    mounted = None
+    with open('/proc/mounts') as f:
+        for line in f:
+            fields = line.split()
+            if fields[2] != 'hugetlbfs':
+                continue
+            if not mounted:
+                print("Hugepages mounted on:", end=" ")
+                mounted = True
+            print(fields[1], end=" ")
+    if mounted:
+        print()
+    else:
+        print("Hugepages not mounted")
+
+
+def parse_args():
+    '''Parses the command-line arguments given by the user and takes the
+    appropriate action for each'''
+    global clear_flag
+    global hugepagesize_kb
+    global mount_flag
+    global numa_node
+    global reserve_kb
+    global show_flag
+    global unmount_flag
+
+    if len(sys.argv) <= 1:
+        usage()
+        sys.exit(0)
+
+    try:
+        # "umount" is the spelling documented by usage(); "unmount" is
+        # kept so that existing invocations keep working
+        opts, args = getopt.getopt(sys.argv[1:], "r:p:csmun:", [
+            "help", "usage", "show", "clear", "setup=", "reserve=",
+            "pagesize=", "node=", "mount", "umount", "unmount"
+        ])
+    except getopt.GetoptError as error:
+        print(str(error))
+        print("Run '%s --usage' for further information" % sys.argv[0])
+        sys.exit(1)
+
+    for opt, arg in opts:
+        if opt in ('--help', '--usage'):
+            usage()
+            sys.exit(0)
+        elif opt == '--setup':
+            clear_flag = True
+            unmount_flag = True
+            reserve_kb = get_memsize(arg)
+            mount_flag = True
+        elif opt in ('--show', '-s'):
+            show_flag = True
+        elif opt in ('--clear', '-c'):
+            clear_flag = True
+        elif opt in ('--reserve', '-r'):
+            reserve_kb = get_memsize(arg)
+        elif opt in ('--pagesize', '-p'):
+            hugepagesize_kb = get_memsize(arg)
+        elif opt in ('--umount', '--unmount', '-u'):
+            unmount_flag = True
+        elif opt in ('--mount', '-m'):
+            mount_flag = True
+        elif opt in ('--node', '-n'):
+            if not arg.isdigit():
+                sys.exit('Numeric value for numa node expected')
+            numa_node = arg
+
+
+def do_arg_actions():
+    '''do the actual action requested by the user'''
+    global hugepagesize_kb
+
+    if clear_flag:
+        clear_pages()
+    if unmount_flag:
+        os.system("umount " + HUGEDIR)
+    if reserve_kb:
+        if hugepagesize_kb is None:
+            hugepagesize_kb = default_size()
+        if not hugepagesize_kb:
+            sys.exit('Unable to determine a valid huge page size')
+        if reserve_kb % hugepagesize_kb != 0:
+            sys.exit('{} is not a multiple of page size {}'.format(
+                reserve_kb, hugepagesize_kb))
+        nr_pages = reserve_kb // hugepagesize_kb
+        set_pages(nr_pages, hugepagesize_kb)
+    if mount_flag:
+        # without an explicit or default page size let mount(8) pick
+        # the default instead of failing on None * 1024
+        if hugepagesize_kb:
+            mount_huge(hugepagesize_kb * 1024)
+        else:
+            mount_huge(None)
+    if show_flag:
+        show_pages()
+        print()
+        show_mount()
+
+
+def main():
+    '''Program main entry point'''
+    parse_args()
+    do_arg_actions()
+
+
+if __name__ == "__main__":
+    main()
-- 
2.27.0