From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga03.intel.com (mga03.intel.com [134.134.136.65]) by dpdk.org (Postfix) with ESMTP id D31DAF04 for ; Mon, 18 Sep 2017 22:44:48 +0200 (CEST) Received: from fmsmga003.fm.intel.com ([10.253.24.29]) by orsmga103.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 18 Sep 2017 13:44:47 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.42,414,1500966000"; d="scan'208";a="901461776" Received: from fyigit-mobl1.ger.corp.intel.com (HELO [10.237.220.57]) ([10.237.220.57]) by FMSMGA003.fm.intel.com with ESMTP; 18 Sep 2017 13:44:43 -0700 References: <1481570642-15138-1-git-send-email-lboccass@brocade.com> <1481647672-9187-1-git-send-email-lboccass@brocade.com> From: Ferruh Yigit To: Christian Ehrhardt , lboccass@brocade.com, stefan.bader@canonical.com Cc: dev@dpdk.org Message-ID: Date: Mon, 18 Sep 2017 21:44:41 +0100 User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Thunderbird/52.3.0 MIME-Version: 1.0 In-Reply-To: <1481647672-9187-1-git-send-email-lboccass@brocade.com> Content-Type: text/plain; charset=utf-8 Content-Language: en-US Content-Transfer-Encoding: 8bit Subject: Re: [dpdk-dev] [PATCH v2] SDK: Add scripts to initialize DPDK runtime X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 18 Sep 2017 20:44:49 -0000 On 12/13/2016 4:47 PM, lboccass at brocade.com (Luca Boccassi) wrote: > From: Christian Ehrhardt > > A tools/init directory is added with dpdk-init, a script that can be > used to initialize a DPDK runtime environment. 2 config files with > default options, dpdk.conf and interfaces, are provided as well > together with a SysV init script and a systemd service unit. I guess this patch is missing comments; it is good to have helper scripts (and v2 has a BSD license). 
Is this correct: this script runs as a service and does the following based on the config: mount_hugetlbfs, setup_hugepages, bind_interfaces. So it keeps interfaces bound to DPDK after reboot? [intentionally leaving the rest of the patch since this is an old patch] > > v2: relicensed dpdk-init.in from GPL3 to BSD-3-clause with authors' > permission > > Signed-off-by: Luca Boccassi > Signed-off-by: Christian Ehrhardt > --- > mk/rte.sdkinstall.mk | 21 ++++ > tools/init/dpdk-init.in | 274 +++++++++++++++++++++++++++++++++++++++++++++ > tools/init/dpdk.conf | 60 ++++++++++ > tools/init/dpdk.init.in | 57 ++++++++++ > tools/init/dpdk.service.in | 12 ++ > tools/init/interfaces | 16 +++ > 6 files changed, 440 insertions(+) > create mode 100755 tools/init/dpdk-init.in > create mode 100644 tools/init/dpdk.conf > create mode 100755 tools/init/dpdk.init.in > create mode 100644 tools/init/dpdk.service.in > create mode 100644 tools/init/interfaces > > diff --git a/mk/rte.sdkinstall.mk b/mk/rte.sdkinstall.mk > index 7b0d8b5..a3a5a9a 100644 > --- a/mk/rte.sdkinstall.mk > +++ b/mk/rte.sdkinstall.mk > @@ -69,6 +69,14 @@ datadir ?= $(datarootdir)/dpdk > mandir ?= $(datarootdir)/man > sdkdir ?= $(datadir) > targetdir ?= $(datadir)/$(RTE_TARGET) > +# If pkgconfig or systemd.pc are not available fall back to most likely default > +ifeq ($(shell pkg-config systemd; echo $$?), 0) > +systemduserunitdir ?= $(shell pkg-config --variable=systemdsystemunitdir systemd) > +else > +systemduserunitdir ?= /lib/systemd/system > +endif > +initdir ?= /etc/init.d > +configdir ?= /etc/dpdk > > # The install directories may be staged in DESTDIR > > @@ -162,6 +170,19 @@ install-sdk: > $(Q)cp -a $O/app/dpdk-pmdinfogen $(DESTDIR)$(targetdir)/app > $(Q)$(call rte_symlink, $(DESTDIR)$(includedir), $(DESTDIR)$(targetdir)/include) > $(Q)$(call rte_symlink, $(DESTDIR)$(libdir), $(DESTDIR)$(targetdir)/lib) > + $(Q)$(call rte_mkdir, $(DESTDIR)$(initdir)) > + $(Q)sed -e "s|@@configdir@@|$(configdir)|g" -e "s|@@sbindir@@|$(sbindir)|g" \ 
> + $(RTE_SDK)/tools/init/dpdk.init.in > $(DESTDIR)$(initdir)/dpdk > + $(Q)chmod +x $(DESTDIR)$(initdir)/dpdk > + $(Q)$(call rte_mkdir, $(DESTDIR)$(systemduserunitdir)) > + $(Q)sed "s|@@sbindir@@|$(sbindir)|g" $(RTE_SDK)/tools/init/dpdk.service.in > \ > + $(DESTDIR)$(systemduserunitdir)/dpdk.service > + $(Q)$(call rte_mkdir, $(DESTDIR)$(configdir)) > + $(Q)cp -a $(RTE_SDK)/tools/init/dpdk.conf $(DESTDIR)$(configdir) > + $(Q)cp -a $(RTE_SDK)/tools/init/interfaces $(DESTDIR)$(configdir) > + $(Q)sed -e "s|@@configdir@@|$(configdir)|g" -e "s|@@sbindir@@|$(sbindir)|g" \ > + $(RTE_SDK)/tools/init/dpdk-init.in > $(DESTDIR)$(sbindir)/dpdk-init > + $(Q)chmod +x $(DESTDIR)$(sbindir)/dpdk-init > > install-doc: > ifneq ($(wildcard $O/doc/html),) > diff --git a/tools/init/dpdk-init.in b/tools/init/dpdk-init.in > new file mode 100755 > index 0000000..a1a44f7 > --- /dev/null > +++ b/tools/init/dpdk-init.in > @@ -0,0 +1,274 @@ > +#!/bin/sh > +# > +# dpdk-init: startup script to initialize a dpdk runtime environment > +# > +# Autor: Stefan Bader > +# Autor: Christian Ehrhardt > +# > +# BSD LICENSE > +# > +# Copyright(c) 2015-2016 Canonical Ltd. All rights reserved. > +# All rights reserved. > +# > +# Redistribution and use in source and binary forms, with or without > +# modification, are permitted provided that the following conditions > +# are met: > +# > +# * Redistributions of source code must retain the above copyright > +# notice, this list of conditions and the following disclaimer. > +# * Redistributions in binary form must reproduce the above copyright > +# notice, this list of conditions and the following disclaimer in > +# the documentation and/or other materials provided with the > +# distribution. > +# * Neither the name of Intel Corporation nor the names of its > +# contributors may be used to endorse or promote products derived > +# from this software without specific prior written permission. 
> +# > +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > +# > +set -e > + > +DPDK_BIND="@@sbindir@@/dpdk-devbind" > +DPDK_INTERF="@@configdir@@/interfaces" > +DPDK_CONF="@@configdir@@/dpdk.conf" > + > + > +# pagesize supports [G|g]/[M|m]/[K|k] > +get_kbytes() { > + local unit > + local num > + unit=$(echo "${1}" | sed 's/[0-9]*//g') > + num=$(echo "${1}" | sed 's/[^0-9]*//g') > + case ${unit} in > + *g | *G) > + echo $((num*1024*1024)) > + ;; > + *m | *M) > + echo $((num*1024)) > + ;; > + *k | *K) > + echo $((num)) > + ;; > + *) > + echo $((num/1024)) > + ;; > + esac > +} > + > +get_default_hpgsz() { > + default_hpgsz=$(grep "Hugepagesize:" /proc/meminfo \ > + | sed 's/^Hugepagesize:\s*//g' | sed 's/\s*kB$//g') > + echo "${default_hpgsz}" > +} > + > +get_hugetlbfs_mountpoint() { > + local requested_hpgsz > + local mp_hpgsz > + requested_hpgsz=$(get_kbytes "${1}") > + > + grep hugetlbfs /proc/mounts | while read \ > + mntfrom mntpoint mntfstype mntopt mntdump mntfsck; do > + > + # check if the current muntpoint is of the requested huge page size > + case ${mntopt} in > + *pagesize=*) > + mp_hpgsz=$(echo "${mntopt}" | sed 's/.*pagesize=//g' | sed 's/,.*//g') > + mp_hpgsz=$(get_kbytes "${mp_hpgsz}") > + ;; > + *) > + 
mp_hpgsz=$(get_default_hpgsz) > + ;; > + esac > + if [ "${requested_hpgsz}" -eq "${mp_hpgsz}" ]; then > + echo "${mntpoint}" > + return > + fi > + done > +} > + > +_mount_hugetlbfs() { > + local MNT="/dev/hugepages" > + local MNTOPTS="" > + local requested_hpgsz > + local default_hpgsz > + requested_hpgsz=$(get_kbytes "${1}") > + default_hpgsz=$(get_default_hpgsz) > + > + # kernel might not support the requested size > + if [ ! -d "/sys/kernel/mm/hugepages/hugepages-${requested_hpgsz}kB" ]; then > + echo "WARNING: requested page size of ${requested_hpgsz}kB " \ > + "not supported by the kernel" > + return 0 > + fi > + > + # special case if this is not the default huge page size > + if [ "${requested_hpgsz}" -ne "${default_hpgsz}" ]; then > + MNT="${MNT}-${requested_hpgsz}" > + MNTOPTS="pagesize=${requested_hpgsz}K" > + fi > + > + if [ ! -e "${MNT}" ]; then > + mkdir "${MNT}" > + if [ $? -ne 0 ]; then > + echo "Could not create directory ${MNT}!" >&2 > + return 1 > + fi > + fi > + mount -thugetlbfs hugetlbfs "${MNT}" -o "${MNTOPTS}" > + return $? > +} > + > +# > +# The DPDK library will use the first mounted instance it finds for a given > +# page size. so if there is already one for a given size there is no need to > +# create another for the same huge page size. > +# > +mount_hugetlbfs() { > + if [ ! -r "$DPDK_CONF" ]; then > + return 1 > + fi > + . 
"$DPDK_CONF" > + > + # if a page size is requested, there has to be a mountpoint for that size > + if [ -n "${NR_2M_PAGES}" -a -z "$(get_hugetlbfs_mountpoint '2M')" ]; then > + _mount_hugetlbfs 2M > + fi > + if [ -n "${NR_16M_PAGES}" -a -z "$(get_hugetlbfs_mountpoint '16M')" ]; then > + _mount_hugetlbfs 16M > + fi > + if [ -n "${NR_1G_PAGES}" -a -z "$(get_hugetlbfs_mountpoint '1G')" ]; then > + _mount_hugetlbfs 1G > + fi > +} > + > +_setup_hugepages() { > + MMDIR="/sys/kernel/mm/hugepages/${1}" > + PAGES=${2} > + > + if [ "$PAGES" != "" ]; then > + if [ "$PAGES" -gt 0 ]; then > + if [ -d "$MMDIR" -a -w "$MMDIR/nr_hugepages" ]; then > + # increases the chance to allocate enough huge pages > + # configurable, since it comes at a perf penality > + if [ "$DROPCACHE_BEFORE_HP_ALLOC" = "1" ]; then > + echo 3 > /proc/sys/vm/drop_caches > + fi > + > + echo "$PAGES" > "$MMDIR/nr_hugepages" > + > + GOTPAGES=$(cat "$MMDIR/nr_hugepages") > + if [ "$GOTPAGES" -lt "$PAGES" ]; then > + echo "WARNING: could not allocate $PAGES at " \ > + "$MMDIR/nr_hugepages (only got $GOTPAGES)." > + fi > + else > + echo "WARNING: $MMDIR/nr_hugepages not found/writable" > + fi > + fi > + fi > +} > + > +# > +# Reserve a certain amount of hugepages (defined in /etc/dpdk.conf) > +# > +setup_hugepages() { > + if [ ! -r "$DPDK_CONF" ]; then > + return 1 > + fi > + . "$DPDK_CONF" > + > + _setup_hugepages "hugepages-2048kB" "$NR_2M_PAGES" > + _setup_hugepages "hugepages-16384kB" "$NR_16M_PAGES" > + _setup_hugepages "hugepages-1048576kB" "$NR_1G_PAGES" > + > + # dpdk uses 2*#hugepages mappings, increase for huge systems LP #1507921 > + if [ -d /sys/kernel/mm/hugepages ]; then > + max_map_count=$(awk -v pad=65530 '{tot+=$1}END{print tot*2+pad}' \ > + /sys/kernel/mm/hugepages/hugepages-*/nr_hugepages) > + sysctl -q vm.max_map_count="${max_map_count:-65530}" > + fi > + > + return 0 > +} > + > +# > +# Allow NICs to be automatically bound to DPDK compatible drivers on boot. 
> +# > +bind_interfaces() { > + if [ ! -r "$DPDK_INTERF" ]; then > + return 0 > + fi > + grep -v '^[ \t]*#' "$DPDK_INTERF" | while read BUS ID MOD; do > + if [ "$BUS" = "" -o "$ID" = "" -o "$MOD" = "" ]; then > + echo "WARNING: incomplete spec in $DPDK_INTERF" \ > + " - BUS '$BUS' ID '$ID' MOD '$MOD'" > + continue > + fi > + if [ "$BUS" != "pci" ]; then > + echo "WARNING: incompatible bus '$BUS' in $DPDK_INTERF" > + continue > + fi > + > + SYSFSPATH="/sys/bus/$BUS/devices/$ID" > + if [ ! -e "$SYSFSPATH" ]; then > + echo "WARNING: invalid pci ID '$ID' in $DPDK_INTERF" \ > + " - '$SYSFSPATH' does not exist" > + continue > + fi > + if [ -L "$SYSFSPATH/driver" ]; then > + CUR=$(readlink "$SYSFSPATH/driver") > + CUR=$(basename "$CUR") > + else > + # device existing, but currently unregistered > + CUR="" > + fi > + if [ "$MOD" != "$CUR" ]; then > + modprobe -q "$MOD" || true > + # cloud img have no linux-image-extra initially (uip_pci_generic) > + # so check if the module is available (loadable/built in) > + if [ -e "/sys/bus/pci/drivers/${MOD}" ]; then > + echo "Reassigning pci:$ID to $MOD" > + $DPDK_BIND -b "$MOD" "$ID" > + else > + echo "Warning: failed assigning pci:$ID," \ > + " module $MOD not available" > + fi > + else > + echo "pci:$ID already assigned to $MOD" > + fi > + done > +} > + > + > + > +case "$1" in > +start) > + mount_hugetlbfs > + setup_hugepages > + bind_interfaces > + ;; > +stop) > + ;; > +reload|force-reload) > + setup_hugepages > + bind_interfaces > + ;; > +status) > + $DPDK_BIND --status > + ;; > +*) > + echo "Usage: $0 {start|stop|reload|force-reload|status}" > + exit 1 > + ;; > +esac > + > diff --git a/tools/init/dpdk.conf b/tools/init/dpdk.conf > new file mode 100644 > index 0000000..a5aea86 > --- /dev/null > +++ b/tools/init/dpdk.conf > @@ -0,0 +1,60 @@ > +# > +# The number of 2M hugepages to reserve on system boot > +# > +# Default is 0 > +# To e.g. 
let it reserve 128M via 64x 2M Hugepages set: > +# NR_2M_PAGES=64 > + > +# > +# The number of 1G hugepages to reserve on system boot > +# > +# Default is 0 > +# To e.g. let it reserve 2G via 2x 1G Hugepages set: > +# NR_1G_PAGES=2 > + > +# The number of 16M hugepages to reserve, supported e.g. on ppc64el > +# > +# Default is 0 > +# To e.g. let it reserve 512M via 32x 16M Hugepages set: > +# NR_16M_PAGES=32 > + > +# > +# Dropping slab and pagecache can help to successfully allocate hugepages, > +# especially later in the lifecycle of a system. > +# This comes at the cost of loosing all slab and pagecache on (re)start > +# of the dpdk service - therefore the default is off. > +# > +# Default is 0 > +# Set to 1 to enable it > +#DROPCACHE_BEFORE_HP_ALLOC=0 > + > +# The DPDK library will use the first mounted hugetlbfs. > +# The init scripts try to ensure there is at least one default hugetlbfs > +# mountpoint on start. > +# If you have multiple hugetlbfs mountpoints for a complex (e.g. specific numa > +# policies) setup it should be controlled by the admin instead of this init > +# script. In that case specific mountpoints can be provided as parameters to > +# the DPDK library. > + > +# Hardware may support other granularities of hugepages (like 4M). But the > +# larger the hugepages the earlier those should be allocated. > +# Note: the dpdk init scripts will report warnings, but not fail if they could > +# not allocate the requested amount of hugepages. > +# The more or the larger the hugepages to be allocated are, the more it is > +# recommended to do the reservation as kernel commandline arguments. > +# To do so edit /etc/default/grub: GRUB_CMDLINE_LINUX_DEFAULT > +# and add [hugepagesz=xx] hugepages=yy ... 
> +# > +# Kernel commandline config: > +# hugepagesz sets the size for the next hugepages reservation (default 2M) > +# hugepages reserves the given number of hugepages of the size set before > +# > +# After modifying /etc/default/grub, the command "update-grub" has to be > +# run in order to re-generate the grub config files. The new values will > +# be used after next reboot. > +# > +# example: > +# GRUB_CMDLINE_LINUX_DEFAULT="... hugepages=16 hugepagesz=1G hugepages=2" > +# > +# If the system supports it, this will reserve 16x 2M pages and 2x 1G pages. > +# > diff --git a/tools/init/dpdk.init.in b/tools/init/dpdk.init.in > new file mode 100755 > index 0000000..1e26450 > --- /dev/null > +++ b/tools/init/dpdk.init.in > @@ -0,0 +1,57 @@ > +#!/bin/sh > + > +### BEGIN INIT INFO > +# Provides: dpdk > +# Required-Start: $remote_fs $local_fs > +# Required-Stop: $remote_fs $local_fs > +# Default-Start: S > +# Default-Stop: 0 1 6 > +# Short-Description: start dpdk runtime environment > +### END INIT INFO > + > +set -e > + > +PATH="/sbin:/bin:/usr/bin" > + > +[ -d @@configdir@@ ] || exit 0 > + > +# Define LSB log_* functions. > +# Depend on lsb-base (>= 3.2-14) to ensure that this file is present > +# and status_of_proc is working. > +. /lib/lsb/init-functions > + > +error=0 > +case "$1" in > +start) > + log_action_begin_msg "Starting DPDK environment" "dpdk" > + output=$(@@sbindir@@/dpdk-init start 2>&1) || error="$?" > + if [ ! -z "$output" ]; then > + echo "$output" | while read line; do > + log_action_cont_msg "$line" > + done > + fi > + log_action_end_msg $error > + exit $error > + ;; > +stop) > + ;; > +restart|force-reload) > + ;; > +status) > + output=$(@@sbindir@@/dpdk-init --status 2>&1) || error="$?" > + if [ ! 
-z "$output" ]; then > + echo "$output" | while read line; do > + log_action_cont_msg "$line" > + done > + fi > + log_action_end_msg $error > + exit $error > + ;; > +*) > + echo "Usage: $0 {start|stop|restart|force-reload|status}" > + exit 1 > + ;; > +esac > + > +exit 0 > + > diff --git a/tools/init/dpdk.service.in b/tools/init/dpdk.service.in > new file mode 100644 > index 0000000..1968081 > --- /dev/null > +++ b/tools/init/dpdk.service.in > @@ -0,0 +1,12 @@ > +[Unit] > +Description=DPDK runtime environment > +DefaultDependencies=false > +After=network-pre.target local-fs.target > + > +[Service] > +Type=oneshot > +RemainAfterExit=yes > +ExecStart=@@sbindir@@/dpdk-init start > + > +[Install] > +WantedBy=multi-user.target > diff --git a/tools/init/interfaces b/tools/init/interfaces > new file mode 100644 > index 0000000..73c3fca > --- /dev/null > +++ b/tools/init/interfaces > @@ -0,0 +1,16 @@ > +# > +# Currently only "pci" is supported > +# Device ID on the specified bus > +# Driver to bind against (vfio-pci, uio_pci_generic, igb_uio or > +# rte_kni) > +# > +# Be aware that the two dpdk compatible drivers uio_pci_generic and vfio-pci are > +# part of linux-image-extra- package on Debian-based distributions. > +# This package is not always installed by default - for example in cloud-images. > +# So please install it in case you run into missing module issues. > +# > +# > +# pci 0000:04:00.0 vfio-pci > +# pci 0000:04:00.1 uio_pci_generic > +# pci 0000:05:00.0 igb_uio > +# pci 0000:06:00.0 rte_kni >