DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH] SDK: Add scripts to initialize DPDK runtime
@ 2016-12-12 19:24 Luca Boccassi
  2016-12-12 21:12 ` Bruce Richardson
  2016-12-13 16:47 ` [dpdk-dev] [PATCH v2] " Luca Boccassi
  0 siblings, 2 replies; 17+ messages in thread
From: Luca Boccassi @ 2016-12-12 19:24 UTC (permalink / raw)
  To: dev; +Cc: Christian Ehrhardt, Luca Boccassi

From: Christian Ehrhardt <christian.ehrhardt@canonical.com>

A tools/init directory is added with dpdk-init, a script that can be
used to initialize a DPDK runtime environment. 2 config files with
default options, dpdk.conf and interfaces, are provided as well
together with a SysV init script and a systemd service unit.

Signed-off-by: Luca Boccassi <lboccass@brocade.com>
Signed-off-by: Christian Ehrhardt <christian.ehrhardt@canonical.com>
---
 mk/rte.sdkinstall.mk       |  21 ++++
 tools/init/dpdk-init.in    | 256 +++++++++++++++++++++++++++++++++++++++++++++
 tools/init/dpdk.conf       |  60 +++++++++++
 tools/init/dpdk.init.in    |  57 ++++++++++
 tools/init/dpdk.service.in |  12 +++
 tools/init/interfaces      |  16 +++
 6 files changed, 422 insertions(+)
 create mode 100755 tools/init/dpdk-init.in
 create mode 100644 tools/init/dpdk.conf
 create mode 100755 tools/init/dpdk.init.in
 create mode 100644 tools/init/dpdk.service.in
 create mode 100644 tools/init/interfaces

diff --git a/mk/rte.sdkinstall.mk b/mk/rte.sdkinstall.mk
index 7b0d8b5..a3a5a9a 100644
--- a/mk/rte.sdkinstall.mk
+++ b/mk/rte.sdkinstall.mk
@@ -69,6 +69,14 @@ datadir     ?=       $(datarootdir)/dpdk
 mandir      ?=       $(datarootdir)/man
 sdkdir      ?=                $(datadir)
 targetdir   ?=                $(datadir)/$(RTE_TARGET)
+# systemd *system* unit dir from systemd.pc; most likely default if unavailable
+ifeq ($(shell pkg-config --exists systemd 2>/dev/null; echo $$?), 0)
+systemduserunitdir ?= $(shell pkg-config --variable=systemdsystemunitdir systemd)
+else
+systemduserunitdir ?= /lib/systemd/system
+endif
+initdir     ?= /etc/init.d
+configdir   ?= /etc/dpdk
 
 # The install directories may be staged in DESTDIR
 
@@ -162,6 +170,19 @@ install-sdk:
 	$(Q)cp -a               $O/app/dpdk-pmdinfogen   $(DESTDIR)$(targetdir)/app
 	$(Q)$(call rte_symlink, $(DESTDIR)$(includedir), $(DESTDIR)$(targetdir)/include)
 	$(Q)$(call rte_symlink, $(DESTDIR)$(libdir),     $(DESTDIR)$(targetdir)/lib)
+	$(Q)$(call rte_mkdir,                            $(DESTDIR)$(initdir))
+	$(Q)sed -e "s|@@configdir@@|$(configdir)|g" -e "s|@@sbindir@@|$(sbindir)|g" \
+		$(RTE_SDK)/tools/init/dpdk.init.in > $(DESTDIR)$(initdir)/dpdk
+	$(Q)chmod +x                                     $(DESTDIR)$(initdir)/dpdk
+	$(Q)$(call rte_mkdir,                            $(DESTDIR)$(systemduserunitdir))
+	$(Q)sed "s|@@sbindir@@|$(sbindir)|g" $(RTE_SDK)/tools/init/dpdk.service.in > \
+		$(DESTDIR)$(systemduserunitdir)/dpdk.service
+	$(Q)$(call rte_mkdir,                            $(DESTDIR)$(configdir))
+	$(Q)cp -a               $(RTE_SDK)/tools/init/dpdk.conf  $(DESTDIR)$(configdir)
+	$(Q)cp -a               $(RTE_SDK)/tools/init/interfaces $(DESTDIR)$(configdir)
+	$(Q)sed -e "s|@@configdir@@|$(configdir)|g" -e "s|@@sbindir@@|$(sbindir)|g" \
+		$(RTE_SDK)/tools/init/dpdk-init.in > $(DESTDIR)$(sbindir)/dpdk-init
+	$(Q)chmod +x                                     $(DESTDIR)$(sbindir)/dpdk-init
 
 install-doc:
 ifneq ($(wildcard $O/doc/html),)
diff --git a/tools/init/dpdk-init.in b/tools/init/dpdk-init.in
new file mode 100755
index 0000000..89e0399
--- /dev/null
+++ b/tools/init/dpdk-init.in
@@ -0,0 +1,256 @@
+#!/bin/sh
+#
+# dpdk-init: startup script to initialize a dpdk runtime environment
+#
+# Copyright 2015-2016 Canonical Ltd.
+# Author: Stefan Bader <stefan.bader@canonical.com>
+# Author: Christian Ehrhardt <christian.ehrhardt@canonical.com>
+#
+#    This program is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License version 3,
+#    as published by the Free Software Foundation.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+set -e
+
+DPDK_BIND="@@sbindir@@/dpdk-devbind"
+DPDK_INTERF="@@configdir@@/interfaces"
+DPDK_CONF="@@configdir@@/dpdk.conf"
+
+
+# get_kbytes SIZE - print SIZE in kB; suffix [G|g]/[M|m]/[K|k], none = bytes
+get_kbytes() {
+    local suffix
+    local digits
+    local factor
+    # split the argument into its non-digit part and its digit part
+    suffix=$(echo "${1}" | sed 's/[0-9]*//g')
+    digits=$(echo "${1}" | sed 's/[^0-9]*//g')
+    case ${suffix} in
+    *[gG]) factor=$((1024*1024)) ;;
+    *[mM]) factor=1024 ;;
+    *[kK]) factor=1 ;;
+    *)
+        # no recognised suffix: the value is in bytes
+        echo $((digits/1024))
+        return
+        ;;
+    esac
+    # scale the number by the factor selected from the suffix
+    echo $((digits*factor))
+}
+
+get_default_hpgsz() {    # prints the Hugepagesize value of /proc/meminfo (kB)
+    default_hpgsz=$(grep "Hugepagesize:" /proc/meminfo \
+        | sed -e 's/^Hugepagesize:\s*//g' -e 's/\s*kB$//g')
+    echo "${default_hpgsz}"
+}
+
+get_hugetlbfs_mountpoint() {
+    local want_kb
+    local have_kb
+    want_kb=$(get_kbytes "${1}")
+    # print the first hugetlbfs mount whose page size equals the request
+    grep hugetlbfs /proc/mounts | while read \
+        mntfrom mntpoint mntfstype mntopt mntdump mntfsck; do
+
+        # check if the current mountpoint is of the requested huge page size
+        case ${mntopt} in
+        *pagesize=*)
+            have_kb=$(echo "${mntopt}" | sed -e 's/.*pagesize=//g' -e 's/,.*//g')
+            have_kb=$(get_kbytes "${have_kb}")
+            ;;
+        *)
+            have_kb=$(get_default_hpgsz)
+            ;;
+        esac
+        if [ "${want_kb}" -eq "${have_kb}" ]; then
+            echo "${mntpoint}"
+            return
+        fi
+    done
+}
+
+_mount_hugetlbfs() {   # $1: page size (e.g. 2M); mounts a hugetlbfs for it
+    local MNT="/dev/hugepages"
+    local MNTOPTS=""
+    local requested_hpgsz
+    local default_hpgsz
+    requested_hpgsz=$(get_kbytes "${1}")
+    default_hpgsz=$(get_default_hpgsz)
+
+    # kernel might not support the requested size
+    if [ ! -d "/sys/kernel/mm/hugepages/hugepages-${requested_hpgsz}kB" ]; then
+        echo "WARNING: requested page size of ${requested_hpgsz}kB " \
+             "not supported by the kernel"
+        return 0
+    fi
+
+    # special case if this is not the default huge page size
+    if [ "${requested_hpgsz}" -ne "${default_hpgsz}" ]; then
+        MNT="${MNT}-${requested_hpgsz}"
+        MNTOPTS="pagesize=${requested_hpgsz}K"
+    fi
+
+    # test mkdir inside the condition: the script runs under "set -e", so a
+    # separate "$?" check would never be reached when mkdir fails
+    if [ ! -e "${MNT}" ] && ! mkdir "${MNT}"; then
+        echo "Could not create directory ${MNT}!" >&2
+        return 1
+    fi
+    # only pass -o when there are mount options; the status of mount is the
+    # return value of this function
+    mount -t hugetlbfs ${MNTOPTS:+-o "${MNTOPTS}"} hugetlbfs "${MNT}"
+}
+
+#
+# The DPDK library will use the first mounted instance it finds for a given
+# page size. so if there is already one for a given size there is no need to
+# create another for the same huge page size.
+#
+mount_hugetlbfs() {
+    # bail out (status 1) when the config file cannot be read
+    [ -r "$DPDK_CONF" ] || return 1
+    # pull in the NR_*_PAGES settings tested below
+    . "$DPDK_CONF"
+
+    # if a page size is requested, there has to be a mountpoint for that size
+    if [ -n "${NR_2M_PAGES}" ] && [ -z "$(get_hugetlbfs_mountpoint '2M')" ]; then
+        _mount_hugetlbfs 2M
+    fi
+    if [ -n "${NR_16M_PAGES}" ] && [ -z "$(get_hugetlbfs_mountpoint '16M')" ]; then
+        _mount_hugetlbfs 16M
+    fi
+    if [ -n "${NR_1G_PAGES}" ] && [ -z "$(get_hugetlbfs_mountpoint '1G')" ]; then
+        _mount_hugetlbfs 1G
+    fi
+}
+
+_setup_hugepages() {   # $1: hugepages-<size>kB sysfs dir name, $2: page count
+    MMDIR="/sys/kernel/mm/hugepages/${1}"
+    PAGES=${2}
+
+    # nothing to do unless a positive page count was requested
+    [ "$PAGES" != "" ] || return 0
+    [ "$PAGES" -gt 0 ] || return 0
+
+    if [ ! -d "$MMDIR" ] || [ ! -w "$MMDIR/nr_hugepages" ]; then
+        echo "WARNING: $MMDIR/nr_hugepages not found/writable"
+        return 0
+    fi
+
+    # increases the chance to allocate enough huge pages
+    # configurable, since it comes at a perf penalty
+    if [ "$DROPCACHE_BEFORE_HP_ALLOC" = "1" ]; then
+        echo 3 > /proc/sys/vm/drop_caches
+    fi
+
+    echo "$PAGES" > "$MMDIR/nr_hugepages"
+    GOTPAGES=$(cat "$MMDIR/nr_hugepages")
+    if [ "$GOTPAGES" -lt "$PAGES" ]; then
+        echo "WARNING: could not allocate $PAGES at " \
+             "$MMDIR/nr_hugepages (only got $GOTPAGES)."
+    fi
+}
+
+#
+# Reserve a certain amount of hugepages (defined in $DPDK_CONF)
+#
+setup_hugepages() {
+    # the page counts come from the config file; without it, give up
+    [ -r "$DPDK_CONF" ] || return 1
+    . "$DPDK_CONF"
+
+    # one call per supported page size; empty counts are skipped by the helper
+    _setup_hugepages "hugepages-2048kB" "$NR_2M_PAGES"
+    _setup_hugepages "hugepages-16384kB" "$NR_16M_PAGES"
+    _setup_hugepages "hugepages-1048576kB" "$NR_1G_PAGES"
+
+    # dpdk uses 2*#hugepages mappings, increase for huge systems LP #1507921
+    if [ -d /sys/kernel/mm/hugepages ]; then
+        max_map_count=$(awk -v pad=65530 '{tot+=$1}END{print tot*2+pad}' \
+            /sys/kernel/mm/hugepages/hugepages-*/nr_hugepages)
+        sysctl -q vm.max_map_count="${max_map_count:-65530}"
+    fi
+
+    return 0
+}
+
+#
+# Allow NICs to be automatically bound to DPDK compatible drivers on boot.
+#
+bind_interfaces() {
+    # a missing interfaces file simply means there is nothing to bind
+    [ -r "$DPDK_INTERF" ] || return 0
+    # drop comment and blank lines (leading spaces/tabs allowed) before parsing
+    grep -Ev '^[[:space:]]*(#|$)' "$DPDK_INTERF" | while read -r BUS ID MOD; do
+        if [ -z "$BUS" ] || [ -z "$ID" ] || [ -z "$MOD" ]; then
+            echo "WARNING: incomplete spec in $DPDK_INTERF" \
+                " - BUS '$BUS' ID '$ID' MOD '$MOD'"
+            continue
+        fi
+        if [ "$BUS" != "pci" ]; then
+            echo "WARNING: incompatible bus '$BUS' in $DPDK_INTERF"
+            continue
+        fi
+
+        SYSFSPATH="/sys/bus/$BUS/devices/$ID"
+        if [ ! -e "$SYSFSPATH" ]; then
+            echo "WARNING: invalid pci ID '$ID' in $DPDK_INTERF" \
+                " - '$SYSFSPATH' does not exist"
+            continue
+        fi
+        if [ -L "$SYSFSPATH/driver" ]; then
+            CUR=$(readlink "$SYSFSPATH/driver")
+            CUR=$(basename "$CUR")
+        else
+            # device existing, but currently unregistered
+            CUR=""
+        fi
+        if [ "$MOD" != "$CUR" ]; then
+            modprobe -q "$MOD" || true
+            # cloud img have no linux-image-extra initially (uio_pci_generic)
+            # so check if the module is available (loadable/built in)
+            if [ -e "/sys/bus/pci/drivers/${MOD}" ]; then
+                echo "Reassigning pci:$ID to $MOD"
+                $DPDK_BIND -b "$MOD" "$ID"
+            else
+                echo "Warning: failed assigning pci:$ID," \
+                     " module $MOD not available"
+            fi
+        else
+            echo "pci:$ID already assigned to $MOD"
+        fi
+    done
+}
+
+
+
+case "$1" in
+start|reload|force-reload)
+    # only a full start also mounts hugetlbfs; the rest is shared
+    if [ "$1" = "start" ]; then
+        mount_hugetlbfs
+    fi
+    setup_hugepages
+    bind_interfaces
+    ;;
+stop)
+    # nothing is undone on stop
+    ;;
+status)
+    $DPDK_BIND --status
+    ;;
+*)
+    echo "Usage: $0 {start|stop|reload|force-reload|status}"
+    exit 1
+    ;;
+esac
+
diff --git a/tools/init/dpdk.conf b/tools/init/dpdk.conf
new file mode 100644
index 0000000..a5aea86
--- /dev/null
+++ b/tools/init/dpdk.conf
@@ -0,0 +1,60 @@
+#
+# The number of 2M hugepages to reserve on system boot
+#
+# Default is 0
+# To e.g. let it reserve 128M via 64x 2M Hugepages set:
+# NR_2M_PAGES=64
+
+#
+# The number of 1G hugepages to reserve on system boot
+#
+# Default is 0
+# To e.g. let it reserve 2G via 2x 1G Hugepages set:
+# NR_1G_PAGES=2
+
+# The number of 16M hugepages to reserve, supported e.g. on ppc64el
+#
+# Default is 0
+# To e.g. let it reserve 512M via 32x 16M Hugepages set:
+# NR_16M_PAGES=32
+
+#
+# Dropping slab and pagecache can help to successfully allocate hugepages,
+# especially later in the lifecycle of a system.
+# This comes at the cost of losing all slab and pagecache on (re)start
+# of the dpdk service - therefore the default is off.
+#
+# Default is 0
+# Set to 1 to enable it
+#DROPCACHE_BEFORE_HP_ALLOC=0
+
+# The DPDK library will use the first mounted hugetlbfs.
+# The init scripts try to ensure there is at least one default hugetlbfs
+# mountpoint on start.
+# If you have multiple hugetlbfs mountpoints for a complex (e.g. specific numa
+# policies) setup it should be controlled by the admin instead of this init
+# script. In that case specific mountpoints can be provided as parameters to
+# the DPDK library.
+
+# Hardware may support other granularities of hugepages (like 4M). But the
+# larger the hugepages the earlier those should be allocated.
+# Note: the dpdk init scripts will report warnings, but not fail if they could
+# not allocate the requested amount of hugepages.
+# The more or the larger the hugepages to be allocated are, the more it is
+# recommended to do the reservation as kernel commandline arguments.
+# To do so edit /etc/default/grub: GRUB_CMDLINE_LINUX_DEFAULT
+# and add [hugepagesz=xx] hugepages=yy ...
+#
+# Kernel commandline config:
+# hugepagesz sets the size for the next hugepages reservation (default 2M)
+# hugepages  reserves the given number of hugepages of the size set before
+#
+# After modifying /etc/default/grub, the command "update-grub" has to be
+# run in order to re-generate the grub config files. The new values will
+# be used after next reboot.
+#
+# example:
+# GRUB_CMDLINE_LINUX_DEFAULT="... hugepages=16 hugepagesz=1G hugepages=2"
+#
+# If the system supports it, this will reserve 16x 2M pages and 2x 1G pages.
+#
diff --git a/tools/init/dpdk.init.in b/tools/init/dpdk.init.in
new file mode 100755
index 0000000..1e26450
--- /dev/null
+++ b/tools/init/dpdk.init.in
@@ -0,0 +1,57 @@
+#!/bin/sh
+
+### BEGIN INIT INFO
+# Provides:          dpdk
+# Required-Start:    $remote_fs $local_fs
+# Required-Stop:     $remote_fs $local_fs
+# Default-Start:     S
+# Default-Stop:      0 1 6
+# Short-Description: start dpdk runtime environment
+### END INIT INFO
+
+set -e
+
+PATH="/sbin:/bin:/usr/bin"
+
+[ -d @@configdir@@ ] || exit 0
+
+# Define LSB log_* functions.
+# Depend on lsb-base (>= 3.2-14) to ensure that this file is present
+# and status_of_proc is working.
+. /lib/lsb/init-functions
+
+error=0   # exit status reported for the requested action
+case "$1" in
+start)
+    log_action_begin_msg "Starting DPDK environment" "dpdk"
+    output=$(@@sbindir@@/dpdk-init start 2>&1) || error="$?"
+    if [ -n "$output" ]; then
+        echo "$output" | while read -r line; do
+            log_action_cont_msg "$line"
+        done
+    fi
+    log_action_end_msg $error
+    exit $error
+    ;;
+stop|restart|force-reload)
+    # nothing to do for these actions
+    ;;
+status)
+    # dpdk-init takes the action "status" (not "--status")
+    output=$(@@sbindir@@/dpdk-init status 2>&1) || error="$?"
+    if [ -n "$output" ]; then
+        echo "$output" | while read -r line; do
+            log_action_cont_msg "$line"
+        done
+    fi
+    log_action_end_msg $error
+    exit $error
+    ;;
+*)
+    echo "Usage: $0 {start|stop|restart|force-reload|status}"
+    exit 1
+    ;;
+esac
+
+exit 0
+
diff --git a/tools/init/dpdk.service.in b/tools/init/dpdk.service.in
new file mode 100644
index 0000000..1968081
--- /dev/null
+++ b/tools/init/dpdk.service.in
@@ -0,0 +1,12 @@
+[Unit]
+Description=DPDK runtime environment
+DefaultDependencies=false
+After=network-pre.target local-fs.target
+
+[Service]
+Type=oneshot
+RemainAfterExit=yes
+ExecStart=@@sbindir@@/dpdk-init start
+
+[Install]
+WantedBy=multi-user.target
diff --git a/tools/init/interfaces b/tools/init/interfaces
new file mode 100644
index 0000000..73c3fca
--- /dev/null
+++ b/tools/init/interfaces
@@ -0,0 +1,16 @@
+#
+# <bus>		Currently only "pci" is supported
+# <id>		Device ID on the specified bus
+# <driver>	Driver to bind against (vfio-pci, uio_pci_generic, igb_uio or
+#               rte_kni)
+#
+# Be aware that the two dpdk compatible drivers uio_pci_generic and vfio-pci are
+# part of linux-image-extra-<VERSION> package on Debian-based distributions.
+# This package is not always installed by default - for example in cloud-images.
+# So please install it in case you run into missing module issues.
+#
+# <bus>	<id>		<driver>
+# pci	0000:04:00.0	vfio-pci
+# pci	0000:04:00.1	uio_pci_generic
+# pci	0000:05:00.0	igb_uio
+# pci	0000:06:00.0	rte_kni
-- 
2.1.4

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2023-06-08 16:45 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-12-12 19:24 [dpdk-dev] [PATCH] SDK: Add scripts to initialize DPDK runtime Luca Boccassi
2016-12-12 21:12 ` Bruce Richardson
2016-12-12 21:58   ` Luca Boccassi
2016-12-13  7:00     ` Christian Ehrhardt
     [not found]       ` <22e6b726-79f5-5c2b-2cc6-b11fba6384c7@canonical.com>
2016-12-13 15:19         ` Christian Ehrhardt
2019-01-17 17:38           ` Stephen Hemminger
2019-01-18 15:04             ` Ferruh Yigit
2016-12-12 23:41   ` Jay Rolette
2016-12-13 16:47 ` [dpdk-dev] [PATCH v2] " Luca Boccassi
2016-12-19 14:15   ` Thomas Monjalon
2017-01-12 13:43     ` Christian Ehrhardt
2017-03-16 18:13       ` Thomas Monjalon
2017-03-22 19:15         ` Luca Boccassi
2019-01-17 17:22     ` Ferruh Yigit
2017-09-18 20:44   ` Ferruh Yigit
2017-09-19  5:42     ` Christian Ehrhardt
2023-06-08 16:45   ` Stephen Hemminger

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).