From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
To: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Cc: Shahaf Shuler <shahafs@mellanox.com>,
Nelio Laranjeiro <nelio.laranjeiro@6wind.com>,
Yongseok Koh <yskoh@mellanox.com>,
dev@dpdk.org
Subject: Re: [dpdk-dev] [PATCH v1] net/mlx: control netdevices through ioctl only
Date: Fri, 9 Feb 2018 18:33:51 -0200 [thread overview]
Message-ID: <20180209203351.GA8519@localhost.localdomain> (raw)
In-Reply-To: <20180208163538.22407-1-adrien.mazarguil@6wind.com>
On Thu, Feb 08, 2018 at 05:37:06PM +0100, Adrien Mazarguil wrote:
> Several control operations implemented by these PMDs affect netdevices
> through sysfs, itself subject to file system permission checks enforced by
> the kernel, which limits their use for most purposes to applications
> running with root privileges.
>
> Since performing the same operations through ioctl() requires fewer
> capabilities (only CAP_NET_ADMIN) and given the remaining operations are
> already implemented this way, this patch standardizes on ioctl() and gets
> rid of redundant code.
>
> Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
> ---
> drivers/net/mlx4/mlx4_ethdev.c | 192 ++-------------------------
> drivers/net/mlx5/mlx5.h | 2 -
> drivers/net/mlx5/mlx5_ethdev.c | 255 ++++--------------------------------
> drivers/net/mlx5/mlx5_stats.c | 28 +++-
> 4 files changed, 63 insertions(+), 414 deletions(-)
>
> diff --git a/drivers/net/mlx4/mlx4_ethdev.c b/drivers/net/mlx4/mlx4_ethdev.c
> index 3bc692731..fbeef16c8 100644
> --- a/drivers/net/mlx4/mlx4_ethdev.c
> +++ b/drivers/net/mlx4/mlx4_ethdev.c
> @@ -132,167 +132,6 @@ mlx4_get_ifname(const struct priv *priv, char (*ifname)[IF_NAMESIZE])
> }
>
> /**
> - * Read from sysfs entry.
> - *
> - * @param[in] priv
> - * Pointer to private structure.
> - * @param[in] entry
> - * Entry name relative to sysfs path.
> - * @param[out] buf
> - * Data output buffer.
> - * @param size
> - * Buffer size.
> - *
> - * @return
> - * Number of bytes read on success, negative errno value otherwise and
> - * rte_errno is set.
> - */
> -static int
> -mlx4_sysfs_read(const struct priv *priv, const char *entry,
> - char *buf, size_t size)
> -{
> - char ifname[IF_NAMESIZE];
> - FILE *file;
> - int ret;
> -
> - ret = mlx4_get_ifname(priv, &ifname);
> - if (ret)
> - return ret;
> -
> - MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path,
> - ifname, entry);
> -
> - file = fopen(path, "rb");
> - if (file == NULL) {
> - rte_errno = errno;
> - return -rte_errno;
> - }
> - ret = fread(buf, 1, size, file);
> - if ((size_t)ret < size && ferror(file)) {
> - rte_errno = EIO;
> - ret = -rte_errno;
> - } else {
> - ret = size;
> - }
> - fclose(file);
> - return ret;
> -}
> -
> -/**
> - * Write to sysfs entry.
> - *
> - * @param[in] priv
> - * Pointer to private structure.
> - * @param[in] entry
> - * Entry name relative to sysfs path.
> - * @param[in] buf
> - * Data buffer.
> - * @param size
> - * Buffer size.
> - *
> - * @return
> - * Number of bytes written on success, negative errno value otherwise and
> - * rte_errno is set.
> - */
> -static int
> -mlx4_sysfs_write(const struct priv *priv, const char *entry,
> - char *buf, size_t size)
> -{
> - char ifname[IF_NAMESIZE];
> - FILE *file;
> - int ret;
> -
> - ret = mlx4_get_ifname(priv, &ifname);
> - if (ret)
> - return ret;
> -
> - MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path,
> - ifname, entry);
> -
> - file = fopen(path, "wb");
> - if (file == NULL) {
> - rte_errno = errno;
> - return -rte_errno;
> - }
> - ret = fwrite(buf, 1, size, file);
> - if ((size_t)ret < size || ferror(file)) {
> - rte_errno = EIO;
> - ret = -rte_errno;
> - } else {
> - ret = size;
> - }
> - fclose(file);
> - return ret;
> -}
> -
> -/**
> - * Get unsigned long sysfs property.
> - *
> - * @param priv
> - * Pointer to private structure.
> - * @param[in] name
> - * Entry name relative to sysfs path.
> - * @param[out] value
> - * Value output buffer.
> - *
> - * @return
> - * 0 on success, negative errno value otherwise and rte_errno is set.
> - */
> -static int
> -mlx4_get_sysfs_ulong(struct priv *priv, const char *name, unsigned long *value)
> -{
> - int ret;
> - unsigned long value_ret;
> - char value_str[32];
> -
> - ret = mlx4_sysfs_read(priv, name, value_str, (sizeof(value_str) - 1));
> - if (ret < 0) {
> - DEBUG("cannot read %s value from sysfs: %s",
> - name, strerror(rte_errno));
> - return ret;
> - }
> - value_str[ret] = '\0';
> - errno = 0;
> - value_ret = strtoul(value_str, NULL, 0);
> - if (errno) {
> - rte_errno = errno;
> - DEBUG("invalid %s value `%s': %s", name, value_str,
> - strerror(rte_errno));
> - return -rte_errno;
> - }
> - *value = value_ret;
> - return 0;
> -}
> -
> -/**
> - * Set unsigned long sysfs property.
> - *
> - * @param priv
> - * Pointer to private structure.
> - * @param[in] name
> - * Entry name relative to sysfs path.
> - * @param value
> - * Value to set.
> - *
> - * @return
> - * 0 on success, negative errno value otherwise and rte_errno is set.
> - */
> -static int
> -mlx4_set_sysfs_ulong(struct priv *priv, const char *name, unsigned long value)
> -{
> - int ret;
> - MKSTR(value_str, "%lu", value);
> -
> - ret = mlx4_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1));
> - if (ret < 0) {
> - DEBUG("cannot write %s `%s' (%lu) to sysfs: %s",
> - name, value_str, value, strerror(rte_errno));
> - return ret;
> - }
> - return 0;
> -}
> -
> -/**
> * Perform ifreq ioctl() on associated Ethernet device.
> *
> * @param[in] priv
> @@ -361,12 +200,12 @@ mlx4_get_mac(struct priv *priv, uint8_t (*mac)[ETHER_ADDR_LEN])
> int
> mlx4_mtu_get(struct priv *priv, uint16_t *mtu)
> {
> - unsigned long ulong_mtu = 0;
> - int ret = mlx4_get_sysfs_ulong(priv, "mtu", &ulong_mtu);
> + struct ifreq request;
> + int ret = mlx4_ifreq(priv, SIOCGIFMTU, &request);
>
> if (ret)
> return ret;
> - *mtu = ulong_mtu;
> + *mtu = request.ifr_mtu;
> return 0;
> }
>
> @@ -385,20 +224,13 @@ int
> mlx4_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
> {
> struct priv *priv = dev->data->dev_private;
> - uint16_t new_mtu;
> - int ret = mlx4_set_sysfs_ulong(priv, "mtu", mtu);
> + struct ifreq request = { .ifr_mtu = mtu, };
> + int ret = mlx4_ifreq(priv, SIOCSIFMTU, &request);
>
> if (ret)
> return ret;
> - ret = mlx4_mtu_get(priv, &new_mtu);
> - if (ret)
> - return ret;
> - if (new_mtu == mtu) {
> - priv->mtu = mtu;
> - return 0;
> - }
> - rte_errno = EINVAL;
> - return -rte_errno;
> + priv->mtu = mtu;
> + return 0;
> }
>
> /**
> @@ -417,14 +249,14 @@ mlx4_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
> static int
> mlx4_set_flags(struct priv *priv, unsigned int keep, unsigned int flags)
> {
> - unsigned long tmp = 0;
> - int ret = mlx4_get_sysfs_ulong(priv, "flags", &tmp);
> + struct ifreq request;
> + int ret = mlx4_ifreq(priv, SIOCGIFFLAGS, &request);
>
> if (ret)
> return ret;
> - tmp &= keep;
> - tmp |= (flags & (~keep));
> - return mlx4_set_sysfs_ulong(priv, "flags", tmp);
> + request.ifr_flags &= keep;
> + request.ifr_flags |= flags & ~keep;
> + return mlx4_ifreq(priv, SIOCSIFFLAGS, &request);
> }
>
> /**
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
> index 965c19f21..da44faaf4 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -209,8 +209,6 @@ struct priv *mlx5_get_priv(struct rte_eth_dev *dev);
> int mlx5_is_secondary(void);
> int priv_get_ifname(const struct priv *, char (*)[IF_NAMESIZE]);
> int priv_ifreq(const struct priv *, int req, struct ifreq *);
> -int priv_is_ib_cntr(const char *);
> -int priv_get_cntr_sysfs(struct priv *, const char *, uint64_t *);
> int priv_get_num_vfs(struct priv *, uint16_t *);
> int priv_get_mtu(struct priv *, uint16_t *);
> int priv_set_flags(struct priv *, unsigned int, unsigned int);
> diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
> index 666507691..b73cb53df 100644
> --- a/drivers/net/mlx5/mlx5_ethdev.c
> +++ b/drivers/net/mlx5/mlx5_ethdev.c
> @@ -7,6 +7,7 @@
>
> #include <stddef.h>
> #include <assert.h>
> +#include <inttypes.h>
> #include <unistd.h>
> #include <stdint.h>
> #include <stdio.h>
> @@ -173,181 +174,6 @@ priv_get_ifname(const struct priv *priv, char (*ifname)[IF_NAMESIZE])
> }
>
> /**
> - * Check if the counter is located on ib counters file.
> - *
> - * @param[in] cntr
> - * Counter name.
> - *
> - * @return
> - * 1 if counter is located on ib counters file , 0 otherwise.
> - */
> -int
> -priv_is_ib_cntr(const char *cntr)
> -{
> - if (!strcmp(cntr, "out_of_buffer"))
> - return 1;
> - return 0;
> -}
> -
> -/**
> - * Read from sysfs entry.
> - *
> - * @param[in] priv
> - * Pointer to private structure.
> - * @param[in] entry
> - * Entry name relative to sysfs path.
> - * @param[out] buf
> - * Data output buffer.
> - * @param size
> - * Buffer size.
> - *
> - * @return
> - * 0 on success, -1 on failure and errno is set.
> - */
> -static int
> -priv_sysfs_read(const struct priv *priv, const char *entry,
> - char *buf, size_t size)
> -{
> - char ifname[IF_NAMESIZE];
> - FILE *file;
> - int ret;
> - int err;
> -
> - if (priv_get_ifname(priv, &ifname))
> - return -1;
> -
> - if (priv_is_ib_cntr(entry)) {
> - MKSTR(path, "%s/ports/1/hw_counters/%s",
> - priv->ibdev_path, entry);
> - file = fopen(path, "rb");
> - } else {
> - MKSTR(path, "%s/device/net/%s/%s",
> - priv->ibdev_path, ifname, entry);
> - file = fopen(path, "rb");
> - }
> - if (file == NULL)
> - return -1;
> - ret = fread(buf, 1, size, file);
> - err = errno;
> - if (((size_t)ret < size) && (ferror(file)))
> - ret = -1;
> - else
> - ret = size;
> - fclose(file);
> - errno = err;
> - return ret;
> -}
> -
> -/**
> - * Write to sysfs entry.
> - *
> - * @param[in] priv
> - * Pointer to private structure.
> - * @param[in] entry
> - * Entry name relative to sysfs path.
> - * @param[in] buf
> - * Data buffer.
> - * @param size
> - * Buffer size.
> - *
> - * @return
> - * 0 on success, -1 on failure and errno is set.
> - */
> -static int
> -priv_sysfs_write(const struct priv *priv, const char *entry,
> - char *buf, size_t size)
> -{
> - char ifname[IF_NAMESIZE];
> - FILE *file;
> - int ret;
> - int err;
> -
> - if (priv_get_ifname(priv, &ifname))
> - return -1;
> -
> - MKSTR(path, "%s/device/net/%s/%s", priv->ibdev_path, ifname, entry);
> -
> - file = fopen(path, "wb");
> - if (file == NULL)
> - return -1;
> - ret = fwrite(buf, 1, size, file);
> - err = errno;
> - if (((size_t)ret < size) || (ferror(file)))
> - ret = -1;
> - else
> - ret = size;
> - fclose(file);
> - errno = err;
> - return ret;
> -}
> -
> -/**
> - * Get unsigned long sysfs property.
> - *
> - * @param priv
> - * Pointer to private structure.
> - * @param[in] name
> - * Entry name relative to sysfs path.
> - * @param[out] value
> - * Value output buffer.
> - *
> - * @return
> - * 0 on success, -1 on failure and errno is set.
> - */
> -static int
> -priv_get_sysfs_ulong(struct priv *priv, const char *name, unsigned long *value)
> -{
> - int ret;
> - unsigned long value_ret;
> - char value_str[32];
> -
> - ret = priv_sysfs_read(priv, name, value_str, (sizeof(value_str) - 1));
> - if (ret == -1) {
> - DEBUG("cannot read %s value from sysfs: %s",
> - name, strerror(errno));
> - return -1;
> - }
> - value_str[ret] = '\0';
> - errno = 0;
> - value_ret = strtoul(value_str, NULL, 0);
> - if (errno) {
> - DEBUG("invalid %s value `%s': %s", name, value_str,
> - strerror(errno));
> - return -1;
> - }
> - *value = value_ret;
> - return 0;
> -}
> -
> -/**
> - * Set unsigned long sysfs property.
> - *
> - * @param priv
> - * Pointer to private structure.
> - * @param[in] name
> - * Entry name relative to sysfs path.
> - * @param value
> - * Value to set.
> - *
> - * @return
> - * 0 on success, -1 on failure and errno is set.
> - */
> -static int
> -priv_set_sysfs_ulong(struct priv *priv, const char *name, unsigned long value)
> -{
> - int ret;
> - MKSTR(value_str, "%lu", value);
> -
> - ret = priv_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1));
> - if (ret == -1) {
> - DEBUG("cannot write %s `%s' (%lu) to sysfs: %s",
> - name, value_str, value, strerror(errno));
> - return -1;
> - }
> - return 0;
> -}
> -
> -/**
> * Perform ifreq ioctl() on associated Ethernet device.
> *
> * @param[in] priv
> @@ -390,20 +216,25 @@ priv_get_num_vfs(struct priv *priv, uint16_t *num_vfs)
> {
> /* The sysfs entry name depends on the operating system. */
> const char **name = (const char *[]){
> - "device/sriov_numvfs",
> - "device/mlx5_num_vfs",
> + "sriov_numvfs",
> + "mlx5_num_vfs",
> NULL,
> };
> - int ret;
>
> do {
> - unsigned long ulong_num_vfs;
> + int n;
> + FILE *file;
> + MKSTR(path, "%s/device/%s", priv->ibdev_path, *name);
>
> - ret = priv_get_sysfs_ulong(priv, *name, &ulong_num_vfs);
> - if (!ret)
> - *num_vfs = ulong_num_vfs;
> - } while (*(++name) && ret);
> - return ret;
> + file = fopen(path, "rb");
> + if (!file)
> + continue;
> + n = fscanf(file, "%" SCNu16, num_vfs);
> + fclose(file);
> + if (n == 1)
> + return 0;
> + } while (*(++name));
> + return -1;
> }
>
> /**
> @@ -420,35 +251,12 @@ priv_get_num_vfs(struct priv *priv, uint16_t *num_vfs)
> int
> priv_get_mtu(struct priv *priv, uint16_t *mtu)
> {
> - unsigned long ulong_mtu;
> + struct ifreq request;
> + int ret = priv_ifreq(priv, SIOCGIFMTU, &request);
>
> - if (priv_get_sysfs_ulong(priv, "mtu", &ulong_mtu) == -1)
> - return -1;
> - *mtu = ulong_mtu;
> - return 0;
> -}
> -
> -/**
> - * Read device counter from sysfs.
> - *
> - * @param priv
> - * Pointer to private structure.
> - * @param name
> - * Counter name.
> - * @param[out] cntr
> - * Counter output buffer.
> - *
> - * @return
> - * 0 on success, -1 on failure and errno is set.
> - */
> -int
> -priv_get_cntr_sysfs(struct priv *priv, const char *name, uint64_t *cntr)
> -{
> - unsigned long ulong_ctr;
> -
> - if (priv_get_sysfs_ulong(priv, name, &ulong_ctr) == -1)
> - return -1;
> - *cntr = ulong_ctr;
> + if (ret)
> + return ret;
> + *mtu = request.ifr_mtu;
> return 0;
> }
>
> @@ -466,15 +274,9 @@ priv_get_cntr_sysfs(struct priv *priv, const char *name, uint64_t *cntr)
> static int
> priv_set_mtu(struct priv *priv, uint16_t mtu)
> {
> - uint16_t new_mtu;
> + struct ifreq request = { .ifr_mtu = mtu, };
>
> - if (priv_set_sysfs_ulong(priv, "mtu", mtu) ||
> - priv_get_mtu(priv, &new_mtu))
> - return -1;
> - if (new_mtu == mtu)
> - return 0;
> - errno = EINVAL;
> - return -1;
> + return priv_ifreq(priv, SIOCSIFMTU, &request);
> }
>
> /**
> @@ -493,13 +295,14 @@ priv_set_mtu(struct priv *priv, uint16_t mtu)
> int
> priv_set_flags(struct priv *priv, unsigned int keep, unsigned int flags)
> {
> - unsigned long tmp;
> + struct ifreq request;
> + int ret = priv_ifreq(priv, SIOCGIFFLAGS, &request);
>
> - if (priv_get_sysfs_ulong(priv, "flags", &tmp) == -1)
> - return -1;
> - tmp &= keep;
> - tmp |= (flags & (~keep));
> - return priv_set_sysfs_ulong(priv, "flags", tmp);
> + if (ret)
> + return ret;
> + request.ifr_flags &= keep;
> + request.ifr_flags |= flags & ~keep;
> + return priv_ifreq(priv, SIOCSIFFLAGS, &request);
> }
>
> /**
> diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c
> index 378472a70..eb9c65dcc 100644
> --- a/drivers/net/mlx5/mlx5_stats.c
> +++ b/drivers/net/mlx5/mlx5_stats.c
> @@ -3,8 +3,11 @@
> * Copyright 2015 Mellanox.
> */
>
> +#include <inttypes.h>
> #include <linux/sockios.h>
> #include <linux/ethtool.h>
> +#include <stdint.h>
> +#include <stdio.h>
>
> #include <rte_ethdev_driver.h>
> #include <rte_common.h>
> @@ -19,6 +22,7 @@ struct mlx5_counter_ctrl {
> char dpdk_name[RTE_ETH_XSTATS_NAME_SIZE];
> /* Name of the counter on the device table. */
> char ctr_name[RTE_ETH_XSTATS_NAME_SIZE];
> + uint32_t ib:1; /**< Nonzero for IB counters. */
> };
>
> static const struct mlx5_counter_ctrl mlx5_counters_init[] = {
> @@ -93,6 +97,7 @@ static const struct mlx5_counter_ctrl mlx5_counters_init[] = {
> {
> .dpdk_name = "rx_out_of_buffer",
> .ctr_name = "out_of_buffer",
> + .ib = 1,
> },
> {
> .dpdk_name = "tx_packets_phy",
> @@ -143,13 +148,24 @@ priv_read_dev_counters(struct priv *priv, uint64_t *stats)
> return -1;
> }
> for (i = 0; i != xstats_n; ++i) {
> - if (priv_is_ib_cntr(mlx5_counters_init[i].ctr_name))
> - priv_get_cntr_sysfs(priv,
> - mlx5_counters_init[i].ctr_name,
> - &stats[i]);
> - else
> + if (mlx5_counters_init[i].ib) {
> + FILE *file;
> + MKSTR(path, "%s/ports/1/hw_counters/%s",
> + priv->ibdev_path,
> + mlx5_counters_init[i].ctr_name);
> +
> + file = fopen(path, "rb");
> + if (file) {
> + int n = fscanf(file, "%" SCNu64, &stats[i]);
> +
> + fclose(file);
> + if (n != 1)
> + stats[i] = 0;
> + }
> + } else {
> stats[i] = (uint64_t)
> et_stats->data[xstats_ctrl->dev_table_idx[i]];
> + }
> }
> return 0;
> }
> @@ -232,7 +248,7 @@ priv_xstats_init(struct priv *priv)
> }
> }
> for (j = 0; j != xstats_n; ++j) {
> - if (priv_is_ib_cntr(mlx5_counters_init[j].ctr_name))
> + if (mlx5_counters_init[j].ib)
> continue;
> if (xstats_ctrl->dev_table_idx[j] >= dev_stats_n) {
> WARN("counter \"%s\" is not recognized",
> --
> 2.11.0
next prev parent reply other threads:[~2018-02-09 20:33 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-02-08 16:37 Adrien Mazarguil
2018-02-09 20:33 ` Marcelo Ricardo Leitner [this message]
2018-02-28 8:07 ` Shahaf Shuler
2018-02-11 11:55 ` Shahaf Shuler
2018-02-12 8:47 ` Adrien Mazarguil
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180209203351.GA8519@localhost.localdomain \
--to=marcelo.leitner@gmail.com \
--cc=adrien.mazarguil@6wind.com \
--cc=dev@dpdk.org \
--cc=nelio.laranjeiro@6wind.com \
--cc=shahafs@mellanox.com \
--cc=yskoh@mellanox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).