From: Adrien Mazarguil <adrien.mazarguil@6wind.com>
To: dev@dpdk.org
Subject: [dpdk-dev] [PATCH v2 3/3] mlx5: RETA query/update support
Date: Fri, 30 Oct 2015 19:58:57 +0100 [thread overview]
Message-ID: <1446231537-8380-4-git-send-email-adrien.mazarguil@6wind.com> (raw)
In-Reply-To: <1446231537-8380-1-git-send-email-adrien.mazarguil@6wind.com>
From: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
ConnectX-4 is able to use indirection tables size of power of two, but
with the current API it is impossible to predict its size, so to simplify,
for any query/update RETA command, the indirection table is modified to use
the maximum value.
A port stop/start must be done to apply the new RETA configuration.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
drivers/net/mlx5/mlx5.c | 4 +
drivers/net/mlx5/mlx5.h | 7 ++
drivers/net/mlx5/mlx5_ethdev.c | 29 ++++++++
 drivers/net/mlx5/mlx5_rss.c    | 163 +++++++++++++++++++++++++++++++++++++++++
drivers/net/mlx5/mlx5_rxq.c | 53 ++------------
drivers/net/mlx5/mlx5_utils.h | 20 +++++
6 files changed, 231 insertions(+), 45 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 9636588..43a40d7 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -133,6 +133,8 @@ mlx5_dev_close(struct rte_eth_dev *dev)
rte_free((*priv->rss_conf)[i]);
rte_free(priv->rss_conf);
}
+ if (priv->reta_idx != NULL)
+ rte_free(priv->reta_idx);
priv_unlock(priv);
memset(priv, 0, sizeof(*priv));
}
@@ -160,6 +162,8 @@ static const struct eth_dev_ops mlx5_dev_ops = {
.mac_addr_remove = mlx5_mac_addr_remove,
.mac_addr_add = mlx5_mac_addr_add,
.mtu_set = mlx5_dev_set_mtu,
+ .reta_update = mlx5_dev_rss_reta_update,
+ .reta_query = mlx5_dev_rss_reta_query,
.rss_hash_update = mlx5_rss_hash_update,
.rss_hash_conf_get = mlx5_rss_hash_conf_get,
};
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 0daacc8..b84d31d 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -118,6 +118,8 @@ struct priv {
/* RSS configuration array indexed by hash RX queue type. */
struct rte_eth_rss_conf *(*rss_conf)[];
struct rte_intr_handle intr_handle; /* Interrupt handler. */
+ unsigned int (*reta_idx)[]; /* RETA index table. */
+ unsigned int reta_idx_n; /* RETA index size. */
rte_spinlock_t lock; /* Lock for control functions. */
};
@@ -184,6 +186,11 @@ int rss_hash_rss_conf_new_key(struct priv *, const uint8_t *, unsigned int,
uint64_t);
int mlx5_rss_hash_update(struct rte_eth_dev *, struct rte_eth_rss_conf *);
int mlx5_rss_hash_conf_get(struct rte_eth_dev *, struct rte_eth_rss_conf *);
+int priv_rss_reta_index_resize(struct priv *, unsigned int);
+int mlx5_dev_rss_reta_query(struct rte_eth_dev *,
+ struct rte_eth_rss_reta_entry64 *, uint16_t);
+int mlx5_dev_rss_reta_update(struct rte_eth_dev *,
+ struct rte_eth_rss_reta_entry64 *, uint16_t);
/* mlx5_rxmode.c */
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 84e877c..1159fa3 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -410,6 +410,9 @@ dev_configure(struct rte_eth_dev *dev)
struct priv *priv = dev->data->dev_private;
unsigned int rxqs_n = dev->data->nb_rx_queues;
unsigned int txqs_n = dev->data->nb_tx_queues;
+ unsigned int i;
+ unsigned int j;
+ unsigned int reta_idx_n;
priv->rxqs = (void *)dev->data->rx_queues;
priv->txqs = (void *)dev->data->tx_queues;
@@ -418,11 +421,31 @@ dev_configure(struct rte_eth_dev *dev)
(void *)dev, priv->txqs_n, txqs_n);
priv->txqs_n = txqs_n;
}
+ if (rxqs_n > priv->ind_table_max_size) {
+ ERROR("cannot handle this many RX queues (%u)", rxqs_n);
+ return EINVAL;
+ }
if (rxqs_n == priv->rxqs_n)
return 0;
INFO("%p: RX queues number update: %u -> %u",
(void *)dev, priv->rxqs_n, rxqs_n);
priv->rxqs_n = rxqs_n;
+ /* If the requested number of RX queues is not a power of two, use the
+ * maximum indirection table size for better balancing.
+ * The result is always rounded to the next power of two. */
+ reta_idx_n = (1 << log2above((rxqs_n & (rxqs_n - 1)) ?
+ priv->ind_table_max_size :
+ rxqs_n));
+ if (priv_rss_reta_index_resize(priv, reta_idx_n))
+ return ENOMEM;
+ /* When the number of RX queues is not a power of two, the remaining
+ * table entries are padded with reused WQs and hashes are not spread
+ * uniformly. */
+ for (i = 0, j = 0; (i != reta_idx_n); ++i) {
+ (*priv->reta_idx)[i] = j;
+ if (++j == rxqs_n)
+ j = 0;
+ }
return 0;
}
@@ -494,6 +517,12 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
0);
if (priv_get_ifname(priv, &ifname) == 0)
info->if_index = if_nametoindex(ifname);
+ /* FIXME: RETA update/query API expects the callee to know the size of
+ * the indirection table, for this PMD the size varies depending on
+ * the number of RX queues, it becomes impossible to find the correct
+ * size if it is not fixed.
+ * The API should be updated to solve this problem. */
+ info->reta_size = priv->ind_table_max_size;
priv_unlock(priv);
}
diff --git a/drivers/net/mlx5/mlx5_rss.c b/drivers/net/mlx5/mlx5_rss.c
index bf19aca..7eb688a 100644
--- a/drivers/net/mlx5/mlx5_rss.c
+++ b/drivers/net/mlx5/mlx5_rss.c
@@ -211,3 +211,166 @@ mlx5_rss_hash_conf_get(struct rte_eth_dev *dev,
priv_unlock(priv);
return 0;
}
+
+/**
+ * Allocate/reallocate RETA index table.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param reta_size
+ * The size of the array to allocate.
+ *
+ * @return
+ * 0 on success, errno value on failure.
+ */
+int
+priv_rss_reta_index_resize(struct priv *priv, unsigned int reta_size)
+{
+ void *mem;
+ unsigned int old_size = priv->reta_idx_n;
+
+ if (priv->reta_idx_n == reta_size)
+ return 0;
+
+ mem = rte_realloc(priv->reta_idx,
+ reta_size * sizeof((*priv->reta_idx)[0]), 0);
+ if (!mem)
+ return ENOMEM;
+ priv->reta_idx = mem;
+ priv->reta_idx_n = reta_size;
+
+ if (old_size < reta_size)
+ memset(&(*priv->reta_idx)[old_size], 0,
+ (reta_size - old_size) *
+ sizeof((*priv->reta_idx)[0]));
+ return 0;
+}
+
+/**
+ * Query RETA table.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param[in, out] reta_conf
+ * Pointer to the first RETA configuration structure.
+ * @param reta_size
+ * Number of entries.
+ *
+ * @return
+ * 0 on success, errno value on failure.
+ */
+static int
+priv_dev_rss_reta_query(struct priv *priv,
+ struct rte_eth_rss_reta_entry64 *reta_conf,
+ unsigned int reta_size)
+{
+ unsigned int idx;
+ unsigned int i;
+ int ret;
+
+ /* See RETA comment in mlx5_dev_infos_get(). */
+ ret = priv_rss_reta_index_resize(priv, priv->ind_table_max_size);
+ if (ret)
+ return ret;
+
+ /* Fill each entry of the table even if its bit is not set. */
+ for (idx = 0, i = 0; (i != reta_size); ++i) {
+ idx = i / RTE_RETA_GROUP_SIZE;
+ reta_conf[idx].reta[i % RTE_RETA_GROUP_SIZE] =
+ (*priv->reta_idx)[i];
+ }
+ return 0;
+}
+
+/**
+ * Update RETA table.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param[in] reta_conf
+ * Pointer to the first RETA configuration structure.
+ * @param reta_size
+ * Number of entries.
+ *
+ * @return
+ * 0 on success, errno value on failure.
+ */
+static int
+priv_dev_rss_reta_update(struct priv *priv,
+ struct rte_eth_rss_reta_entry64 *reta_conf,
+ unsigned int reta_size)
+{
+ unsigned int idx;
+ unsigned int i;
+ unsigned int pos;
+ int ret;
+
+ /* See RETA comment in mlx5_dev_infos_get(). */
+ ret = priv_rss_reta_index_resize(priv, priv->ind_table_max_size);
+ if (ret)
+ return ret;
+
+ for (idx = 0, i = 0; (i != reta_size); ++i) {
+ idx = i / RTE_RETA_GROUP_SIZE;
+ pos = i % RTE_RETA_GROUP_SIZE;
+ if (((reta_conf[idx].mask >> pos) & 0x1) == 0)
+ continue;
+ assert(reta_conf[idx].reta[pos] < priv->rxqs_n);
+ (*priv->reta_idx)[i] = reta_conf[idx].reta[pos];
+ }
+ return 0;
+}
+
+/**
+ * DPDK callback to get the RETA indirection table.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param reta_conf
+ * Pointer to RETA configuration structure array.
+ * @param reta_size
+ * Size of the RETA table.
+ *
+ * @return
+ * 0 on success, negative errno value on failure.
+ */
+int
+mlx5_dev_rss_reta_query(struct rte_eth_dev *dev,
+ struct rte_eth_rss_reta_entry64 *reta_conf,
+ uint16_t reta_size)
+{
+ int ret;
+ struct priv *priv = dev->data->dev_private;
+
+ priv_lock(priv);
+ ret = priv_dev_rss_reta_query(priv, reta_conf, reta_size);
+ priv_unlock(priv);
+ return -ret;
+}
+
+/**
+ * DPDK callback to update the RETA indirection table.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param reta_conf
+ * Pointer to RETA configuration structure array.
+ * @param reta_size
+ * Size of the RETA table.
+ *
+ * @return
+ * 0 on success, negative errno value on failure.
+ */
+int
+mlx5_dev_rss_reta_update(struct rte_eth_dev *dev,
+ struct rte_eth_rss_reta_entry64 *reta_conf,
+ uint16_t reta_size)
+{
+ int ret;
+ struct priv *priv = dev->data->dev_private;
+
+ priv_lock(priv);
+ ret = priv_dev_rss_reta_update(priv, reta_conf, reta_size);
+ priv_unlock(priv);
+ return -ret;
+}
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 084bf41..3d7ae7e 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -259,26 +259,6 @@ hash_rxq_flow_attr(const struct hash_rxq *hash_rxq,
}
/**
- * Return nearest power of two above input value.
- *
- * @param v
- * Input value.
- *
- * @return
- * Nearest power of two above input value.
- */
-static unsigned int
-log2above(unsigned int v)
-{
- unsigned int l;
- unsigned int r;
-
- for (l = 0, r = 0; (v >> 1); ++l, v >>= 1)
- r |= (v & 1);
- return (l + r);
-}
-
-/**
* Return the type corresponding to the n'th bit set.
*
* @param table
@@ -360,14 +340,7 @@ priv_make_ind_table_init(struct priv *priv,
int
priv_create_hash_rxqs(struct priv *priv)
{
- /* If the requested number of WQs is not a power of two, use the
- * maximum indirection table size for better balancing.
- * The result is always rounded to the next power of two. */
- unsigned int wqs_n =
- (1 << log2above((priv->rxqs_n & (priv->rxqs_n - 1)) ?
- priv->ind_table_max_size :
- priv->rxqs_n));
- struct ibv_exp_wq *wqs[wqs_n];
+ struct ibv_exp_wq *wqs[priv->reta_idx_n];
struct ind_table_init ind_table_init[IND_TABLE_INIT_N];
unsigned int ind_tables_n =
priv_make_ind_table_init(priv, &ind_table_init);
@@ -393,25 +366,15 @@ priv_create_hash_rxqs(struct priv *priv)
" indirection table cannot be created");
return EINVAL;
}
- if ((wqs_n < priv->rxqs_n) || (wqs_n > priv->ind_table_max_size)) {
- ERROR("cannot handle this many RX queues (%u)", priv->rxqs_n);
- err = ERANGE;
- goto error;
- }
- if (wqs_n != priv->rxqs_n) {
+ if (priv->rxqs_n & (priv->rxqs_n - 1)) {
INFO("%u RX queues are configured, consider rounding this"
" number to the next power of two for better balancing",
priv->rxqs_n);
- DEBUG("indirection table extended to assume %u WQs", wqs_n);
- }
- /* When the number of RX queues is not a power of two, the remaining
- * table entries are padded with reused WQs and hashes are not spread
- * uniformly. */
- for (i = 0, j = 0; (i != wqs_n); ++i) {
- wqs[i] = (*priv->rxqs)[j]->wq;
- if (++j == priv->rxqs_n)
- j = 0;
+ DEBUG("indirection table extended to assume %u WQs",
+ priv->reta_idx_n);
}
+ for (i = 0; (i != priv->reta_idx_n); ++i)
+ wqs[i] = (*priv->rxqs)[(*priv->reta_idx)[i]]->wq;
/* Get number of hash RX queues to configure. */
for (i = 0, hash_rxqs_n = 0; (i != ind_tables_n); ++i)
hash_rxqs_n += ind_table_init[i].hash_types_n;
@@ -436,8 +399,8 @@ priv_create_hash_rxqs(struct priv *priv)
unsigned int ind_tbl_size = ind_table_init[i].max_size;
struct ibv_exp_rwq_ind_table *ind_table;
- if (wqs_n < ind_tbl_size)
- ind_tbl_size = wqs_n;
+ if (priv->reta_idx_n < ind_tbl_size)
+ ind_tbl_size = priv->reta_idx_n;
ind_init_attr.log_ind_tbl_size = log2above(ind_tbl_size);
errno = 0;
ind_table = ibv_exp_create_rwq_ind_table(priv->ctx,
diff --git a/drivers/net/mlx5/mlx5_utils.h b/drivers/net/mlx5/mlx5_utils.h
index f1fad18..9b5e86a 100644
--- a/drivers/net/mlx5/mlx5_utils.h
+++ b/drivers/net/mlx5/mlx5_utils.h
@@ -161,4 +161,24 @@ pmd_drv_log_basename(const char *s)
\
snprintf(name, sizeof(name), __VA_ARGS__)
+/**
+ * Return nearest power of two above input value.
+ *
+ * @param v
+ * Input value.
+ *
+ * @return
+ * Nearest power of two above input value.
+ */
+static inline unsigned int
+log2above(unsigned int v)
+{
+ unsigned int l;
+ unsigned int r;
+
+ for (l = 0, r = 0; (v >> 1); ++l, v >>= 1)
+ r |= (v & 1);
+ return (l + r);
+}
+
#endif /* RTE_PMD_MLX5_UTILS_H_ */
--
2.1.0
next prev parent reply other threads:[~2015-10-30 18:59 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-10-05 17:57 [dpdk-dev] [PATCH 0/3] Add RETA configuration to mlx5 Adrien Mazarguil
2015-10-05 17:57 ` [dpdk-dev] [PATCH 1/3] cmdline: increase command line buffer Adrien Mazarguil
2015-10-05 17:57 ` [dpdk-dev] [PATCH 2/3] ethdev: change RETA type in rte_eth_rss_reta_entry64 Adrien Mazarguil
2015-10-05 17:57 ` [dpdk-dev] [PATCH 3/3] mlx5: RETA query/update support Adrien Mazarguil
2015-10-30 18:58 ` [dpdk-dev] [PATCH v2 0/3] Add RETA configuration to mlx5 Adrien Mazarguil
2015-10-30 18:58 ` [dpdk-dev] [PATCH v2 1/3] cmdline: increase command line buffer Adrien Mazarguil
2015-10-30 18:58 ` [dpdk-dev] [PATCH v2 2/3] ethdev: change RETA type in rte_eth_rss_reta_entry64 Adrien Mazarguil
2015-10-30 18:58 ` Adrien Mazarguil [this message]
2015-11-02 17:31 ` [dpdk-dev] [PATCH] mlx5: RETA query/update support Adrien Mazarguil
2015-11-02 17:40 ` Adrien Mazarguil
2015-11-02 18:11 ` [dpdk-dev] [PATCH v3] " Adrien Mazarguil
2015-11-03 10:23 ` Thomas Monjalon
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1446231537-8380-4-git-send-email-adrien.mazarguil@6wind.com \
--to=adrien.mazarguil@6wind.com \
--cc=dev@dpdk.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).