* [dpdk-dev] [PATCH 2/2] net/mlx4: support basic flow items and actions
2017-02-21 14:07 [dpdk-dev] [PATCH 1/2] net/mlx4: split the definitions to the header file Vasily Philipov
@ 2017-02-21 14:07 ` Vasily Philipov
2017-02-22 8:37 ` Nélio Laranjeiro
2017-02-22 8:37 ` [dpdk-dev] [PATCH 1/2] net/mlx4: split the definitions to the header file Nélio Laranjeiro
` (4 subsequent siblings)
5 siblings, 1 reply; 15+ messages in thread
From: Vasily Philipov @ 2017-02-21 14:07 UTC (permalink / raw)
To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro
Add support for the following items: eth, vlan, ipv4, udp and tcp, and for the
following actions: queue and drop.
Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
drivers/net/mlx4/Makefile | 3 +-
drivers/net/mlx4/mlx4.c | 60 ++-
drivers/net/mlx4/mlx4.h | 3 +
drivers/net/mlx4/mlx4_flow.c | 1053 ++++++++++++++++++++++++++++++++++++++++++
drivers/net/mlx4/mlx4_flow.h | 104 +++++
5 files changed, 1220 insertions(+), 3 deletions(-)
create mode 100644 drivers/net/mlx4/mlx4_flow.c
create mode 100644 drivers/net/mlx4/mlx4_flow.h
diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile
index 68c5902..1d463f7 100644
--- a/drivers/net/mlx4/Makefile
+++ b/drivers/net/mlx4/Makefile
@@ -36,6 +36,7 @@ LIB = librte_pmd_mlx4.a
# Sources.
SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_flow.c
# Dependencies.
DEPDIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += lib/librte_ether
@@ -129,7 +130,7 @@ mlx4_autoconf.h: mlx4_autoconf.h.new
cmp '$<' '$@' $(AUTOCONF_OUTPUT) || \
mv '$<' '$@'
-mlx4.o: mlx4_autoconf.h
+$(SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD):.c=.o): mlx4_autoconf.h
clean_mlx4: FORCE
$Q rm -f -- mlx4_autoconf.h mlx4_autoconf.h.new
diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 82ccac8..cc2ebfa 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -82,12 +82,14 @@
#include <rte_log.h>
#include <rte_alarm.h>
#include <rte_memory.h>
+#include <rte_flow.h>
/* Generated configuration header. */
#include "mlx4_autoconf.h"
-/* PMD header. */
+/* PMD headers. */
#include "mlx4.h"
+#include "mlx4_flow.h"
/* Convenience macros for accessing mbuf fields. */
#define NEXT(m) ((m)->next)
@@ -2351,6 +2353,7 @@ struct txq_mp2mr_mbuf_check_data {
assert(((uint8_t *)attr + sizeof(*attr)) == (uint8_t *)spec);
*attr = (struct ibv_flow_attr){
.type = IBV_FLOW_ATTR_NORMAL,
+ .priority = 3,
.num_of_specs = 1,
.port = priv->port,
.flags = 0
@@ -3936,6 +3939,7 @@ struct txq_mp2mr_mbuf_check_data {
{
struct priv *priv = dev->data->dev_private;
unsigned int i = 0;
+ unsigned int err = 0;
unsigned int r;
struct rxq *rxq;
@@ -3985,8 +3989,9 @@ struct txq_mp2mr_mbuf_check_data {
return -ret;
} while ((--r) && ((rxq = (*priv->rxqs)[++i]), i));
priv_dev_interrupt_handler_install(priv, dev);
+ err = mlx4_priv_flow_start(priv);
priv_unlock(priv);
- return 0;
+ return -err;
}
/**
@@ -4021,6 +4026,7 @@ struct txq_mp2mr_mbuf_check_data {
rxq = (*priv->rxqs)[0];
r = priv->rxqs_n;
}
+ mlx4_priv_flow_stop(priv);
/* Iterate only once when RSS is enabled. */
do {
/* Ignore nonexistent RX queues. */
@@ -5022,6 +5028,55 @@ struct txq_mp2mr_mbuf_check_data {
return -ret;
}
+/**
+ * Generic flow API callbacks exposed by this PMD. The query callback is not
+ * implemented and is therefore left NULL.
+ */
+const struct rte_flow_ops mlx4_flow_ops = {
+	.validate = mlx4_flow_validate,
+	.create = mlx4_flow_create,
+	.destroy = mlx4_flow_destroy,
+	.flush = mlx4_flow_flush,
+};
+
+/**
+ * Manage filter operations.
+ *
+ * Only the generic filter type is handled, and only to retrieve the
+ * rte_flow callbacks of this PMD; every other request is rejected.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param filter_type
+ *   Filter type.
+ * @param filter_op
+ *   Operation to perform.
+ * @param arg
+ *   Pointer to operation-specific structure. For RTE_ETH_FILTER_GENERIC
+ *   with RTE_ETH_FILTER_GET, the address of mlx4_flow_ops is stored
+ *   through it.
+ *
+ * @return
+ *   0 on success, negative errno value on failure.
+ */
+static int
+mlx4_dev_filter_ctrl(struct rte_eth_dev *dev,
+		     enum rte_filter_type filter_type,
+		     enum rte_filter_op filter_op,
+		     void *arg)
+{
+	if (filter_type == RTE_ETH_FILTER_GENERIC) {
+		/* The only supported operation is fetching the flow ops. */
+		if (filter_op != RTE_ETH_FILTER_GET)
+			return -EINVAL;
+		*(const void **)arg = &mlx4_flow_ops;
+		return 0;
+	}
+	if (filter_type == RTE_ETH_FILTER_FDIR) {
+		DEBUG("%p: filter type FDIR is not supported by this PMD",
+		      (void *)dev);
+	} else {
+		ERROR("%p: filter type (%d) not supported",
+		      (void *)dev, filter_type);
+	}
+	return -EINVAL;
+}
+
static const struct eth_dev_ops mlx4_dev_ops = {
.dev_configure = mlx4_dev_configure,
.dev_start = mlx4_dev_start,
@@ -5056,6 +5111,7 @@ struct txq_mp2mr_mbuf_check_data {
.mac_addr_add = mlx4_mac_addr_add,
.mac_addr_set = mlx4_mac_addr_set,
.mtu_set = mlx4_dev_set_mtu,
+ .filter_ctrl = mlx4_dev_filter_ctrl,
};
/**
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 70c9ecd..fac408b 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -295,6 +295,8 @@ struct txq {
struct ibv_exp_res_domain *rd; /* Resource Domain. */
};
+struct rte_flow;
+
struct priv {
struct rte_eth_dev *dev; /* Ethernet device. */
struct ibv_context *ctx; /* Verbs context. */
@@ -337,6 +339,7 @@ struct priv {
struct rxq *(*rxqs)[]; /* RX queues. */
struct txq *(*txqs)[]; /* TX queues. */
struct rte_intr_handle intr_handle; /* Interrupt handler. */
+ LIST_HEAD(mlx4_flows, rte_flow) flows;
rte_spinlock_t lock; /* Lock for control functions. */
};
diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
new file mode 100644
index 0000000..2328a18
--- /dev/null
+++ b/drivers/net/mlx4/mlx4_flow.c
@@ -0,0 +1,1053 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <assert.h>
+
+/* Verbs header. */
+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+#include <rte_malloc.h>
+
+/* Generated configuration header. */
+#include "mlx4_autoconf.h"
+
+/* PMD headers. */
+#include "mlx4.h"
+#include "mlx4_flow.h"
+
+/**
+ * Static initializer for item lists: expands to a compound literal array
+ * holding the given item types followed by RTE_FLOW_ITEM_TYPE_END.
+ */
+#define ITEMS(...) \
+ (const enum rte_flow_item_type []){ \
+ __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
+ }
+
+/**
+ * Structure to generate a simple graph of layers supported by the NIC.
+ *
+ * One entry describes one pattern item type: how to validate it, how to
+ * convert it and which item types may follow it.
+ */
+struct mlx4_flow_items {
+ /** List of possible actions for these items. */
+ const enum rte_flow_action_type *const actions;
+ /** Bit-masks corresponding to the possibilities for the item. */
+ const void *mask;
+ /**
+ * Default bit-masks to use when item->mask is not provided. When
+ * default_mask is also NULL, the full supported bit-mask (mask) is
+ * used instead.
+ */
+ const void *default_mask;
+ /** Bit-masks size in bytes. */
+ const unsigned int mask_sz;
+ /**
+ * Check support for a given item.
+ *
+ * @param[in] item
+ * Item specification.
+ * @param[in] mask
+ * Bit-masks covering supported fields to compare with the spec,
+ * last and mask fields of item.
+ * @param size
+ * Bit-masks size in bytes.
+ *
+ * @return
+ * 0 on success, negative value otherwise.
+ */
+ int (*validate)(const struct rte_flow_item *item,
+ const uint8_t *mask, unsigned int size);
+ /**
+ * Conversion function from rte_flow to NIC specific flow.
+ *
+ * @param[in] item
+ * rte_flow item to convert.
+ * @param[in] default_mask
+ * Default bit-masks to use when item->mask is not provided.
+ * @param[in, out] data
+ * Internal structure to store the conversion.
+ *
+ * @return
+ * 0 on success, negative value otherwise.
+ */
+ int (*convert)(const struct rte_flow_item *item,
+ const void *default_mask,
+ void *data);
+ /**
+ * Size in bytes of the destination structure; the flow offset is
+ * advanced by this amount once the item has been processed.
+ */
+ const unsigned int dst_sz;
+ /** List of possible following items. */
+ const enum rte_flow_item_type *const items;
+};
+
+/** Actions supported by this PMD (list terminated by the END action). */
+static const enum rte_flow_action_type valid_actions[] = {
+ RTE_FLOW_ACTION_TYPE_DROP,
+ RTE_FLOW_ACTION_TYPE_QUEUE,
+ RTE_FLOW_ACTION_TYPE_END,
+};
+
+/**
+ * Convert Ethernet item to Verbs specification.
+ *
+ * A new Ethernet spec is written at the current flow offset and the rule
+ * priority is set to 2. Without an item spec, the rule type becomes
+ * IBV_FLOW_ATTR_ALL_DEFAULT and the Verbs spec is left empty.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] default_mask
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param[in, out] data
+ *   User structure (struct mlx4_flow).
+ *
+ * @return
+ *   Always 0.
+ */
+static int
+mlx4_flow_create_eth(const struct rte_flow_item *item,
+		     const void *default_mask,
+		     void *data)
+{
+	struct mlx4_flow *flow = data;
+	struct ibv_flow_attr *attr = flow->ibv_attr;
+	const struct rte_flow_item_eth *spec = item->spec;
+	const struct rte_flow_item_eth *mask =
+		item->mask ? item->mask : default_mask;
+	struct ibv_flow_spec_eth *eth =
+		(void *)((uintptr_t)attr + flow->offset);
+	unsigned int i;
+
+	attr->num_of_specs++;
+	attr->priority = 2;
+	*eth = (struct ibv_flow_spec_eth){
+		.type = IBV_FLOW_SPEC_ETH,
+		.size = sizeof(*eth),
+	};
+	if (spec == NULL) {
+		attr->type = IBV_FLOW_ATTR_ALL_DEFAULT;
+		return 0;
+	}
+	/* Store masks and values, clearing value bits outside the masks. */
+	for (i = 0; i != ETHER_ADDR_LEN; ++i) {
+		eth->mask.dst_mac[i] = mask->dst.addr_bytes[i];
+		eth->mask.src_mac[i] = mask->src.addr_bytes[i];
+		eth->val.dst_mac[i] =
+			spec->dst.addr_bytes[i] & mask->dst.addr_bytes[i];
+		eth->val.src_mac[i] =
+			spec->src.addr_bytes[i] & mask->src.addr_bytes[i];
+	}
+	return 0;
+}
+
+/**
+ * Convert VLAN item to Verbs specification.
+ *
+ * No new Verbs spec is added: the VLAN tag is stored in the Ethernet spec
+ * located right before the current flow offset.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] default_mask
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param[in, out] data
+ *   User structure (struct mlx4_flow).
+ *
+ * @return
+ *   Always 0.
+ */
+static int
+mlx4_flow_create_vlan(const struct rte_flow_item *item,
+		      const void *default_mask,
+		      void *data)
+{
+	struct mlx4_flow *flow = data;
+	const struct rte_flow_item_vlan *spec = item->spec;
+	const struct rte_flow_item_vlan *mask =
+		item->mask ? item->mask : default_mask;
+	struct ibv_flow_spec_eth *eth;
+
+	if (spec == NULL)
+		return 0;
+	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset -
+		       sizeof(*eth));
+	eth->mask.vlan_tag = mask->tci;
+	/* Keep only the value bits covered by the mask. */
+	eth->val.vlan_tag = spec->tci & mask->tci;
+	return 0;
+}
+
+/**
+ * Convert IPv4 item to Verbs specification.
+ *
+ * A new IPv4 spec is written at the current flow offset and the rule
+ * priority is set to 1. Only source and destination addresses are
+ * converted.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] default_mask
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param[in, out] data
+ *   User structure (struct mlx4_flow).
+ *
+ * @return
+ *   Always 0.
+ */
+static int
+mlx4_flow_create_ipv4(const struct rte_flow_item *item,
+		      const void *default_mask,
+		      void *data)
+{
+	struct mlx4_flow *flow = data;
+	const struct rte_flow_item_ipv4 *spec = item->spec;
+	const struct rte_flow_item_ipv4 *mask =
+		item->mask ? item->mask : default_mask;
+	struct ibv_flow_spec_ipv4 *ipv4 =
+		(void *)((uintptr_t)flow->ibv_attr + flow->offset);
+
+	flow->ibv_attr->num_of_specs++;
+	flow->ibv_attr->priority = 1;
+	*ipv4 = (struct ibv_flow_spec_ipv4){
+		.type = IBV_FLOW_SPEC_IPV4,
+		.size = sizeof(*ipv4),
+	};
+	if (spec == NULL)
+		return 0;
+	ipv4->mask = (struct ibv_flow_ipv4_filter){
+		.src_ip = mask->hdr.src_addr,
+		.dst_ip = mask->hdr.dst_addr,
+	};
+	/* Values only keep the bits covered by the masks. */
+	ipv4->val = (struct ibv_flow_ipv4_filter){
+		.src_ip = spec->hdr.src_addr & mask->hdr.src_addr,
+		.dst_ip = spec->hdr.dst_addr & mask->hdr.dst_addr,
+	};
+	return 0;
+}
+
+/**
+ * Convert UDP item to Verbs specification.
+ *
+ * A new UDP spec is written at the current flow offset and the rule
+ * priority is set to 0. Only source and destination ports are converted.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] default_mask
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param[in, out] data
+ *   User structure (struct mlx4_flow).
+ *
+ * @return
+ *   Always 0.
+ */
+static int
+mlx4_flow_create_udp(const struct rte_flow_item *item,
+		     const void *default_mask,
+		     void *data)
+{
+	struct mlx4_flow *flow = data;
+	const struct rte_flow_item_udp *spec = item->spec;
+	const struct rte_flow_item_udp *mask =
+		item->mask ? item->mask : default_mask;
+	struct ibv_flow_spec_tcp_udp *udp =
+		(void *)((uintptr_t)flow->ibv_attr + flow->offset);
+
+	flow->ibv_attr->num_of_specs++;
+	flow->ibv_attr->priority = 0;
+	*udp = (struct ibv_flow_spec_tcp_udp){
+		.type = IBV_FLOW_SPEC_UDP,
+		.size = sizeof(*udp),
+	};
+	if (spec == NULL)
+		return 0;
+	udp->mask.src_port = mask->hdr.src_port;
+	udp->mask.dst_port = mask->hdr.dst_port;
+	/* Values only keep the bits covered by the masks. */
+	udp->val.src_port = spec->hdr.src_port & mask->hdr.src_port;
+	udp->val.dst_port = spec->hdr.dst_port & mask->hdr.dst_port;
+	return 0;
+}
+
+/**
+ * Convert TCP item to Verbs specification.
+ *
+ * A new TCP spec is written at the current flow offset and the rule
+ * priority is set to 0. Only source and destination ports are converted.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] default_mask
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param[in, out] data
+ *   User structure (struct mlx4_flow).
+ *
+ * @return
+ *   Always 0.
+ */
+static int
+mlx4_flow_create_tcp(const struct rte_flow_item *item,
+		     const void *default_mask,
+		     void *data)
+{
+	struct mlx4_flow *flow = data;
+	const struct rte_flow_item_tcp *spec = item->spec;
+	const struct rte_flow_item_tcp *mask =
+		item->mask ? item->mask : default_mask;
+	struct ibv_flow_spec_tcp_udp *tcp =
+		(void *)((uintptr_t)flow->ibv_attr + flow->offset);
+
+	flow->ibv_attr->num_of_specs++;
+	flow->ibv_attr->priority = 0;
+	*tcp = (struct ibv_flow_spec_tcp_udp){
+		.type = IBV_FLOW_SPEC_TCP,
+		.size = sizeof(*tcp),
+	};
+	if (spec == NULL)
+		return 0;
+	tcp->mask.src_port = mask->hdr.src_port;
+	tcp->mask.dst_port = mask->hdr.dst_port;
+	/* Values only keep the bits covered by the masks. */
+	tcp->val.src_port = spec->hdr.src_port & mask->hdr.src_port;
+	tcp->val.dst_port = spec->hdr.dst_port & mask->hdr.dst_port;
+	return 0;
+}
+
+/**
+ * Check that a byte buffer sets no bit outside of a supported bit-mask.
+ *
+ * @param[in] bytes
+ *   Buffer to check.
+ * @param[in] mask
+ *   Supported bit-mask.
+ * @param size
+ *   Size in bytes of both buffers.
+ *
+ * @return
+ *   0 when all bits set in bytes are also set in mask, -1 otherwise.
+ */
+static int
+mlx4_flow_item_check_bits(const void *bytes, const uint8_t *mask,
+			  unsigned int size)
+{
+	const uint8_t *b = bytes;
+	unsigned int i;
+
+	for (i = 0; i < size; ++i)
+		if ((b[i] | mask[i]) != mask[i])
+			return -1;
+	return 0;
+}
+
+/**
+ * Check support for a given item.
+ *
+ * The item is rejected when:
+ * - a mask or a last value is provided without a spec,
+ * - the user-provided mask covers bits outside of the supported mask
+ *   (such fields cannot be matched and must not be silently ignored),
+ * - without a user-provided mask, spec or last set bits outside of the
+ *   supported mask,
+ * - spec and last differ once masked, i.e. an actual range is requested.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] mask
+ *   Bit-masks covering supported fields to compare with the spec, last and
+ *   mask fields of item.
+ * @param size
+ *   Bit-masks size in bytes.
+ *
+ * @return
+ *   0 on success, negative value otherwise.
+ */
+static int
+mlx4_flow_item_validate(const struct rte_flow_item *item,
+			const uint8_t *mask, unsigned int size)
+{
+	if (!item->spec && (item->mask || item->last))
+		return -1;
+	if (item->mask) {
+		if (mlx4_flow_item_check_bits(item->mask, mask, size))
+			return -1;
+	} else {
+		if (item->spec &&
+		    mlx4_flow_item_check_bits(item->spec, mask, size))
+			return -1;
+		if (item->last &&
+		    mlx4_flow_item_check_bits(item->last, mask, size))
+			return -1;
+	}
+	if (item->spec && item->last) {
+		const uint8_t *apply = item->mask ? item->mask : mask;
+		const uint8_t *spec = item->spec;
+		const uint8_t *last = item->last;
+		unsigned int i;
+
+		/* Ranges are not supported: masked spec and last must match. */
+		for (i = 0; i < size; ++i)
+			if ((spec[i] & apply[i]) != (last[i] & apply[i]))
+				return -1;
+	}
+	return 0;
+}
+
+/**
+ * Check support for an Ethernet item.
+ *
+ * When a mask is provided, every byte of its destination MAC address must
+ * be 0xff; the generic item checks are then applied.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] mask
+ *   Bit-masks covering supported fields.
+ * @param size
+ *   Bit-masks size in bytes.
+ *
+ * @return
+ *   0 on success, non-zero value otherwise.
+ */
+static int
+mlx4_flow_validate_eth(const struct rte_flow_item *item,
+		       const uint8_t *mask, unsigned int size)
+{
+	const struct rte_flow_item_eth *user_mask = item->mask;
+	unsigned int i;
+
+	if (user_mask != NULL)
+		for (i = 0; i != ETHER_ADDR_LEN; ++i)
+			if (user_mask->dst.addr_bytes[i] != 0xff)
+				return -1;
+	return mlx4_flow_item_validate(item, mask, size);
+}
+
+/**
+ * Check support for a VLAN item.
+ *
+ * When a mask is provided, its TCI must be either 0 or 0x0fff once
+ * converted with ntohs(); the generic item checks are then applied.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] mask
+ *   Bit-masks covering supported fields.
+ * @param size
+ *   Bit-masks size in bytes.
+ *
+ * @return
+ *   0 on success, non-zero value otherwise.
+ */
+static int
+mlx4_flow_validate_vlan(const struct rte_flow_item *item,
+			const uint8_t *mask, unsigned int size)
+{
+	const struct rte_flow_item_vlan *user_mask = item->mask;
+
+	if (user_mask != NULL &&
+	    user_mask->tci != 0 &&
+	    ntohs(user_mask->tci) != 0x0fff)
+		return -1;
+	return mlx4_flow_item_validate(item, mask, size);
+}
+
+/**
+ * Check support for an IPv4 item.
+ *
+ * When a mask is provided, source and destination address masks must each
+ * be either empty or full; the generic item checks are then applied.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] mask
+ *   Bit-masks covering supported fields.
+ * @param size
+ *   Bit-masks size in bytes.
+ *
+ * @return
+ *   0 on success, non-zero value otherwise.
+ */
+static int
+mlx4_flow_validate_ipv4(const struct rte_flow_item *item,
+			const uint8_t *mask, unsigned int size)
+{
+	const struct rte_flow_item_ipv4 *user_mask = item->mask;
+
+	if (user_mask != NULL) {
+		const uint32_t src = user_mask->hdr.src_addr;
+		const uint32_t dst = user_mask->hdr.dst_addr;
+
+		if ((src != 0 && src != 0xffffffff) ||
+		    (dst != 0 && dst != 0xffffffff))
+			return -1;
+	}
+	return mlx4_flow_item_validate(item, mask, size);
+}
+
+/**
+ * Check support for a UDP item.
+ *
+ * When a mask is provided, source and destination port masks must each be
+ * either empty or full; the generic item checks are then applied.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] mask
+ *   Bit-masks covering supported fields.
+ * @param size
+ *   Bit-masks size in bytes.
+ *
+ * @return
+ *   0 on success, non-zero value otherwise.
+ */
+static int
+mlx4_flow_validate_udp(const struct rte_flow_item *item,
+		       const uint8_t *mask, unsigned int size)
+{
+	const struct rte_flow_item_udp *user_mask = item->mask;
+
+	if (user_mask != NULL) {
+		const uint16_t src = user_mask->hdr.src_port;
+		const uint16_t dst = user_mask->hdr.dst_port;
+
+		if ((src != 0 && src != 0xffff) ||
+		    (dst != 0 && dst != 0xffff))
+			return -1;
+	}
+	return mlx4_flow_item_validate(item, mask, size);
+}
+
+/**
+ * Check support for a TCP item.
+ *
+ * When a mask is provided, source and destination port masks must each be
+ * either empty or full; the generic item checks are then applied.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] mask
+ *   Bit-masks covering supported fields.
+ * @param size
+ *   Bit-masks size in bytes.
+ *
+ * @return
+ *   0 on success, non-zero value otherwise.
+ */
+static int
+mlx4_flow_validate_tcp(const struct rte_flow_item *item,
+		       const uint8_t *mask, unsigned int size)
+{
+	const struct rte_flow_item_tcp *user_mask = item->mask;
+
+	if (user_mask != NULL) {
+		const uint16_t src = user_mask->hdr.src_port;
+		const uint16_t dst = user_mask->hdr.dst_port;
+
+		if ((src != 0 && src != 0xffff) ||
+		    (dst != 0 && dst != 0xffff))
+			return -1;
+	}
+	return mlx4_flow_item_validate(item, mask, size);
+}
+
+/**
+ * Graph of supported items and associated actions.
+ *
+ * The table is indexed by item type. The END entry is the starting point
+ * of a pattern and only lists the item types allowed first; every other
+ * entry lists the item types allowed right after it. Entries without a
+ * default_mask (VLAN) fall back to their supported mask when the user
+ * provides none.
+ */
+static const struct mlx4_flow_items mlx4_flow_items[] = {
+ [RTE_FLOW_ITEM_TYPE_END] = {
+ .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
+ },
+ [RTE_FLOW_ITEM_TYPE_ETH] = {
+ .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
+ RTE_FLOW_ITEM_TYPE_IPV4),
+ .actions = valid_actions,
+ .mask = &(const struct rte_flow_item_eth){
+ .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+ .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+ },
+ .default_mask = &rte_flow_item_eth_mask,
+ .mask_sz = sizeof(struct rte_flow_item_eth),
+ .validate = mlx4_flow_validate_eth,
+ .convert = mlx4_flow_create_eth,
+ .dst_sz = sizeof(struct ibv_flow_spec_eth),
+ },
+ [RTE_FLOW_ITEM_TYPE_VLAN] = {
+ .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4),
+ .actions = valid_actions,
+ .mask = &(const struct rte_flow_item_vlan){
+ /*
+ * rte_flow_item_vlan_mask is invalid for mlx4. The value below
+ * is 0x0fff stored in big-endian byte order on either host
+ * endianness.
+ */
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+ .tci = 0x0fff,
+#else
+ .tci = 0xff0f,
+#endif
+ },
+ .mask_sz = sizeof(struct rte_flow_item_vlan),
+ .validate = mlx4_flow_validate_vlan,
+ .convert = mlx4_flow_create_vlan,
+ .dst_sz = 0,
+ },
+ [RTE_FLOW_ITEM_TYPE_IPV4] = {
+ .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
+ RTE_FLOW_ITEM_TYPE_TCP),
+ .actions = valid_actions,
+ .mask = &(const struct rte_flow_item_ipv4){
+ .hdr = {
+ .src_addr = -1,
+ .dst_addr = -1,
+ },
+ },
+ .default_mask = &rte_flow_item_ipv4_mask,
+ .mask_sz = sizeof(struct rte_flow_item_ipv4),
+ .validate = mlx4_flow_validate_ipv4,
+ .convert = mlx4_flow_create_ipv4,
+ .dst_sz = sizeof(struct ibv_flow_spec_ipv4),
+ },
+ [RTE_FLOW_ITEM_TYPE_UDP] = {
+ .actions = valid_actions,
+ .mask = &(const struct rte_flow_item_udp){
+ .hdr = {
+ .src_port = -1,
+ .dst_port = -1,
+ },
+ },
+ .default_mask = &rte_flow_item_udp_mask,
+ .mask_sz = sizeof(struct rte_flow_item_udp),
+ .validate = mlx4_flow_validate_udp,
+ .convert = mlx4_flow_create_udp,
+ .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
+ },
+ [RTE_FLOW_ITEM_TYPE_TCP] = {
+ .actions = valid_actions,
+ .mask = &(const struct rte_flow_item_tcp){
+ .hdr = {
+ .src_port = -1,
+ .dst_port = -1,
+ },
+ },
+ .default_mask = &rte_flow_item_tcp_mask,
+ .mask_sz = sizeof(struct rte_flow_item_tcp),
+ .validate = mlx4_flow_validate_tcp,
+ .convert = mlx4_flow_create_tcp,
+ .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
+ },
+};
+
+/**
+ * Validate a flow supported by the NIC.
+ *
+ * The pattern is walked through the mlx4_flow_items graph. For every
+ * supported item, flow->offset is advanced by the size of its Verbs
+ * specification; when flow->ibv_attr is not NULL, the item is additionally
+ * converted into that buffer. Callers can thus run a first pass with a
+ * NULL ibv_attr to compute the required buffer size, then a second one to
+ * fill it.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[in] attr
+ *   Flow rule attributes.
+ * @param[in] items
+ *   Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ *   Associated actions (list terminated by the END action).
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ * @param[in, out] flow
+ *   Flow structure to update.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+priv_flow_validate(struct priv *priv,
+		   const struct rte_flow_attr *attr,
+		   const struct rte_flow_item items[],
+		   const struct rte_flow_action actions[],
+		   struct rte_flow_error *error,
+		   struct mlx4_flow *flow)
+{
+	const struct mlx4_flow_items *cur_item = mlx4_flow_items;
+	struct mlx4_flow_action action = {
+		.queue = 0,
+		.drop = 0,
+	};
+
+	if (attr->group) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
+				   NULL,
+				   "groups are not supported");
+		return -rte_errno;
+	}
+	if (attr->priority) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
+				   NULL,
+				   "priorities are not supported");
+		return -rte_errno;
+	}
+	if (attr->egress) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
+				   NULL,
+				   "egress is not supported");
+		return -rte_errno;
+	}
+	if (!attr->ingress) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
+				   NULL,
+				   "only ingress is supported");
+		return -rte_errno;
+	}
+	/* Go over items list. */
+	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
+		const struct mlx4_flow_items *token = NULL;
+		unsigned int i;
+		int err;
+
+		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
+			continue;
+		/*
+		 * The nic can support patterns with NULL eth spec only
+		 * if eth is a single item in a rule.
+		 */
+		if (!items->spec &&
+		    items->type == RTE_FLOW_ITEM_TYPE_ETH) {
+			const struct rte_flow_item *next = items + 1;
+
+			if (next->type != RTE_FLOW_ITEM_TYPE_END) {
+				rte_flow_error_set(error, ENOTSUP,
+						   RTE_FLOW_ERROR_TYPE_ITEM,
+						   items,
+						   "the rule requires"
+						   " an Ethernet spec");
+				return -rte_errno;
+			}
+		}
+		/* The item must be a valid successor of the previous one. */
+		for (i = 0;
+		     cur_item->items &&
+		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
+		     ++i) {
+			if (cur_item->items[i] == items->type) {
+				token = &mlx4_flow_items[items->type];
+				break;
+			}
+		}
+		if (!token)
+			goto exit_item_not_supported;
+		cur_item = token;
+		err = cur_item->validate(items,
+					 (const uint8_t *)cur_item->mask,
+					 cur_item->mask_sz);
+		if (err)
+			goto exit_item_not_supported;
+		/* Conversion only takes place once a buffer is available. */
+		if (flow->ibv_attr && cur_item->convert) {
+			err = cur_item->convert(items,
+						(cur_item->default_mask ?
+						 cur_item->default_mask :
+						 cur_item->mask),
+						flow);
+			if (err)
+				goto exit_item_not_supported;
+		}
+		flow->offset += cur_item->dst_sz;
+	}
+	/* Go over actions list */
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
+		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
+			continue;
+		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
+			action.drop = 1;
+		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
+			const struct rte_flow_action_queue *queue =
+				(const struct rte_flow_action_queue *)
+				actions->conf;
+
+			/*
+			 * Compare against rxqs_n directly: "rxqs_n - 1"
+			 * wraps around when no Rx queue is configured and
+			 * would let any index through.
+			 */
+			if (!queue || queue->index >= priv->rxqs_n)
+				goto exit_action_not_supported;
+			action.queue = 1;
+		} else {
+			goto exit_action_not_supported;
+		}
+	}
+	if (!action.queue && !action.drop) {
+		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "no valid action");
+		return -rte_errno;
+	}
+	return 0;
+exit_item_not_supported:
+	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
+			   items, "item not supported");
+	return -rte_errno;
+exit_action_not_supported:
+	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+			   actions, "action not supported");
+	return -rte_errno;
+}
+
+/**
+ * Validate a flow supported by the NIC.
+ *
+ * Runs the internal validation under the private lock, using a scratch
+ * flow structure without any Verbs attribute buffer, so nothing is
+ * converted.
+ *
+ * @see rte_flow_validate()
+ * @see rte_flow_ops
+ */
+int
+mlx4_flow_validate(struct rte_eth_dev *dev,
+		   const struct rte_flow_attr *attr,
+		   const struct rte_flow_item items[],
+		   const struct rte_flow_action actions[],
+		   struct rte_flow_error *error)
+{
+	struct mlx4_flow scratch = { .offset = sizeof(struct ibv_flow_attr) };
+	struct priv *priv = dev->data->dev_private;
+	int status;
+
+	priv_lock(priv);
+	status = priv_flow_validate(priv, attr, items, actions, error,
+				    &scratch);
+	priv_unlock(priv);
+	return status;
+}
+
+/**
+ * Complete flow rule creation.
+ *
+ * For a drop action, a dedicated CQ and QP are created and the rule is
+ * attached to that QP; otherwise the rule is attached to the QP of the
+ * target Rx queue.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param ibv_attr
+ *   Verbs flow attributes. On success they are referenced by the returned
+ *   flow; on failure they are released by this function, so the caller
+ *   must not free them.
+ * @param action
+ *   Target action structure.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   A flow if the rule could be created, NULL otherwise.
+ */
+static struct rte_flow *
+priv_flow_create_action_queue(struct priv *priv,
+			      struct ibv_flow_attr *ibv_attr,
+			      struct mlx4_flow_action *action,
+			      struct rte_flow_error *error)
+{
+	struct rxq *rxq = NULL;
+	struct ibv_qp *qp;
+	struct rte_flow *rte_flow;
+
+	assert(priv->pd);
+	assert(priv->ctx);
+	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
+	if (!rte_flow) {
+		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "cannot allocate flow memory");
+		goto error;
+	}
+	/*
+	 * The Rx queue provides either the target QP or, for drop rules,
+	 * the resource domain; refuse to go on without it instead of
+	 * dereferencing a missing queue.
+	 */
+	if (action->queue_id < priv->rxqs_n)
+		rxq = (*priv->rxqs)[action->queue_id];
+	if (!rxq) {
+		rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "Rx queue is not configured");
+		goto error;
+	}
+	if (action->drop) {
+		rte_flow->cq =
+			ibv_exp_create_cq(priv->ctx, 1, NULL, NULL, 0,
+					  &(struct ibv_exp_cq_init_attr){
+						  .comp_mask = 0,
+					  });
+		if (!rte_flow->cq) {
+			rte_flow_error_set(error, ENOMEM,
+					   RTE_FLOW_ERROR_TYPE_HANDLE,
+					   NULL, "cannot allocate CQ");
+			goto error;
+		}
+		rte_flow->qp = ibv_exp_create_qp(
+			priv->ctx,
+			&(struct ibv_exp_qp_init_attr){
+				.send_cq = rte_flow->cq,
+				.recv_cq = rte_flow->cq,
+				.cap = {
+					.max_recv_wr = 1,
+					.max_recv_sge = 1,
+				},
+				.qp_type = IBV_QPT_RAW_PACKET,
+				.comp_mask =
+					IBV_EXP_QP_INIT_ATTR_PD |
+					IBV_EXP_QP_INIT_ATTR_PORT |
+					IBV_EXP_QP_INIT_ATTR_RES_DOMAIN,
+				.pd = priv->pd,
+				.res_domain = rxq->rd,
+				.port_num = priv->port,
+			});
+		if (!rte_flow->qp) {
+			rte_flow_error_set(error, ENOMEM,
+					   RTE_FLOW_ERROR_TYPE_HANDLE,
+					   NULL, "cannot allocate QP");
+			goto error;
+		}
+		qp = rte_flow->qp;
+	} else {
+		rte_flow->rxq = rxq;
+		qp = rxq->qp;
+	}
+	rte_flow->ibv_attr = ibv_attr;
+	rte_flow->ibv_flow = ibv_create_flow(qp, rte_flow->ibv_attr);
+	if (!rte_flow->ibv_flow) {
+		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "flow rule creation failure");
+		goto error;
+	}
+	return rte_flow;
+
+error:
+	if (rte_flow) {
+		/* The QP references the CQ and must be destroyed first. */
+		if (rte_flow->qp)
+			claim_zero(ibv_destroy_qp(rte_flow->qp));
+		if (rte_flow->cq)
+			claim_zero(ibv_destroy_cq(rte_flow->cq));
+		rte_free(rte_flow);
+	}
+	/* Release the attributes on every failure path, not only the last. */
+	rte_free(ibv_attr);
+	return NULL;
+}
+
+/**
+ * Convert a flow.
+ *
+ * Validation runs twice: a first pass without a Verbs attribute buffer to
+ * compute the size required, then a second pass that fills the freshly
+ * allocated buffer. The action list is then summarized and the rule is
+ * handed over to priv_flow_create_action_queue().
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[in] attr
+ *   Flow rule attributes.
+ * @param[in] items
+ *   Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ *   Associated actions (list terminated by the END action).
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   A flow on success, NULL otherwise.
+ */
+static struct rte_flow *
+priv_flow_create(struct priv *priv,
+		 const struct rte_flow_attr *attr,
+		 const struct rte_flow_item items[],
+		 const struct rte_flow_action actions[],
+		 struct rte_flow_error *error)
+{
+	struct mlx4_flow flow = { .offset = sizeof(struct ibv_flow_attr), };
+	struct mlx4_flow_action action = {
+		.queue = 0,
+		.drop = 0,
+	};
+	const struct rte_flow_action *act;
+
+	/* First pass: check the rule and compute the buffer size. */
+	if (priv_flow_validate(priv, attr, items, actions, error, &flow))
+		return NULL;
+	flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
+	if (flow.ibv_attr == NULL) {
+		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "cannot allocate ibv_attr memory");
+		return NULL;
+	}
+	/* Second pass: rewind the offset and fill the buffer. */
+	flow.offset = sizeof(struct ibv_flow_attr);
+	*flow.ibv_attr = (struct ibv_flow_attr){
+		.comp_mask = 0,
+		.type = IBV_FLOW_ATTR_NORMAL,
+		.size = sizeof(struct ibv_flow_attr),
+		.priority = attr->priority,
+		.num_of_specs = 0,
+		.port = priv->port,
+		.flags = 0,
+	};
+	claim_zero(priv_flow_validate(priv, attr, items, actions,
+				      error, &flow));
+	for (act = actions; act->type != RTE_FLOW_ACTION_TYPE_END; ++act) {
+		switch (act->type) {
+		case RTE_FLOW_ACTION_TYPE_VOID:
+			break;
+		case RTE_FLOW_ACTION_TYPE_QUEUE:
+			action.queue = 1;
+			action.queue_id =
+				((const struct rte_flow_action_queue *)
+				 act->conf)->index;
+			break;
+		case RTE_FLOW_ACTION_TYPE_DROP:
+			action.drop = 1;
+			break;
+		default:
+			rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   act, "unsupported action");
+			rte_free(flow.ibv_attr);
+			return NULL;
+		}
+	}
+	return priv_flow_create_action_queue(priv, flow.ibv_attr,
+					     &action, error);
+}
+
+/**
+ * Create a flow.
+ *
+ * The flow is built under the private lock and, on success, inserted at
+ * the head of the list of flows of the device.
+ *
+ * @see rte_flow_create()
+ * @see rte_flow_ops
+ */
+struct rte_flow *
+mlx4_flow_create(struct rte_eth_dev *dev,
+		 const struct rte_flow_attr *attr,
+		 const struct rte_flow_item items[],
+		 const struct rte_flow_action actions[],
+		 struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+	struct rte_flow *created;
+
+	priv_lock(priv);
+	created = priv_flow_create(priv, attr, items, actions, error);
+	if (created == NULL) {
+		priv_unlock(priv);
+		return NULL;
+	}
+	LIST_INSERT_HEAD(&priv->flows, created, next);
+	DEBUG("Flow created %p", (void *)created);
+	priv_unlock(priv);
+	return created;
+}
+
+/**
+ * Destroy a flow.
+ *
+ * The flow is unlinked from its list, then its Verbs flow, QP and CQ are
+ * destroyed in that order when present, and its memory is released.
+ *
+ * @param priv
+ *   Pointer to private structure (unused).
+ * @param[in] flow
+ *   Flow to destroy.
+ */
+static void
+priv_flow_destroy(struct priv *priv, struct rte_flow *flow)
+{
+	(void)priv;
+	LIST_REMOVE(flow, next);
+	if (flow->ibv_flow != NULL)
+		claim_zero(ibv_destroy_flow(flow->ibv_flow));
+	if (flow->qp != NULL)
+		claim_zero(ibv_destroy_qp(flow->qp));
+	if (flow->cq != NULL)
+		claim_zero(ibv_destroy_cq(flow->cq));
+	rte_free(flow->ibv_attr);
+	/* Log before freeing; only the address is printed. */
+	DEBUG("Flow destroyed %p", (void *)flow);
+	rte_free(flow);
+}
+
+/**
+ * Destroy a flow.
+ *
+ * Thin wrapper taking the private lock around priv_flow_destroy(); the
+ * error structure is not used and 0 is always returned.
+ *
+ * @see rte_flow_destroy()
+ * @see rte_flow_ops
+ */
+int
+mlx4_flow_destroy(struct rte_eth_dev *dev,
+		  struct rte_flow *flow,
+		  struct rte_flow_error *error)
+{
+	struct priv *const priv = dev->data->dev_private;
+
+	(void)error;
+
+	priv_lock(priv);
+	priv_flow_destroy(priv, flow);
+	priv_unlock(priv);
+
+	return 0;
+}
+
+/**
+ * Destroy all flows.
+ *
+ * Repeatedly destroys the first flow of the list until it is empty.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+static void
+priv_flow_flush(struct priv *priv)
+{
+	struct rte_flow *head;
+
+	while ((head = LIST_FIRST(&priv->flows)) != NULL)
+		priv_flow_destroy(priv, head);
+}
+
+/**
+ * Destroy all flows.
+ *
+ * Thin wrapper taking the private lock around priv_flow_flush(); the
+ * error structure is not used and 0 is always returned.
+ *
+ * @see rte_flow_flush()
+ * @see rte_flow_ops
+ */
+int
+mlx4_flow_flush(struct rte_eth_dev *dev,
+		struct rte_flow_error *error)
+{
+	struct priv *const priv = dev->data->dev_private;
+
+	(void)error;
+
+	priv_lock(priv);
+	priv_flow_flush(priv);
+	priv_unlock(priv);
+
+	return 0;
+}
+
+/**
+ * Remove all flows.
+ *
+ * Called by dev_stop() to remove all flows. Only the Verbs flow handles
+ * are destroyed; the flows themselves stay in the list so that
+ * mlx4_priv_flow_start() can apply them again.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+void
+mlx4_priv_flow_stop(struct priv *priv)
+{
+	struct rte_flow *flow;
+
+	for (flow = LIST_FIRST(&priv->flows);
+	     flow;
+	     flow = LIST_NEXT(flow, next)) {
+		/*
+		 * A flow may have no Verbs handle, e.g. when a previous
+		 * mlx4_priv_flow_start() failed before reaching it.
+		 */
+		if (!flow->ibv_flow)
+			continue;
+		claim_zero(ibv_destroy_flow(flow->ibv_flow));
+		flow->ibv_flow = NULL;
+		DEBUG("Flow %p removed", (void *)flow);
+	}
+}
+
+/**
+ * Add all flows.
+ *
+ * Applies every flow of the list that does not currently own a Verbs flow
+ * handle. Flows that are already applied (they are applied as soon as
+ * they are created) are left untouched so that their handle is neither
+ * overwritten nor leaked.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ *
+ * @return
+ *   0 on success, a positive errno value otherwise and rte_errno is set.
+ */
+int
+mlx4_priv_flow_start(struct priv *priv)
+{
+	struct ibv_qp *qp;
+	struct rte_flow *flow;
+
+	for (flow = LIST_FIRST(&priv->flows);
+	     flow;
+	     flow = LIST_NEXT(flow, next)) {
+		if (flow->ibv_flow)
+			continue;
+		/* Drop flows own their QP, others use the Rx queue's one. */
+		qp = flow->qp ? flow->qp : flow->rxq->qp;
+		flow->ibv_flow = ibv_create_flow(qp, flow->ibv_attr);
+		if (!flow->ibv_flow) {
+			DEBUG("Flow %p cannot be applied", (void *)flow);
+			rte_errno = EINVAL;
+			return rte_errno;
+		}
+		DEBUG("Flow %p applied", (void *)flow);
+	}
+	return 0;
+}
diff --git a/drivers/net/mlx4/mlx4_flow.h b/drivers/net/mlx4/mlx4_flow.h
new file mode 100644
index 0000000..537ffdf
--- /dev/null
+++ b/drivers/net/mlx4/mlx4_flow.h
@@ -0,0 +1,104 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RTE_PMD_MLX4_FLOW_H_
+#define RTE_PMD_MLX4_FLOW_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/queue.h>
+
+/* Verbs header. */
+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+#include <rte_byteorder.h>
+
+#include "mlx4.h"
+
+struct rte_flow {
+ LIST_ENTRY(rte_flow) next; /**< Linkage in the priv->flows list. */
+ struct rxq *rxq; /**< Target Rx queue, NULL for a drop flow. */
+ struct ibv_flow *ibv_flow; /**< Verbs flow, NULL while not applied. */
+ struct ibv_flow_attr *ibv_attr; /**< Verbs attributes of the rule. */
+ struct ibv_qp *qp; /**< Verbs queue pair of a drop flow. */
+ struct ibv_cq *cq; /**< Verbs completion queue of a drop flow. */
+};
+
+int
+mlx4_flow_validate(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error);
+
+struct rte_flow *
+mlx4_flow_create(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error);
+
+int
+mlx4_flow_destroy(struct rte_eth_dev *dev,
+		  struct rte_flow *flow,
+		  struct rte_flow_error *error);
+
+int
+mlx4_flow_flush(struct rte_eth_dev *dev,
+ struct rte_flow_error *error);
+
+/** Structure to pass to the conversion function. */
+struct mlx4_flow {
+ struct ibv_flow_attr *ibv_attr; /**< Verbs attribute, NULL to only validate. */
+ unsigned int offset; /**< Byte offset of the next spec in ibv_attr. */
+};
+
+struct mlx4_flow_action {
+ uint32_t drop:1; /**< Target is a drop queue. */
+ uint32_t queue:1; /**< Target is a receive queue. */
+ uint32_t queue_id; /**< Index of the target queue in priv->rxqs. */
+};
+
+int mlx4_priv_flow_start(struct priv *priv);
+void mlx4_priv_flow_stop(struct priv *priv);
+
+#endif /* RTE_PMD_MLX4_FLOW_H_ */
--
1.8.3.1
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [dpdk-dev] [PATCH 2/2] net/mlx4: support basic flow items and actions
2017-02-21 14:07 ` [dpdk-dev] [PATCH 2/2] net/mlx4: support basic flow items and actions Vasily Philipov
@ 2017-02-22 8:37 ` Nélio Laranjeiro
2017-02-22 10:10 ` Nélio Laranjeiro
0 siblings, 1 reply; 15+ messages in thread
From: Nélio Laranjeiro @ 2017-02-22 8:37 UTC (permalink / raw)
To: Vasily Philipov; +Cc: dev, Adrien Mazarguil
On Tue, Feb 21, 2017 at 02:07:03PM +0000, Vasily Philipov wrote:
> Adding support for the next items: eth, vlan, ipv4, udp, tcp and for the
> next actions: queue, drop
>
> Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
> ---
> drivers/net/mlx4/Makefile | 3 +-
> drivers/net/mlx4/mlx4.c | 60 ++-
> drivers/net/mlx4/mlx4.h | 3 +
> drivers/net/mlx4/mlx4_flow.c | 1053 ++++++++++++++++++++++++++++++++++++++++++
> drivers/net/mlx4/mlx4_flow.h | 104 +++++
> 5 files changed, 1220 insertions(+), 3 deletions(-)
> create mode 100644 drivers/net/mlx4/mlx4_flow.c
> create mode 100644 drivers/net/mlx4/mlx4_flow.h
>
> diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile
> index 68c5902..1d463f7 100644
> --- a/drivers/net/mlx4/Makefile
> +++ b/drivers/net/mlx4/Makefile
> @@ -36,6 +36,7 @@ LIB = librte_pmd_mlx4.a
>
> # Sources.
> SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4.c
> +SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_flow.c
>
> # Dependencies.
> DEPDIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += lib/librte_ether
> @@ -129,7 +130,7 @@ mlx4_autoconf.h: mlx4_autoconf.h.new
> cmp '$<' '$@' $(AUTOCONF_OUTPUT) || \
> mv '$<' '$@'
>
> -mlx4.o: mlx4_autoconf.h
> +$(SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD):.c=.o): mlx4_autoconf.h
>
> clean_mlx4: FORCE
> $Q rm -f -- mlx4_autoconf.h mlx4_autoconf.h.new
> diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
> index 82ccac8..cc2ebfa 100644
> --- a/drivers/net/mlx4/mlx4.c
> +++ b/drivers/net/mlx4/mlx4.c
> @@ -82,12 +82,14 @@
> #include <rte_log.h>
> #include <rte_alarm.h>
> #include <rte_memory.h>
> +#include <rte_flow.h>
>
> /* Generated configuration header. */
> #include "mlx4_autoconf.h"
>
> -/* PMD header. */
> +/* PMD headers. */
> #include "mlx4.h"
> +#include "mlx4_flow.h"
>
> /* Convenience macros for accessing mbuf fields. */
> #define NEXT(m) ((m)->next)
> @@ -2351,6 +2353,7 @@ struct txq_mp2mr_mbuf_check_data {
> assert(((uint8_t *)attr + sizeof(*attr)) == (uint8_t *)spec);
> *attr = (struct ibv_flow_attr){
> .type = IBV_FLOW_ATTR_NORMAL,
> + .priority = 3,
> .num_of_specs = 1,
> .port = priv->port,
> .flags = 0
> @@ -3936,6 +3939,7 @@ struct txq_mp2mr_mbuf_check_data {
> {
> struct priv *priv = dev->data->dev_private;
> unsigned int i = 0;
> + unsigned int err = 0;
> unsigned int r;
> struct rxq *rxq;
>
> @@ -3985,8 +3989,9 @@ struct txq_mp2mr_mbuf_check_data {
> return -ret;
> } while ((--r) && ((rxq = (*priv->rxqs)[++i]), i));
> priv_dev_interrupt_handler_install(priv, dev);
> + err = mlx4_priv_flow_start(priv);
> priv_unlock(priv);
> - return 0;
> + return -err;
> }
>
> /**
> @@ -4021,6 +4026,7 @@ struct txq_mp2mr_mbuf_check_data {
> rxq = (*priv->rxqs)[0];
> r = priv->rxqs_n;
> }
> + mlx4_priv_flow_stop(priv);
> /* Iterate only once when RSS is enabled. */
> do {
> /* Ignore nonexistent RX queues. */
> @@ -5022,6 +5028,55 @@ struct txq_mp2mr_mbuf_check_data {
> return -ret;
> }
>
> +const struct rte_flow_ops mlx4_flow_ops = {
> + .validate = mlx4_flow_validate,
> + .create = mlx4_flow_create,
> + .destroy = mlx4_flow_destroy,
> + .flush = mlx4_flow_flush,
> + .query = NULL,
> +};
> +
> +/**
> + * Manage filter operations.
> + *
> + * @param dev
> + * Pointer to Ethernet device structure.
> + * @param filter_type
> + * Filter type.
> + * @param filter_op
> + * Operation to perform.
> + * @param arg
> + * Pointer to operation-specific structure.
> + *
> + * @return
> + * 0 on success, negative errno value on failure.
> + */
> +static int
> +mlx4_dev_filter_ctrl(struct rte_eth_dev *dev,
> + enum rte_filter_type filter_type,
> + enum rte_filter_op filter_op,
> + void *arg)
> +{
> + int ret = EINVAL;
> +
> + switch (filter_type) {
> + case RTE_ETH_FILTER_GENERIC:
> + if (filter_op != RTE_ETH_FILTER_GET)
> + return -EINVAL;
> + *(const void **)arg = &mlx4_flow_ops;
> + return 0;
> + case RTE_ETH_FILTER_FDIR:
> + DEBUG("%p: filter type FDIR is not supported by this PMD",
> + (void *)dev);
> + break;
> + default:
> + ERROR("%p: filter type (%d) not supported",
> + (void *)dev, filter_type);
> + break;
> + }
> + return -ret;
> +}
> +
> static const struct eth_dev_ops mlx4_dev_ops = {
> .dev_configure = mlx4_dev_configure,
> .dev_start = mlx4_dev_start,
> @@ -5056,6 +5111,7 @@ struct txq_mp2mr_mbuf_check_data {
> .mac_addr_add = mlx4_mac_addr_add,
> .mac_addr_set = mlx4_mac_addr_set,
> .mtu_set = mlx4_dev_set_mtu,
> + .filter_ctrl = mlx4_dev_filter_ctrl,
> };
>
> /**
> diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
> index 70c9ecd..fac408b 100644
> --- a/drivers/net/mlx4/mlx4.h
> +++ b/drivers/net/mlx4/mlx4.h
> @@ -295,6 +295,8 @@ struct txq {
> struct ibv_exp_res_domain *rd; /* Resource Domain. */
> };
>
> +struct rte_flow;
> +
> struct priv {
> struct rte_eth_dev *dev; /* Ethernet device. */
> struct ibv_context *ctx; /* Verbs context. */
> @@ -337,6 +339,7 @@ struct priv {
> struct rxq *(*rxqs)[]; /* RX queues. */
> struct txq *(*txqs)[]; /* TX queues. */
> struct rte_intr_handle intr_handle; /* Interrupt handler. */
> + LIST_HEAD(mlx4_flows, rte_flow) flows;
> rte_spinlock_t lock; /* Lock for control functions. */
> };
>
> diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
> new file mode 100644
> index 0000000..2328a18
> --- /dev/null
> +++ b/drivers/net/mlx4/mlx4_flow.c
> @@ -0,0 +1,1053 @@
> +/*-
> + * BSD LICENSE
> + *
> + * Copyright 2017 6WIND S.A.
> + * Copyright 2017 Mellanox.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in
> + * the documentation and/or other materials provided with the
> + * distribution.
> + * * Neither the name of 6WIND S.A. nor the names of its
> + * contributors may be used to endorse or promote products derived
> + * from this software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <assert.h>
> +
> +/* Verbs header. */
> +/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
> +#ifdef PEDANTIC
> +#pragma GCC diagnostic ignored "-Wpedantic"
> +#endif
> +#include <infiniband/verbs.h>
> +#ifdef PEDANTIC
> +#pragma GCC diagnostic error "-Wpedantic"
> +#endif
> +
> +#include <rte_flow.h>
> +#include <rte_flow_driver.h>
> +#include <rte_malloc.h>
> +
> +/* Generated configuration header. */
> +#include "mlx4_autoconf.h"
> +
> +/* PMD headers. */
> +#include "mlx4.h"
> +#include "mlx4_flow.h"
> +
> +/** Static initializer for items. */
> +#define ITEMS(...) \
> + (const enum rte_flow_item_type []){ \
> + __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
> + }
> +
> +/** Structure to generate a simple graph of layers supported by the NIC. */
> +struct mlx4_flow_items {
> + /** List of possible actions for these items. */
> + const enum rte_flow_action_type *const actions;
> + /** Bit-masks corresponding to the possibilities for the item. */
> + const void *mask;
> + /**
> + * Default bit-masks to use when item->mask is not provided. When
> + * \default_mask is also NULL, the full supported bit-mask (\mask) is
> + * used instead.
> + */
> + const void *default_mask;
> + /** Bit-masks size in bytes. */
> + const unsigned int mask_sz;
> + /**
> + * Check support for a given item.
> + *
> + * @param item[in]
> + * Item specification.
> + * @param mask[in]
> + * Bit-masks covering supported fields to compare with spec,
> + * last and mask in
> + * \item.
> + * @param size
> + * Bit-Mask size in bytes.
> + *
> + * @return
> + * 0 on success, negative value otherwise.
> + */
> + int (*validate)(const struct rte_flow_item *item,
> + const uint8_t *mask, unsigned int size);
> + /**
> + * Conversion function from rte_flow to NIC specific flow.
> + *
> + * @param item
> + * rte_flow item to convert.
> + * @param default_mask
> + * Default bit-masks to use when item->mask is not provided.
> + * @param data
> + * Internal structure to store the conversion.
> + *
> + * @return
> + * 0 on success, negative value otherwise.
> + */
> + int (*convert)(const struct rte_flow_item *item,
> + const void *default_mask,
> + void *data);
> + /** Size in bytes of the destination structure. */
> + const unsigned int dst_sz;
> + /** List of possible following items. */
> + const enum rte_flow_item_type *const items;
> +};
> +
> +/** Valid action for this PMD. */
> +static const enum rte_flow_action_type valid_actions[] = {
> + RTE_FLOW_ACTION_TYPE_DROP,
> + RTE_FLOW_ACTION_TYPE_QUEUE,
> + RTE_FLOW_ACTION_TYPE_END,
> +};
> +
> +/**
> + * Convert Ethernet item to Verbs specification.
> + *
> + * @param item[in]
> + * Item specification.
> + * @param default_mask[in]
> + * Default bit-masks to use when item->mask is not provided.
> + * @param data[in, out]
> + * User structure.
> + */
> +static int
> +mlx4_flow_create_eth(const struct rte_flow_item *item,
> + const void *default_mask,
> + void *data)
> +{
> + const struct rte_flow_item_eth *spec = item->spec;
> + const struct rte_flow_item_eth *mask = item->mask;
> + struct mlx4_flow *flow = (struct mlx4_flow *)data;
> + struct ibv_flow_spec_eth *eth;
> + const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
> + unsigned int i;
> +
> + ++flow->ibv_attr->num_of_specs;
> + flow->ibv_attr->priority = 2;
> + eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
> + *eth = (struct ibv_flow_spec_eth) {
> + .type = IBV_FLOW_SPEC_ETH,
> + .size = eth_size,
> + };
> + if (!spec) {
> + flow->ibv_attr->type = IBV_FLOW_ATTR_ALL_DEFAULT;
> + return 0;
> + }
> + if (!mask)
> + mask = default_mask;
> + memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
> + memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
> + memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
> + memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
> + /* Remove unwanted bits from values. */
> + for (i = 0; i < ETHER_ADDR_LEN; ++i) {
> + eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
> + eth->val.src_mac[i] &= eth->mask.src_mac[i];
> + }
> + return 0;
> +}
> +
> +/**
> + * Convert VLAN item to Verbs specification.
> + *
> + * @param item[in]
> + * Item specification.
> + * @param default_mask[in]
> + * Default bit-masks to use when item->mask is not provided.
> + * @param data[in, out]
> + * User structure.
> + */
> +static int
> +mlx4_flow_create_vlan(const struct rte_flow_item *item,
> + const void *default_mask,
> + void *data)
> +{
> + const struct rte_flow_item_vlan *spec = item->spec;
> + const struct rte_flow_item_vlan *mask = item->mask;
> + struct mlx4_flow *flow = (struct mlx4_flow *)data;
> + struct ibv_flow_spec_eth *eth;
> + const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
> +
> + eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
> + if (!spec)
> + return 0;
> + if (!mask)
> + mask = default_mask;
> + eth->val.vlan_tag = spec->tci;
> + eth->mask.vlan_tag = mask->tci;
> + eth->val.vlan_tag &= eth->mask.vlan_tag;
> + return 0;
> +}
> +
> +/**
> + * Convert IPv4 item to Verbs specification.
> + *
> + * @param item[in]
> + * Item specification.
> + * @param default_mask[in]
> + * Default bit-masks to use when item->mask is not provided.
> + * @param data[in, out]
> + * User structure.
> + */
> +static int
> +mlx4_flow_create_ipv4(const struct rte_flow_item *item,
> + const void *default_mask,
> + void *data)
> +{
> + const struct rte_flow_item_ipv4 *spec = item->spec;
> + const struct rte_flow_item_ipv4 *mask = item->mask;
> + struct mlx4_flow *flow = (struct mlx4_flow *)data;
> + struct ibv_flow_spec_ipv4 *ipv4;
> + unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4);
> +
> + ++flow->ibv_attr->num_of_specs;
> + flow->ibv_attr->priority = 1;
> + ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
> + *ipv4 = (struct ibv_flow_spec_ipv4) {
> + .type = IBV_FLOW_SPEC_IPV4,
> + .size = ipv4_size,
> + };
> + if (!spec)
> + return 0;
> + ipv4->val = (struct ibv_flow_ipv4_filter) {
> + .src_ip = spec->hdr.src_addr,
> + .dst_ip = spec->hdr.dst_addr,
> + };
> + if (!mask)
> + mask = default_mask;
> + ipv4->mask = (struct ibv_flow_ipv4_filter) {
> + .src_ip = mask->hdr.src_addr,
> + .dst_ip = mask->hdr.dst_addr,
> + };
> + /* Remove unwanted bits from values. */
> + ipv4->val.src_ip &= ipv4->mask.src_ip;
> + ipv4->val.dst_ip &= ipv4->mask.dst_ip;
> + return 0;
> +}
> +
> +/**
> + * Convert UDP item to Verbs specification.
> + *
> + * @param item[in]
> + * Item specification.
> + * @param default_mask[in]
> + * Default bit-masks to use when item->mask is not provided.
> + * @param data[in, out]
> + * User structure.
> + */
> +static int
> +mlx4_flow_create_udp(const struct rte_flow_item *item,
> + const void *default_mask,
> + void *data)
> +{
> + const struct rte_flow_item_udp *spec = item->spec;
> + const struct rte_flow_item_udp *mask = item->mask;
> + struct mlx4_flow *flow = (struct mlx4_flow *)data;
> + struct ibv_flow_spec_tcp_udp *udp;
> + unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
> +
> + ++flow->ibv_attr->num_of_specs;
> + flow->ibv_attr->priority = 0;
> + udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
> + *udp = (struct ibv_flow_spec_tcp_udp) {
> + .type = IBV_FLOW_SPEC_UDP,
> + .size = udp_size,
> + };
> + if (!spec)
> + return 0;
> + udp->val.dst_port = spec->hdr.dst_port;
> + udp->val.src_port = spec->hdr.src_port;
> + if (!mask)
> + mask = default_mask;
> + udp->mask.dst_port = mask->hdr.dst_port;
> + udp->mask.src_port = mask->hdr.src_port;
> + /* Remove unwanted bits from values. */
> + udp->val.src_port &= udp->mask.src_port;
> + udp->val.dst_port &= udp->mask.dst_port;
> + return 0;
> +}
> +
> +/**
> + * Convert TCP item to Verbs specification.
> + *
> + * @param item[in]
> + * Item specification.
> + * @param default_mask[in]
> + * Default bit-masks to use when item->mask is not provided.
> + * @param data[in, out]
> + * User structure.
> + */
> +static int
> +mlx4_flow_create_tcp(const struct rte_flow_item *item,
> + const void *default_mask,
> + void *data)
> +{
> + const struct rte_flow_item_tcp *spec = item->spec;
> + const struct rte_flow_item_tcp *mask = item->mask;
> + struct mlx4_flow *flow = (struct mlx4_flow *)data;
> + struct ibv_flow_spec_tcp_udp *tcp;
> + unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
> +
> + ++flow->ibv_attr->num_of_specs;
> + flow->ibv_attr->priority = 0;
> + tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
> + *tcp = (struct ibv_flow_spec_tcp_udp) {
> + .type = IBV_FLOW_SPEC_TCP,
> + .size = tcp_size,
> + };
> + if (!spec)
> + return 0;
> + tcp->val.dst_port = spec->hdr.dst_port;
> + tcp->val.src_port = spec->hdr.src_port;
> + if (!mask)
> + mask = default_mask;
> + tcp->mask.dst_port = mask->hdr.dst_port;
> + tcp->mask.src_port = mask->hdr.src_port;
> + /* Remove unwanted bits from values. */
> + tcp->val.src_port &= tcp->mask.src_port;
> + tcp->val.dst_port &= tcp->mask.dst_port;
> + return 0;
> +}
> +
> +/**
> + * Check support for a given item.
> + *
> + * @param item[in]
> + * Item specification.
> + * @param mask[in]
> + * Bit-masks covering supported fields to compare with spec, last and mask in
> + * \item.
> + * @param size
> + * Bit-Mask size in bytes.
> + *
> + * @return
> + * 0 on success, negative value otherwise.
> + */
> +static int
> +mlx4_flow_item_validate(const struct rte_flow_item *item,
> + const uint8_t *mask, unsigned int size)
> +{
> + int ret = 0;
> +
> + if (!item->spec && (item->mask || item->last))
> + return -1;
> + if (item->spec && !item->mask) {
> + unsigned int i;
> + const uint8_t *spec = item->spec;
> +
> + for (i = 0; i < size; ++i)
> + if ((spec[i] | mask[i]) != mask[i])
> + return -1;
> + }
> + if (item->last && !item->mask) {
> + unsigned int i;
> + const uint8_t *spec = item->last;
> +
> + for (i = 0; i < size; ++i)
> + if ((spec[i] | mask[i]) != mask[i])
> + return -1;
> + }
> + if (item->spec && item->last) {
> + uint8_t spec[size];
> + uint8_t last[size];
> + const uint8_t *apply = mask;
> + unsigned int i;
> +
> + if (item->mask)
> + apply = item->mask;
> + for (i = 0; i < size; ++i) {
> + spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
> + last[i] = ((const uint8_t *)item->last)[i] & apply[i];
> + }
> + ret = memcmp(spec, last, size);
> + }
> + return ret;
> +}
> +
> +static int
> +mlx4_flow_validate_eth(const struct rte_flow_item *item,
> + const uint8_t *mask, unsigned int size)
> +{
> + if (item->mask) {
> + const struct rte_flow_item_eth *mask = item->mask;
> +
> + if (mask->dst.addr_bytes[0] != 0xff ||
> + mask->dst.addr_bytes[1] != 0xff ||
> + mask->dst.addr_bytes[2] != 0xff ||
> + mask->dst.addr_bytes[3] != 0xff ||
> + mask->dst.addr_bytes[4] != 0xff ||
> + mask->dst.addr_bytes[5] != 0xff)
> + return -1;
> + }
> + return mlx4_flow_item_validate(item, mask, size);
> +}
> +
> +static int
> +mlx4_flow_validate_vlan(const struct rte_flow_item *item,
> + const uint8_t *mask, unsigned int size)
> +{
> + if (item->mask) {
> + const struct rte_flow_item_vlan *mask = item->mask;
> +
> + if (mask->tci != 0 &&
> + ntohs(mask->tci) != 0x0fff)
> + return -1;
> + }
> + return mlx4_flow_item_validate(item, mask, size);
> +}
> +
> +static int
> +mlx4_flow_validate_ipv4(const struct rte_flow_item *item,
> + const uint8_t *mask, unsigned int size)
> +{
> + if (item->mask) {
> + const struct rte_flow_item_ipv4 *mask = item->mask;
> +
> + if (mask->hdr.src_addr != 0 &&
> + mask->hdr.src_addr != 0xffffffff)
> + return -1;
> + if (mask->hdr.dst_addr != 0 &&
> + mask->hdr.dst_addr != 0xffffffff)
> + return -1;
> + }
> + return mlx4_flow_item_validate(item, mask, size);
> +}
> +
> +static int
> +mlx4_flow_validate_udp(const struct rte_flow_item *item,
> + const uint8_t *mask, unsigned int size)
> +{
> + if (item->mask) {
> + const struct rte_flow_item_udp *mask = item->mask;
> +
> + if (mask->hdr.src_port != 0 &&
> + mask->hdr.src_port != 0xffff)
> + return -1;
> + if (mask->hdr.dst_port != 0 &&
> + mask->hdr.dst_port != 0xffff)
> + return -1;
> + }
> + return mlx4_flow_item_validate(item, mask, size);
> +}
> +
> +static int
> +mlx4_flow_validate_tcp(const struct rte_flow_item *item,
> + const uint8_t *mask, unsigned int size)
> +{
> + if (item->mask) {
> + const struct rte_flow_item_tcp *mask = item->mask;
> +
> + if (mask->hdr.src_port != 0 &&
> + mask->hdr.src_port != 0xffff)
> + return -1;
> + if (mask->hdr.dst_port != 0 &&
> + mask->hdr.dst_port != 0xffff)
> + return -1;
> + }
> + return mlx4_flow_item_validate(item, mask, size);
> +}
> +
> +/** Graph of supported items and associated actions. */
> +static const struct mlx4_flow_items mlx4_flow_items[] = {
> + [RTE_FLOW_ITEM_TYPE_END] = {
> + .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
> + },
> + [RTE_FLOW_ITEM_TYPE_ETH] = {
> + .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
> + RTE_FLOW_ITEM_TYPE_IPV4),
> + .actions = valid_actions,
> + .mask = &(const struct rte_flow_item_eth){
> + .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
> + .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
> + },
> + .default_mask = &rte_flow_item_eth_mask,
> + .mask_sz = sizeof(struct rte_flow_item_eth),
> + .validate = mlx4_flow_validate_eth,
> + .convert = mlx4_flow_create_eth,
> + .dst_sz = sizeof(struct ibv_flow_spec_eth),
> + },
> + [RTE_FLOW_ITEM_TYPE_VLAN] = {
> + .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4),
> + .actions = valid_actions,
> + .mask = &(const struct rte_flow_item_vlan){
> + /* rte_flow_item_vlan_mask is invalid for mlx4. */
> +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
> + .tci = 0x0fff,
> +#else
> + .tci = 0xff0f,
> +#endif
> + },
> + .mask_sz = sizeof(struct rte_flow_item_vlan),
> + .validate = mlx4_flow_validate_vlan,
> + .convert = mlx4_flow_create_vlan,
> + .dst_sz = 0,
> + },
> + [RTE_FLOW_ITEM_TYPE_IPV4] = {
> + .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
> + RTE_FLOW_ITEM_TYPE_TCP),
> + .actions = valid_actions,
> + .mask = &(const struct rte_flow_item_ipv4){
> + .hdr = {
> + .src_addr = -1,
> + .dst_addr = -1,
> + },
> + },
> + .default_mask = &rte_flow_item_ipv4_mask,
> + .mask_sz = sizeof(struct rte_flow_item_ipv4),
> + .validate = mlx4_flow_validate_ipv4,
> + .convert = mlx4_flow_create_ipv4,
> + .dst_sz = sizeof(struct ibv_flow_spec_ipv4),
> + },
> + [RTE_FLOW_ITEM_TYPE_UDP] = {
> + .actions = valid_actions,
> + .mask = &(const struct rte_flow_item_udp){
> + .hdr = {
> + .src_port = -1,
> + .dst_port = -1,
> + },
> + },
> + .default_mask = &rte_flow_item_udp_mask,
> + .mask_sz = sizeof(struct rte_flow_item_udp),
> + .validate = mlx4_flow_validate_udp,
> + .convert = mlx4_flow_create_udp,
> + .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
> + },
> + [RTE_FLOW_ITEM_TYPE_TCP] = {
> + .actions = valid_actions,
> + .mask = &(const struct rte_flow_item_tcp){
> + .hdr = {
> + .src_port = -1,
> + .dst_port = -1,
> + },
> + },
> + .default_mask = &rte_flow_item_tcp_mask,
> + .mask_sz = sizeof(struct rte_flow_item_tcp),
> + .validate = mlx4_flow_validate_tcp,
> + .convert = mlx4_flow_create_tcp,
> + .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
> + },
> +};
> +
> +/**
> + * Validate a flow supported by the NIC.
> + *
> + * @param priv
> + *   Pointer to private structure.
> + * @param[in] attr
> + *   Flow rule attributes.
> + * @param[in] items
> + *   Pattern specification (list terminated by the END pattern item).
> + * @param[in] actions
> + *   Associated actions (list terminated by the END action).
> + * @param[out] error
> + *   Perform verbose error reporting if not NULL.
> + * @param[in, out] flow
> + *   Flow structure to update; items are converted when ibv_attr is set.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +static int
> +priv_flow_validate(struct priv *priv,
> +		   const struct rte_flow_attr *attr,
> +		   const struct rte_flow_item items[],
> +		   const struct rte_flow_action actions[],
> +		   struct rte_flow_error *error,
> +		   struct mlx4_flow *flow)
> +{
> +	const struct mlx4_flow_items *cur_item = mlx4_flow_items;
> +	struct mlx4_flow_action action = {
> +		.queue = 0,
> +		.drop = 0,
> +	};
> +
> +	if (attr->group) {
> +		rte_flow_error_set(error, ENOTSUP,
> +				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
> +				   NULL,
> +				   "groups are not supported");
> +		return -rte_errno;
> +	}
> +	if (attr->priority) {
> +		rte_flow_error_set(error, ENOTSUP,
> +				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
> +				   NULL,
> +				   "priorities are not supported");
> +		return -rte_errno;
> +	}
> +	if (attr->egress) {
> +		rte_flow_error_set(error, ENOTSUP,
> +				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
> +				   NULL,
> +				   "egress is not supported");
> +		return -rte_errno;
> +	}
> +	if (!attr->ingress) {
> +		rte_flow_error_set(error, ENOTSUP,
> +				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
> +				   NULL,
> +				   "only ingress is supported");
> +		return -rte_errno;
> +	}
> +	/* Go over items list. */
> +	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
> +		const struct mlx4_flow_items *token = NULL;
> +		unsigned int i;
> +		int err;
> +
> +		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
> +			continue;
> +		/*
> +		 * The nic can support patterns with NULL eth spec only
> +		 * if eth is a single item in a rule.
> +		 */
> +		if (!items->spec &&
> +		    items->type == RTE_FLOW_ITEM_TYPE_ETH) {
> +			const struct rte_flow_item *next = items + 1;
> +
> +			if (next->type != RTE_FLOW_ITEM_TYPE_END) {
> +				rte_flow_error_set(error, ENOTSUP,
> +						   RTE_FLOW_ERROR_TYPE_ITEM,
> +						   items,
> +						   "the rule requires"
> +						   " an Ethernet spec");
> +				return -rte_errno;
> +			}
> +		}
> +		for (i = 0;
> +		     cur_item->items &&
> +		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
> +		     ++i) {
> +			if (cur_item->items[i] == items->type) {
> +				token = &mlx4_flow_items[items->type];
> +				break;
> +			}
> +		}
> +		if (!token)
> +			goto exit_item_not_supported;
> +		cur_item = token;
> +		err = cur_item->validate(items,
> +					 (const uint8_t *)cur_item->mask,
> +					 cur_item->mask_sz);
> +		if (err)
> +			goto exit_item_not_supported;
> +		if (flow->ibv_attr && cur_item->convert) {
> +			err = cur_item->convert(items,
> +						(cur_item->default_mask ?
> +						 cur_item->default_mask :
> +						 cur_item->mask),
> +						flow);
> +			if (err)
> +				goto exit_item_not_supported;
> +		}
> +		flow->offset += cur_item->dst_sz;
> +	}
> +	/* Go over actions list */
> +	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
> +		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
> +			continue;
> +		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
> +			action.drop = 1;
> +		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
> +			const struct rte_flow_action_queue *queue =
> +				(const struct rte_flow_action_queue *)
> +				actions->conf;
> +
> +			/* ">=" also rejects every index when rxqs_n is 0. */
> +			if (!queue || queue->index >= priv->rxqs_n)
> +				goto exit_action_not_supported;
> +			action.queue = 1;
> +		} else {
> +			goto exit_action_not_supported;
> +		}
> +	}
> +	if (!action.queue && !action.drop) {
> +		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
> +				   NULL, "no valid action");
> +		return -rte_errno;
> +	}
> +	return 0;
> +exit_item_not_supported:
> +	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
> +			   items, "item not supported");
> +	return -rte_errno;
> +exit_action_not_supported:
> +	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
> +			   actions, "action not supported");
> +	return -rte_errno;
> +}
> +
> +/**
> + * Validate a flow supported by the NIC.
> + *
> + * @see rte_flow_validate()
> + * @see rte_flow_ops
> + */
> +int
> +mlx4_flow_validate(struct rte_eth_dev *dev,
> + const struct rte_flow_attr *attr,
> + const struct rte_flow_item items[],
> + const struct rte_flow_action actions[],
> + struct rte_flow_error *error)
> +{
> + struct priv *priv = dev->data->dev_private;
> + int ret;
> + struct mlx4_flow flow = { .offset = sizeof(struct ibv_flow_attr) };
> +
> + priv_lock(priv);
> + ret = priv_flow_validate(priv, attr, items, actions, error, &flow);
> + priv_unlock(priv);
> + return ret;
> +}
> +
> +/**
> + * Complete flow rule creation.
> + *
> + * @param priv
> + *   Pointer to private structure.
> + * @param ibv_attr
> + *   Verbs flow attributes; stored in the returned flow on success.
> + * @param action
> + *   Target action structure (drop flag and Rx queue index).
> + * @param[out] error
> + *   Perform verbose error reporting if not NULL.
> + *
> + * @return
> + *   A flow if the rule could be created, NULL otherwise.
> + */
> +static struct rte_flow *
> +priv_flow_create_action_queue(struct priv *priv,
> +			      struct ibv_flow_attr *ibv_attr,
> +			      struct mlx4_flow_action *action,
> +			      struct rte_flow_error *error)
> +{
> +	struct rxq *rxq;
> +	struct ibv_qp *qp;
> +	struct rte_flow *rte_flow;
> +
> +	assert(priv->pd);
> +	assert(priv->ctx);
> +	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
> +	if (!rte_flow) {
> +		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
> +				   NULL, "cannot allocate flow memory");
> +		return NULL;
> +	}
> +	rxq = (*priv->rxqs)[action->queue_id];
> +	if (action->drop) {
> +		rte_flow->cq = ibv_exp_create_cq(priv->ctx, 1, NULL, NULL, 0,
> +					  &(struct ibv_exp_cq_init_attr){
> +						  .comp_mask = 0,
> +					  });
> +		if (!rte_flow->cq) {
> +			rte_flow_error_set(error, ENOMEM,
> +					   RTE_FLOW_ERROR_TYPE_HANDLE,
> +					   NULL, "cannot allocate CQ");
> +			goto error;
> +		}
> +		rte_flow->qp = ibv_exp_create_qp(
> +			priv->ctx,
> +			&(struct ibv_exp_qp_init_attr){
> +				.send_cq = rte_flow->cq,
> +				.recv_cq = rte_flow->cq,
> +				.cap = {
> +					.max_recv_wr = 1,
> +					.max_recv_sge = 1,
> +				},
> +				.qp_type = IBV_QPT_RAW_PACKET,
> +				.comp_mask =
> +					IBV_EXP_QP_INIT_ATTR_PD |
> +					IBV_EXP_QP_INIT_ATTR_PORT |
> +					IBV_EXP_QP_INIT_ATTR_RES_DOMAIN,
> +				.pd = priv->pd,
> +				.res_domain = rxq->rd,
> +				.port_num = priv->port,
> +			});
> +		if (!rte_flow->qp) {
> +			rte_flow_error_set(error, ENOMEM,
> +					   RTE_FLOW_ERROR_TYPE_HANDLE,
> +					   NULL, "cannot allocate QP");
> +			goto error;
> +		}
> +		qp = rte_flow->qp;
> +	} else {
> +		rte_flow->rxq = rxq;
> +		qp = rxq->qp;
> +	}
> +	rte_flow->ibv_attr = ibv_attr;
> +	rte_flow->ibv_flow = ibv_create_flow(qp, rte_flow->ibv_attr);
> +	if (!rte_flow->ibv_flow) {
> +		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
> +				   NULL, "flow rule creation failure");
> +		goto error;
> +	}
> +	return rte_flow;
> +
> +error:
> +	assert(rte_flow);
> +	/* The QP references the CQ, hence it must be destroyed first. */
> +	if (rte_flow->qp)
> +		ibv_destroy_qp(rte_flow->qp);
> +	if (rte_flow->cq)
> +		ibv_destroy_cq(rte_flow->cq);
> +	rte_free(rte_flow->ibv_attr); /* NULL unless flow creation failed. */
> +	rte_free(rte_flow);
> +	return NULL;
> +}
> +
> +/**
> + * Convert a flow.
> + *
> + * @param priv
> + * Pointer to private structure.
> + * @param[in] attr
> + * Flow rule attributes.
> + * @param[in] items
> + * Pattern specification (list terminated by the END pattern item).
> + * @param[in] actions
> + * Associated actions (list terminated by the END action).
> + * @param[out] error
> + * Perform verbose error reporting if not NULL.
> + *
> + * @return
> + * A flow on success, NULL otherwise.
> + */
> +static struct rte_flow *
> +priv_flow_create(struct priv *priv,
> + const struct rte_flow_attr *attr,
> + const struct rte_flow_item items[],
> + const struct rte_flow_action actions[],
> + struct rte_flow_error *error)
> +{
> + struct rte_flow *rte_flow;
> + struct mlx4_flow_action action;
> + struct mlx4_flow flow = { .offset = sizeof(struct ibv_flow_attr), };
> + int err;
> +
> + err = priv_flow_validate(priv, attr, items, actions, error, &flow);
> + if (err)
> + return NULL;
> + flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
> + if (!flow.ibv_attr) {
> + rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
> + NULL, "cannot allocate ibv_attr memory");
> + return NULL;
> + }
> + flow.offset = sizeof(struct ibv_flow_attr);
> + *flow.ibv_attr = (struct ibv_flow_attr){
> + .comp_mask = 0,
> + .type = IBV_FLOW_ATTR_NORMAL,
> + .size = sizeof(struct ibv_flow_attr),
> + .priority = attr->priority,
> + .num_of_specs = 0,
> + .port = priv->port,
> + .flags = 0,
> + };
> + claim_zero(priv_flow_validate(priv, attr, items, actions,
> + error, &flow));
> + action = (struct mlx4_flow_action){
> + .queue = 0,
> + .drop = 0,
> + };
> + for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
> + if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
> + continue;
> + } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
> + action.queue = 1;
> + action.queue_id =
> + ((const struct rte_flow_action_queue *)
> + actions->conf)->index;
> + } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
> + action.drop = 1;
> + } else {
> + rte_flow_error_set(error, ENOTSUP,
> + RTE_FLOW_ERROR_TYPE_ACTION,
> + actions, "unsupported action");
> + goto exit;
> + }
> + }
> + rte_flow = priv_flow_create_action_queue(priv, flow.ibv_attr,
> + &action, error);
> + return rte_flow;
> +exit:
> + rte_free(flow.ibv_attr);
> + return NULL;
> +}
> +
> +/**
> + * Create a flow.
> + *
> + * @see rte_flow_create()
> + * @see rte_flow_ops
> + */
> +struct rte_flow *
> +mlx4_flow_create(struct rte_eth_dev *dev,
> + const struct rte_flow_attr *attr,
> + const struct rte_flow_item items[],
> + const struct rte_flow_action actions[],
> + struct rte_flow_error *error)
> +{
> + struct priv *priv = dev->data->dev_private;
> + struct rte_flow *flow;
> +
> + priv_lock(priv);
> + flow = priv_flow_create(priv, attr, items, actions, error);
> + if (flow) {
> + LIST_INSERT_HEAD(&priv->flows, flow, next);
> + DEBUG("Flow created %p", (void *)flow);
> + }
> + priv_unlock(priv);
> + return flow;
> +}
> +
> +/**
> + * Destroy a flow.
> + *
> + * @param priv
> + * Pointer to private structure.
> + * @param[in] flow
> + * Flow to destroy.
> + */
> +static void
> +priv_flow_destroy(struct priv *priv, struct rte_flow *flow)
> +{
> + (void)priv;
> + LIST_REMOVE(flow, next);
> + if (flow->ibv_flow)
> + claim_zero(ibv_destroy_flow(flow->ibv_flow));
> + if (flow->qp)
> + claim_zero(ibv_destroy_qp(flow->qp));
> + if (flow->cq)
> + claim_zero(ibv_destroy_cq(flow->cq));
> + rte_free(flow->ibv_attr);
> + DEBUG("Flow destroyed %p", (void *)flow);
> + rte_free(flow);
> +}
> +
> +/**
> + * Destroy a flow.
> + *
> + * @see rte_flow_destroy()
> + * @see rte_flow_ops
> + */
> +int
> +mlx4_flow_destroy(struct rte_eth_dev *dev,
> + struct rte_flow *flow,
> + struct rte_flow_error *error)
> +{
> + struct priv *priv = dev->data->dev_private;
> +
> + (void)error;
> + priv_lock(priv);
> + priv_flow_destroy(priv, flow);
> + priv_unlock(priv);
> + return 0;
> +}
> +
> +/**
> + * Destroy all flows.
> + *
> + * @param priv
> + * Pointer to private structure.
> + */
> +static void
> +priv_flow_flush(struct priv *priv)
> +{
> + while (!LIST_EMPTY(&priv->flows)) {
> + struct rte_flow *flow;
> +
> + flow = LIST_FIRST(&priv->flows);
> + priv_flow_destroy(priv, flow);
> + }
> +}
> +
> +/**
> + * Destroy all flows.
> + *
> + * @see rte_flow_flush()
> + * @see rte_flow_ops
> + */
> +int
> +mlx4_flow_flush(struct rte_eth_dev *dev,
> + struct rte_flow_error *error)
> +{
> + struct priv *priv = dev->data->dev_private;
> +
> + (void)error;
> + priv_lock(priv);
> + priv_flow_flush(priv);
> + priv_unlock(priv);
> + return 0;
> +}
> +
> +/**
> + * Remove all flows.
> + *
> + * Called by dev_stop() to remove all flows.
> + *
> + * @param priv
> + * Pointer to private structure.
> + */
> +void
> +mlx4_priv_flow_stop(struct priv *priv)
> +{
> + struct rte_flow *flow;
> +
> + for (flow = LIST_FIRST(&priv->flows);
> + flow;
> + flow = LIST_NEXT(flow, next)) {
> + claim_zero(ibv_destroy_flow(flow->ibv_flow));
> + flow->ibv_flow = NULL;
> + DEBUG("Flow %p removed", (void *)flow);
> + }
> +}
> +
> +/**
> + * Add all flows.
> + *
> + * @param priv
> + * Pointer to private structure.
> + *
> + * @return
> + * 0 on success, an errno value otherwise and rte_errno is set.
> + */
> +int
> +mlx4_priv_flow_start(struct priv *priv)
> +{
> + struct ibv_qp *qp;
> + struct rte_flow *flow;
> +
> + for (flow = LIST_FIRST(&priv->flows);
> + flow;
> + flow = LIST_NEXT(flow, next)) {
> + qp = flow->qp ? flow->qp : flow->rxq->qp;
> + flow->ibv_flow = ibv_create_flow(qp, flow->ibv_attr);
> + if (!flow->ibv_flow) {
> + DEBUG("Flow %p cannot be applied", (void *)flow);
> + rte_errno = EINVAL;
> + return rte_errno;
> + }
> + DEBUG("Flow %p applied", (void *)flow);
> + }
> + return 0;
> +}
> diff --git a/drivers/net/mlx4/mlx4_flow.h b/drivers/net/mlx4/mlx4_flow.h
> new file mode 100644
> index 0000000..537ffdf
> --- /dev/null
> +++ b/drivers/net/mlx4/mlx4_flow.h
> @@ -0,0 +1,104 @@
> +/*-
> + * BSD LICENSE
> + *
> + * Copyright 2017 6WIND S.A.
> + * Copyright 2017 Mellanox.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in
> + * the documentation and/or other materials provided with the
> + * distribution.
> + * * Neither the name of 6WIND S.A. nor the names of its
> + * contributors may be used to endorse or promote products derived
> + * from this software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#ifndef RTE_PMD_MLX4_FLOW_H_
> +#define RTE_PMD_MLX4_FLOW_H_
> +
> +#include <stddef.h>
> +#include <stdint.h>
> +#include <sys/queue.h>
> +
> +/* Verbs header. */
> +/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
> +#ifdef PEDANTIC
> +#pragma GCC diagnostic ignored "-Wpedantic"
> +#endif
> +#include <infiniband/verbs.h>
> +#ifdef PEDANTIC
> +#pragma GCC diagnostic error "-Wpedantic"
> +#endif
> +
> +#include <rte_flow.h>
> +#include <rte_flow_driver.h>
> +#include <rte_byteorder.h>
> +
> +#include "mlx4.h"
> +
> +struct rte_flow {
> + LIST_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
> + struct rxq *rxq; /**< Pointer to the queue, NULL if drop queue. */
> + struct ibv_flow *ibv_flow; /**< Verbs flow. */
> + struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
> + struct ibv_qp *qp; /**< Verbs queue pair. */
> + struct ibv_cq *cq; /**< Verbs completion queue. */
> +};
> +
> +int
> +mlx4_flow_validate(struct rte_eth_dev *dev,
> + const struct rte_flow_attr *attr,
> + const struct rte_flow_item items[],
> + const struct rte_flow_action actions[],
> + struct rte_flow_error *error);
> +
> +struct rte_flow *
> +mlx4_flow_create(struct rte_eth_dev *dev,
> + const struct rte_flow_attr *attr,
> + const struct rte_flow_item items[],
> + const struct rte_flow_action actions[],
> + struct rte_flow_error *error);
> +
> +int
> +mlx4_flow_destroy(struct rte_eth_dev *,
> + struct rte_flow *,
> + struct rte_flow_error *);
> +
> +int
> +mlx4_flow_flush(struct rte_eth_dev *dev,
> + struct rte_flow_error *error);
> +
> +/** Structure to pass to the conversion function. */
> +struct mlx4_flow {
> + struct ibv_flow_attr *ibv_attr; /**< Verbs attribute. */
> + unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
> +};
> +
> +struct mlx4_flow_action {
> + uint32_t drop:1; /**< Target is a drop queue. */
> + uint32_t queue:1; /**< Target is a receive queue. */
> + uint32_t queue_id; /**< Identifier of the queue. */
> +};
> +
> +int mlx4_priv_flow_start(struct priv *);
> +void mlx4_priv_flow_stop(struct priv *);
> +
> +#endif /* RTE_PMD_MLX4_FLOW_H_ */
> --
> 1.8.3.1
>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
--
Nélio Laranjeiro
6WIND
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [dpdk-dev] [PATCH 2/2] net/mlx4: support basic flow items and actions
2017-02-22 8:37 ` Nélio Laranjeiro
@ 2017-02-22 10:10 ` Nélio Laranjeiro
0 siblings, 0 replies; 15+ messages in thread
From: Nélio Laranjeiro @ 2017-02-22 10:10 UTC (permalink / raw)
To: Vasily Philipov; +Cc: dev, Adrien Mazarguil
On Wed, Feb 22, 2017 at 09:37:42AM +0100, Nélio Laranjeiro wrote:
> On Tue, Feb 21, 2017 at 02:07:03PM +0000, Vasily Philipov wrote:
> > Adding support for the next items: eth, vlan, ipv4, udp, tcp and for the
> > next actions: queue, drop
> >
> > Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
> > ---
> > drivers/net/mlx4/Makefile | 3 +-
> > drivers/net/mlx4/mlx4.c | 60 ++-
> > drivers/net/mlx4/mlx4.h | 3 +
> > drivers/net/mlx4/mlx4_flow.c | 1053 ++++++++++++++++++++++++++++++++++++++++++
> > drivers/net/mlx4/mlx4_flow.h | 104 +++++
> > 5 files changed, 1220 insertions(+), 3 deletions(-)
> > create mode 100644 drivers/net/mlx4/mlx4_flow.c
> > create mode 100644 drivers/net/mlx4/mlx4_flow.h
> >
>[...]
> > diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
> > index 82ccac8..cc2ebfa 100644
> > --- a/drivers/net/mlx4/mlx4.c
> > +++ b/drivers/net/mlx4/mlx4.c
> > @@ -3985,8 +3989,9 @@ struct txq_mp2mr_mbuf_check_data {
> > return -ret;
> > } while ((--r) && ((rxq = (*priv->rxqs)[++i]), i));
> > priv_dev_interrupt_handler_install(priv, dev);
> > + err = mlx4_priv_flow_start(priv);
> > priv_unlock(priv);
> > - return 0;
> > + return -err;
>[...]
Hi Vasily,
There is an issue in this mlx4_dev_start() when flows cannot be
re-applied (like in mlx5 [1][2]). Can you fix it in a v2 please?
Thanks,
[1] http://dpdk.org/ml/archives/dev/2017-February/058111.html
[2] http://dpdk.org/dev/patchwork/patch/20664/
--
Nélio Laranjeiro
6WIND
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [dpdk-dev] [PATCH 1/2] net/mlx4: split the definitions to the header file
2017-02-21 14:07 [dpdk-dev] [PATCH 1/2] net/mlx4: split the definitions to the header file Vasily Philipov
2017-02-21 14:07 ` [dpdk-dev] [PATCH 2/2] net/mlx4: support basic flow items and actions Vasily Philipov
@ 2017-02-22 8:37 ` Nélio Laranjeiro
2017-02-22 13:42 ` [dpdk-dev] [PATCH v2 " Vasily Philipov
` (3 subsequent siblings)
5 siblings, 0 replies; 15+ messages in thread
From: Nélio Laranjeiro @ 2017-02-22 8:37 UTC (permalink / raw)
To: Vasily Philipov; +Cc: dev, Adrien Mazarguil
On Tue, Feb 21, 2017 at 02:07:02PM +0000, Vasily Philipov wrote:
> Make some structs/defines visible from different source files by placing
> them into mlx4.h header.
>
> Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
> ---
> drivers/net/mlx4/mlx4.c | 183 ++--------------------------------------------
> drivers/net/mlx4/mlx4.h | 187 +++++++++++++++++++++++++++++++++++++++++++++++-
> 2 files changed, 189 insertions(+), 181 deletions(-)
>
> diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
> index 79efaaa..82ccac8 100644
> --- a/drivers/net/mlx4/mlx4.c
> +++ b/drivers/net/mlx4/mlx4.c
> @@ -1,8 +1,8 @@
> /*-
> * BSD LICENSE
> *
> - * Copyright 2012-2015 6WIND S.A.
> - * Copyright 2012 Mellanox.
> + * Copyright 2012-2017 6WIND S.A.
> + * Copyright 2012-2017 Mellanox.
> *
> * Redistribution and use in source and binary forms, with or without
> * modification, are permitted provided that the following conditions
> @@ -68,10 +68,6 @@
> #pragma GCC diagnostic error "-Wpedantic"
> #endif
>
> -/* DPDK headers don't like -pedantic. */
> -#ifdef PEDANTIC
> -#pragma GCC diagnostic ignored "-Wpedantic"
> -#endif
> #include <rte_ether.h>
> #include <rte_ethdev.h>
> #include <rte_dev.h>
> @@ -86,9 +82,6 @@
> #include <rte_log.h>
> #include <rte_alarm.h>
> #include <rte_memory.h>
> -#ifdef PEDANTIC
> -#pragma GCC diagnostic error "-Wpedantic"
> -#endif
>
> /* Generated configuration header. */
> #include "mlx4_autoconf.h"
> @@ -96,21 +89,6 @@
> /* PMD header. */
> #include "mlx4.h"
>
> -/* Runtime logging through RTE_LOG() is enabled when not in debugging mode.
> - * Intermediate LOG_*() macros add the required end-of-line characters. */
> -#ifndef NDEBUG
> -#define INFO(...) DEBUG(__VA_ARGS__)
> -#define WARN(...) DEBUG(__VA_ARGS__)
> -#define ERROR(...) DEBUG(__VA_ARGS__)
> -#else
> -#define LOG__(level, m, ...) \
> - RTE_LOG(level, PMD, MLX4_DRIVER_NAME ": " m "%c", __VA_ARGS__)
> -#define LOG_(level, ...) LOG__(level, __VA_ARGS__, '\n')
> -#define INFO(...) LOG_(INFO, __VA_ARGS__)
> -#define WARN(...) LOG_(WARNING, __VA_ARGS__)
> -#define ERROR(...) LOG_(ERR, __VA_ARGS__)
> -#endif
> -
> /* Convenience macros for accessing mbuf fields. */
> #define NEXT(m) ((m)->next)
> #define DATA_LEN(m) ((m)->data_len)
> @@ -137,157 +115,6 @@
> (((val) & (from)) / ((from) / (to))) : \
> (((val) & (from)) * ((to) / (from))))
>
> -struct mlx4_rxq_stats {
> - unsigned int idx; /**< Mapping index. */
> -#ifdef MLX4_PMD_SOFT_COUNTERS
> - uint64_t ipackets; /**< Total of successfully received packets. */
> - uint64_t ibytes; /**< Total of successfully received bytes. */
> -#endif
> - uint64_t idropped; /**< Total of packets dropped when RX ring full. */
> - uint64_t rx_nombuf; /**< Total of RX mbuf allocation failures. */
> -};
> -
> -struct mlx4_txq_stats {
> - unsigned int idx; /**< Mapping index. */
> -#ifdef MLX4_PMD_SOFT_COUNTERS
> - uint64_t opackets; /**< Total of successfully sent packets. */
> - uint64_t obytes; /**< Total of successfully sent bytes. */
> -#endif
> - uint64_t odropped; /**< Total of packets not sent when TX ring full. */
> -};
> -
> -/* RX element (scattered packets). */
> -struct rxq_elt_sp {
> - struct ibv_recv_wr wr; /* Work Request. */
> - struct ibv_sge sges[MLX4_PMD_SGE_WR_N]; /* Scatter/Gather Elements. */
> - struct rte_mbuf *bufs[MLX4_PMD_SGE_WR_N]; /* SGEs buffers. */
> -};
> -
> -/* RX element. */
> -struct rxq_elt {
> - struct ibv_recv_wr wr; /* Work Request. */
> - struct ibv_sge sge; /* Scatter/Gather Element. */
> - /* mbuf pointer is derived from WR_ID(wr.wr_id).offset. */
> -};
> -
> -/* RX queue descriptor. */
> -struct rxq {
> - struct priv *priv; /* Back pointer to private data. */
> - struct rte_mempool *mp; /* Memory Pool for allocations. */
> - struct ibv_mr *mr; /* Memory Region (for mp). */
> - struct ibv_cq *cq; /* Completion Queue. */
> - struct ibv_qp *qp; /* Queue Pair. */
> - struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
> - struct ibv_exp_cq_family *if_cq; /* CQ interface. */
> - /*
> - * Each VLAN ID requires a separate flow steering rule.
> - */
> - BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
> - struct ibv_flow *mac_flow[MLX4_MAX_MAC_ADDRESSES][MLX4_MAX_VLAN_IDS];
> - struct ibv_flow *promisc_flow; /* Promiscuous flow. */
> - struct ibv_flow *allmulti_flow; /* Multicast flow. */
> - unsigned int port_id; /* Port ID for incoming packets. */
> - unsigned int elts_n; /* (*elts)[] length. */
> - unsigned int elts_head; /* Current index in (*elts)[]. */
> - union {
> - struct rxq_elt_sp (*sp)[]; /* Scattered RX elements. */
> - struct rxq_elt (*no_sp)[]; /* RX elements. */
> - } elts;
> - unsigned int sp:1; /* Use scattered RX elements. */
> - unsigned int csum:1; /* Enable checksum offloading. */
> - unsigned int csum_l2tun:1; /* Same for L2 tunnels. */
> - struct mlx4_rxq_stats stats; /* RX queue counters. */
> - unsigned int socket; /* CPU socket ID for allocations. */
> - struct ibv_exp_res_domain *rd; /* Resource Domain. */
> -};
> -
> -/* TX element. */
> -struct txq_elt {
> - struct rte_mbuf *buf;
> -};
> -
> -/* Linear buffer type. It is used when transmitting buffers with too many
> - * segments that do not fit the hardware queue (see max_send_sge).
> - * Extra segments are copied (linearized) in such buffers, replacing the
> - * last SGE during TX.
> - * The size is arbitrary but large enough to hold a jumbo frame with
> - * 8 segments considering mbuf.buf_len is about 2048 bytes. */
> -typedef uint8_t linear_t[16384];
> -
> -/* TX queue descriptor. */
> -struct txq {
> - struct priv *priv; /* Back pointer to private data. */
> - struct {
> - const struct rte_mempool *mp; /* Cached Memory Pool. */
> - struct ibv_mr *mr; /* Memory Region (for mp). */
> - uint32_t lkey; /* mr->lkey */
> - } mp2mr[MLX4_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
> - struct ibv_cq *cq; /* Completion Queue. */
> - struct ibv_qp *qp; /* Queue Pair. */
> - struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
> - struct ibv_exp_cq_family *if_cq; /* CQ interface. */
> -#if MLX4_PMD_MAX_INLINE > 0
> - uint32_t max_inline; /* Max inline send size <= MLX4_PMD_MAX_INLINE. */
> -#endif
> - unsigned int elts_n; /* (*elts)[] length. */
> - struct txq_elt (*elts)[]; /* TX elements. */
> - unsigned int elts_head; /* Current index in (*elts)[]. */
> - unsigned int elts_tail; /* First element awaiting completion. */
> - unsigned int elts_comp; /* Number of completion requests. */
> - unsigned int elts_comp_cd; /* Countdown for next completion request. */
> - unsigned int elts_comp_cd_init; /* Initial value for countdown. */
> - struct mlx4_txq_stats stats; /* TX queue counters. */
> - linear_t (*elts_linear)[]; /* Linearized buffers. */
> - struct ibv_mr *mr_linear; /* Memory Region for linearized buffers. */
> - unsigned int socket; /* CPU socket ID for allocations. */
> - struct ibv_exp_res_domain *rd; /* Resource Domain. */
> -};
> -
> -struct priv {
> - struct rte_eth_dev *dev; /* Ethernet device. */
> - struct ibv_context *ctx; /* Verbs context. */
> - struct ibv_device_attr device_attr; /* Device properties. */
> - struct ibv_pd *pd; /* Protection Domain. */
> - /*
> - * MAC addresses array and configuration bit-field.
> - * An extra entry that cannot be modified by the DPDK is reserved
> - * for broadcast frames (destination MAC address ff:ff:ff:ff:ff:ff).
> - */
> - struct ether_addr mac[MLX4_MAX_MAC_ADDRESSES];
> - BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
> - /* VLAN filters. */
> - struct {
> - unsigned int enabled:1; /* If enabled. */
> - unsigned int id:12; /* VLAN ID (0-4095). */
> - } vlan_filter[MLX4_MAX_VLAN_IDS]; /* VLAN filters table. */
> - /* Device properties. */
> - uint16_t mtu; /* Configured MTU. */
> - uint8_t port; /* Physical port number. */
> - unsigned int started:1; /* Device started, flows enabled. */
> - unsigned int promisc:1; /* Device in promiscuous mode. */
> - unsigned int allmulti:1; /* Device receives all multicast packets. */
> - unsigned int hw_qpg:1; /* QP groups are supported. */
> - unsigned int hw_tss:1; /* TSS is supported. */
> - unsigned int hw_rss:1; /* RSS is supported. */
> - unsigned int hw_csum:1; /* Checksum offload is supported. */
> - unsigned int hw_csum_l2tun:1; /* Same for L2 tunnels. */
> - unsigned int rss:1; /* RSS is enabled. */
> - unsigned int vf:1; /* This is a VF device. */
> - unsigned int pending_alarm:1; /* An alarm is pending. */
> -#ifdef INLINE_RECV
> - unsigned int inl_recv_size; /* Inline recv size */
> -#endif
> - unsigned int max_rss_tbl_sz; /* Maximum number of RSS queues. */
> - /* RX/TX queues. */
> - struct rxq rxq_parent; /* Parent queue when RSS is enabled. */
> - unsigned int rxqs_n; /* RX queues array size. */
> - unsigned int txqs_n; /* TX queues array size. */
> - struct rxq *(*rxqs)[]; /* RX queues. */
> - struct txq *(*txqs)[]; /* TX queues. */
> - struct rte_intr_handle intr_handle; /* Interrupt handler. */
> - rte_spinlock_t lock; /* Lock for control functions. */
> -};
> -
> /* Local storage for secondary process data. */
> struct mlx4_secondary_data {
> struct rte_eth_dev_data data; /* Local device data. */
> @@ -335,8 +162,7 @@ struct mlx4_secondary_data {
> * @param priv
> * Pointer to private structure.
> */
> -static void
> -priv_lock(struct priv *priv)
> +void priv_lock(struct priv *priv)
> {
> rte_spinlock_lock(&priv->lock);
> }
> @@ -347,8 +173,7 @@ struct mlx4_secondary_data {
> * @param priv
> * Pointer to private structure.
> */
> -static void
> -priv_unlock(struct priv *priv)
> +void priv_unlock(struct priv *priv)
> {
> rte_spinlock_unlock(&priv->lock);
> }
> diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
> index 4c7505e..70c9ecd 100644
> --- a/drivers/net/mlx4/mlx4.h
> +++ b/drivers/net/mlx4/mlx4.h
> @@ -1,8 +1,8 @@
> /*-
> * BSD LICENSE
> *
> - * Copyright 2012-2015 6WIND S.A.
> - * Copyright 2012 Mellanox.
> + * Copyright 2012-2017 6WIND S.A.
> + * Copyright 2012-2017 Mellanox.
> *
> * Redistribution and use in source and binary forms, with or without
> * modification, are permitted provided that the following conditions
> @@ -39,6 +39,33 @@
> #include <limits.h>
>
> /*
> + * Runtime logging through RTE_LOG() is enabled when not in debugging mode.
> + * Intermediate LOG_*() macros add the required end-of-line characters.
> + */
> +#ifndef NDEBUG
> +#define INFO(...) DEBUG(__VA_ARGS__)
> +#define WARN(...) DEBUG(__VA_ARGS__)
> +#define ERROR(...) DEBUG(__VA_ARGS__)
> +#else
> +#define LOG__(level, m, ...) \
> + RTE_LOG(level, PMD, MLX4_DRIVER_NAME ": " m "%c", __VA_ARGS__)
> +#define LOG_(level, ...) LOG__(level, __VA_ARGS__, '\n')
> +#define INFO(...) LOG_(INFO, __VA_ARGS__)
> +#define WARN(...) LOG_(WARNING, __VA_ARGS__)
> +#define ERROR(...) LOG_(ERR, __VA_ARGS__)
> +#endif
> +
> +/* Verbs header. */
> +/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
> +#ifdef PEDANTIC
> +#pragma GCC diagnostic ignored "-Wpedantic"
> +#endif
> +#include <infiniband/verbs.h>
> +#ifdef PEDANTIC
> +#pragma GCC diagnostic error "-Wpedantic"
> +#endif
> +
> +/*
> * Maximum number of simultaneous MAC addresses supported.
> *
> * According to ConnectX's Programmer Reference Manual:
> @@ -160,4 +187,160 @@ enum {
> #define claim_positive(...) (__VA_ARGS__)
> #endif /* NDEBUG */
>
> +struct mlx4_rxq_stats {
> + unsigned int idx; /**< Mapping index. */
> +#ifdef MLX4_PMD_SOFT_COUNTERS
> + uint64_t ipackets; /**< Total of successfully received packets. */
> + uint64_t ibytes; /**< Total of successfully received bytes. */
> +#endif
> + uint64_t idropped; /**< Total of packets dropped when RX ring full. */
> + uint64_t rx_nombuf; /**< Total of RX mbuf allocation failures. */
> +};
> +
> +/* RX element (scattered packets). */
> +struct rxq_elt_sp {
> + struct ibv_recv_wr wr; /* Work Request. */
> + struct ibv_sge sges[MLX4_PMD_SGE_WR_N]; /* Scatter/Gather Elements. */
> + struct rte_mbuf *bufs[MLX4_PMD_SGE_WR_N]; /* SGEs buffers. */
> +};
> +
> +/* RX element. */
> +struct rxq_elt {
> + struct ibv_recv_wr wr; /* Work Request. */
> + struct ibv_sge sge; /* Scatter/Gather Element. */
> + /* mbuf pointer is derived from WR_ID(wr.wr_id).offset. */
> +};
> +
> +/* RX queue descriptor. */
> +struct rxq {
> + struct priv *priv; /* Back pointer to private data. */
> + struct rte_mempool *mp; /* Memory Pool for allocations. */
> + struct ibv_mr *mr; /* Memory Region (for mp). */
> + struct ibv_cq *cq; /* Completion Queue. */
> + struct ibv_qp *qp; /* Queue Pair. */
> + struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
> + struct ibv_exp_cq_family *if_cq; /* CQ interface. */
> + /*
> + * Each VLAN ID requires a separate flow steering rule.
> + */
> + BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
> + struct ibv_flow *mac_flow[MLX4_MAX_MAC_ADDRESSES][MLX4_MAX_VLAN_IDS];
> + struct ibv_flow *promisc_flow; /* Promiscuous flow. */
> + struct ibv_flow *allmulti_flow; /* Multicast flow. */
> + unsigned int port_id; /* Port ID for incoming packets. */
> + unsigned int elts_n; /* (*elts)[] length. */
> + unsigned int elts_head; /* Current index in (*elts)[]. */
> + union {
> + struct rxq_elt_sp (*sp)[]; /* Scattered RX elements. */
> + struct rxq_elt (*no_sp)[]; /* RX elements. */
> + } elts;
> + unsigned int sp:1; /* Use scattered RX elements. */
> + unsigned int csum:1; /* Enable checksum offloading. */
> + unsigned int csum_l2tun:1; /* Same for L2 tunnels. */
> + struct mlx4_rxq_stats stats; /* RX queue counters. */
> + unsigned int socket; /* CPU socket ID for allocations. */
> + struct ibv_exp_res_domain *rd; /* Resource Domain. */
> +};
> +
> +/* TX element. */
> +struct txq_elt {
> + struct rte_mbuf *buf;
> +};
> +
> +struct mlx4_txq_stats {
> + unsigned int idx; /**< Mapping index. */
> +#ifdef MLX4_PMD_SOFT_COUNTERS
> + uint64_t opackets; /**< Total of successfully sent packets. */
> + uint64_t obytes; /**< Total of successfully sent bytes. */
> +#endif
> + uint64_t odropped; /**< Total of packets not sent when TX ring full. */
> +};
> +
> +/*
> + * Linear buffer type. It is used when transmitting buffers with too many
> + * segments that do not fit the hardware queue (see max_send_sge).
> + * Extra segments are copied (linearized) in such buffers, replacing the
> + * last SGE during TX.
> + * The size is arbitrary but large enough to hold a jumbo frame with
> + * 8 segments considering mbuf.buf_len is about 2048 bytes.
> + */
> +typedef uint8_t linear_t[16384];
> +
> +/* TX queue descriptor. */
> +struct txq {
> + struct priv *priv; /* Back pointer to private data. */
> + struct {
> + const struct rte_mempool *mp; /* Cached Memory Pool. */
> + struct ibv_mr *mr; /* Memory Region (for mp). */
> + uint32_t lkey; /* mr->lkey */
> + } mp2mr[MLX4_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
> + struct ibv_cq *cq; /* Completion Queue. */
> + struct ibv_qp *qp; /* Queue Pair. */
> + struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
> + struct ibv_exp_cq_family *if_cq; /* CQ interface. */
> +#if MLX4_PMD_MAX_INLINE > 0
> + uint32_t max_inline; /* Max inline send size <= MLX4_PMD_MAX_INLINE. */
> +#endif
> + unsigned int elts_n; /* (*elts)[] length. */
> + struct txq_elt (*elts)[]; /* TX elements. */
> + unsigned int elts_head; /* Current index in (*elts)[]. */
> + unsigned int elts_tail; /* First element awaiting completion. */
> + unsigned int elts_comp; /* Number of completion requests. */
> + unsigned int elts_comp_cd; /* Countdown for next completion request. */
> + unsigned int elts_comp_cd_init; /* Initial value for countdown. */
> + struct mlx4_txq_stats stats; /* TX queue counters. */
> + linear_t (*elts_linear)[]; /* Linearized buffers. */
> + struct ibv_mr *mr_linear; /* Memory Region for linearized buffers. */
> + unsigned int socket; /* CPU socket ID for allocations. */
> + struct ibv_exp_res_domain *rd; /* Resource Domain. */
> +};
> +
> +struct priv {
> + struct rte_eth_dev *dev; /* Ethernet device. */
> + struct ibv_context *ctx; /* Verbs context. */
> + struct ibv_device_attr device_attr; /* Device properties. */
> + struct ibv_pd *pd; /* Protection Domain. */
> + /*
> + * MAC addresses array and configuration bit-field.
> + * An extra entry that cannot be modified by the DPDK is reserved
> + * for broadcast frames (destination MAC address ff:ff:ff:ff:ff:ff).
> + */
> + struct ether_addr mac[MLX4_MAX_MAC_ADDRESSES];
> + BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
> + /* VLAN filters. */
> + struct {
> + unsigned int enabled:1; /* If enabled. */
> + unsigned int id:12; /* VLAN ID (0-4095). */
> + } vlan_filter[MLX4_MAX_VLAN_IDS]; /* VLAN filters table. */
> + /* Device properties. */
> + uint16_t mtu; /* Configured MTU. */
> + uint8_t port; /* Physical port number. */
> + unsigned int started:1; /* Device started, flows enabled. */
> + unsigned int promisc:1; /* Device in promiscuous mode. */
> + unsigned int allmulti:1; /* Device receives all multicast packets. */
> + unsigned int hw_qpg:1; /* QP groups are supported. */
> + unsigned int hw_tss:1; /* TSS is supported. */
> + unsigned int hw_rss:1; /* RSS is supported. */
> + unsigned int hw_csum:1; /* Checksum offload is supported. */
> + unsigned int hw_csum_l2tun:1; /* Same for L2 tunnels. */
> + unsigned int rss:1; /* RSS is enabled. */
> + unsigned int vf:1; /* This is a VF device. */
> + unsigned int pending_alarm:1; /* An alarm is pending. */
> +#ifdef INLINE_RECV
> + unsigned int inl_recv_size; /* Inline recv size */
> +#endif
> + unsigned int max_rss_tbl_sz; /* Maximum number of RSS queues. */
> + /* RX/TX queues. */
> + struct rxq rxq_parent; /* Parent queue when RSS is enabled. */
> + unsigned int rxqs_n; /* RX queues array size. */
> + unsigned int txqs_n; /* TX queues array size. */
> + struct rxq *(*rxqs)[]; /* RX queues. */
> + struct txq *(*txqs)[]; /* TX queues. */
> + struct rte_intr_handle intr_handle; /* Interrupt handler. */
> + rte_spinlock_t lock; /* Lock for control functions. */
> +};
> +
> +void priv_lock(struct priv *priv);
> +void priv_unlock(struct priv *priv);
> +
> #endif /* RTE_PMD_MLX4_H_ */
> --
> 1.8.3.1
>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
--
Nélio Laranjeiro
6WIND
^ permalink raw reply [flat|nested] 15+ messages in thread
* [dpdk-dev] [PATCH v2 1/2] net/mlx4: split the definitions to the header file
2017-02-21 14:07 [dpdk-dev] [PATCH 1/2] net/mlx4: split the definitions to the header file Vasily Philipov
2017-02-21 14:07 ` [dpdk-dev] [PATCH 2/2] net/mlx4: support basic flow items and actions Vasily Philipov
2017-02-22 8:37 ` [dpdk-dev] [PATCH 1/2] net/mlx4: split the definitions to the header file Nélio Laranjeiro
@ 2017-02-22 13:42 ` Vasily Philipov
2017-02-22 19:04 ` Ferruh Yigit
2017-02-22 13:42 ` [dpdk-dev] [PATCH v2 2/2] net/mlx4: support basic flow items and actions Vasily Philipov
` (2 subsequent siblings)
5 siblings, 1 reply; 15+ messages in thread
From: Vasily Philipov @ 2017-02-22 13:42 UTC (permalink / raw)
To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro
Make some structs/defines visible from different source files by placing
them into mlx4.h header.
Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
drivers/net/mlx4/mlx4.c | 183 ++--------------------------------------------
drivers/net/mlx4/mlx4.h | 187 +++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 189 insertions(+), 181 deletions(-)
diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 79efaaa..82ccac8 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -1,8 +1,8 @@
/*-
* BSD LICENSE
*
- * Copyright 2012-2015 6WIND S.A.
- * Copyright 2012 Mellanox.
+ * Copyright 2012-2017 6WIND S.A.
+ * Copyright 2012-2017 Mellanox.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -68,10 +68,6 @@
#pragma GCC diagnostic error "-Wpedantic"
#endif
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_dev.h>
@@ -86,9 +82,6 @@
#include <rte_log.h>
#include <rte_alarm.h>
#include <rte_memory.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
/* Generated configuration header. */
#include "mlx4_autoconf.h"
@@ -96,21 +89,6 @@
/* PMD header. */
#include "mlx4.h"
-/* Runtime logging through RTE_LOG() is enabled when not in debugging mode.
- * Intermediate LOG_*() macros add the required end-of-line characters. */
-#ifndef NDEBUG
-#define INFO(...) DEBUG(__VA_ARGS__)
-#define WARN(...) DEBUG(__VA_ARGS__)
-#define ERROR(...) DEBUG(__VA_ARGS__)
-#else
-#define LOG__(level, m, ...) \
- RTE_LOG(level, PMD, MLX4_DRIVER_NAME ": " m "%c", __VA_ARGS__)
-#define LOG_(level, ...) LOG__(level, __VA_ARGS__, '\n')
-#define INFO(...) LOG_(INFO, __VA_ARGS__)
-#define WARN(...) LOG_(WARNING, __VA_ARGS__)
-#define ERROR(...) LOG_(ERR, __VA_ARGS__)
-#endif
-
/* Convenience macros for accessing mbuf fields. */
#define NEXT(m) ((m)->next)
#define DATA_LEN(m) ((m)->data_len)
@@ -137,157 +115,6 @@
(((val) & (from)) / ((from) / (to))) : \
(((val) & (from)) * ((to) / (from))))
-struct mlx4_rxq_stats {
- unsigned int idx; /**< Mapping index. */
-#ifdef MLX4_PMD_SOFT_COUNTERS
- uint64_t ipackets; /**< Total of successfully received packets. */
- uint64_t ibytes; /**< Total of successfully received bytes. */
-#endif
- uint64_t idropped; /**< Total of packets dropped when RX ring full. */
- uint64_t rx_nombuf; /**< Total of RX mbuf allocation failures. */
-};
-
-struct mlx4_txq_stats {
- unsigned int idx; /**< Mapping index. */
-#ifdef MLX4_PMD_SOFT_COUNTERS
- uint64_t opackets; /**< Total of successfully sent packets. */
- uint64_t obytes; /**< Total of successfully sent bytes. */
-#endif
- uint64_t odropped; /**< Total of packets not sent when TX ring full. */
-};
-
-/* RX element (scattered packets). */
-struct rxq_elt_sp {
- struct ibv_recv_wr wr; /* Work Request. */
- struct ibv_sge sges[MLX4_PMD_SGE_WR_N]; /* Scatter/Gather Elements. */
- struct rte_mbuf *bufs[MLX4_PMD_SGE_WR_N]; /* SGEs buffers. */
-};
-
-/* RX element. */
-struct rxq_elt {
- struct ibv_recv_wr wr; /* Work Request. */
- struct ibv_sge sge; /* Scatter/Gather Element. */
- /* mbuf pointer is derived from WR_ID(wr.wr_id).offset. */
-};
-
-/* RX queue descriptor. */
-struct rxq {
- struct priv *priv; /* Back pointer to private data. */
- struct rte_mempool *mp; /* Memory Pool for allocations. */
- struct ibv_mr *mr; /* Memory Region (for mp). */
- struct ibv_cq *cq; /* Completion Queue. */
- struct ibv_qp *qp; /* Queue Pair. */
- struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
- struct ibv_exp_cq_family *if_cq; /* CQ interface. */
- /*
- * Each VLAN ID requires a separate flow steering rule.
- */
- BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
- struct ibv_flow *mac_flow[MLX4_MAX_MAC_ADDRESSES][MLX4_MAX_VLAN_IDS];
- struct ibv_flow *promisc_flow; /* Promiscuous flow. */
- struct ibv_flow *allmulti_flow; /* Multicast flow. */
- unsigned int port_id; /* Port ID for incoming packets. */
- unsigned int elts_n; /* (*elts)[] length. */
- unsigned int elts_head; /* Current index in (*elts)[]. */
- union {
- struct rxq_elt_sp (*sp)[]; /* Scattered RX elements. */
- struct rxq_elt (*no_sp)[]; /* RX elements. */
- } elts;
- unsigned int sp:1; /* Use scattered RX elements. */
- unsigned int csum:1; /* Enable checksum offloading. */
- unsigned int csum_l2tun:1; /* Same for L2 tunnels. */
- struct mlx4_rxq_stats stats; /* RX queue counters. */
- unsigned int socket; /* CPU socket ID for allocations. */
- struct ibv_exp_res_domain *rd; /* Resource Domain. */
-};
-
-/* TX element. */
-struct txq_elt {
- struct rte_mbuf *buf;
-};
-
-/* Linear buffer type. It is used when transmitting buffers with too many
- * segments that do not fit the hardware queue (see max_send_sge).
- * Extra segments are copied (linearized) in such buffers, replacing the
- * last SGE during TX.
- * The size is arbitrary but large enough to hold a jumbo frame with
- * 8 segments considering mbuf.buf_len is about 2048 bytes. */
-typedef uint8_t linear_t[16384];
-
-/* TX queue descriptor. */
-struct txq {
- struct priv *priv; /* Back pointer to private data. */
- struct {
- const struct rte_mempool *mp; /* Cached Memory Pool. */
- struct ibv_mr *mr; /* Memory Region (for mp). */
- uint32_t lkey; /* mr->lkey */
- } mp2mr[MLX4_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
- struct ibv_cq *cq; /* Completion Queue. */
- struct ibv_qp *qp; /* Queue Pair. */
- struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
- struct ibv_exp_cq_family *if_cq; /* CQ interface. */
-#if MLX4_PMD_MAX_INLINE > 0
- uint32_t max_inline; /* Max inline send size <= MLX4_PMD_MAX_INLINE. */
-#endif
- unsigned int elts_n; /* (*elts)[] length. */
- struct txq_elt (*elts)[]; /* TX elements. */
- unsigned int elts_head; /* Current index in (*elts)[]. */
- unsigned int elts_tail; /* First element awaiting completion. */
- unsigned int elts_comp; /* Number of completion requests. */
- unsigned int elts_comp_cd; /* Countdown for next completion request. */
- unsigned int elts_comp_cd_init; /* Initial value for countdown. */
- struct mlx4_txq_stats stats; /* TX queue counters. */
- linear_t (*elts_linear)[]; /* Linearized buffers. */
- struct ibv_mr *mr_linear; /* Memory Region for linearized buffers. */
- unsigned int socket; /* CPU socket ID for allocations. */
- struct ibv_exp_res_domain *rd; /* Resource Domain. */
-};
-
-struct priv {
- struct rte_eth_dev *dev; /* Ethernet device. */
- struct ibv_context *ctx; /* Verbs context. */
- struct ibv_device_attr device_attr; /* Device properties. */
- struct ibv_pd *pd; /* Protection Domain. */
- /*
- * MAC addresses array and configuration bit-field.
- * An extra entry that cannot be modified by the DPDK is reserved
- * for broadcast frames (destination MAC address ff:ff:ff:ff:ff:ff).
- */
- struct ether_addr mac[MLX4_MAX_MAC_ADDRESSES];
- BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
- /* VLAN filters. */
- struct {
- unsigned int enabled:1; /* If enabled. */
- unsigned int id:12; /* VLAN ID (0-4095). */
- } vlan_filter[MLX4_MAX_VLAN_IDS]; /* VLAN filters table. */
- /* Device properties. */
- uint16_t mtu; /* Configured MTU. */
- uint8_t port; /* Physical port number. */
- unsigned int started:1; /* Device started, flows enabled. */
- unsigned int promisc:1; /* Device in promiscuous mode. */
- unsigned int allmulti:1; /* Device receives all multicast packets. */
- unsigned int hw_qpg:1; /* QP groups are supported. */
- unsigned int hw_tss:1; /* TSS is supported. */
- unsigned int hw_rss:1; /* RSS is supported. */
- unsigned int hw_csum:1; /* Checksum offload is supported. */
- unsigned int hw_csum_l2tun:1; /* Same for L2 tunnels. */
- unsigned int rss:1; /* RSS is enabled. */
- unsigned int vf:1; /* This is a VF device. */
- unsigned int pending_alarm:1; /* An alarm is pending. */
-#ifdef INLINE_RECV
- unsigned int inl_recv_size; /* Inline recv size */
-#endif
- unsigned int max_rss_tbl_sz; /* Maximum number of RSS queues. */
- /* RX/TX queues. */
- struct rxq rxq_parent; /* Parent queue when RSS is enabled. */
- unsigned int rxqs_n; /* RX queues array size. */
- unsigned int txqs_n; /* TX queues array size. */
- struct rxq *(*rxqs)[]; /* RX queues. */
- struct txq *(*txqs)[]; /* TX queues. */
- struct rte_intr_handle intr_handle; /* Interrupt handler. */
- rte_spinlock_t lock; /* Lock for control functions. */
-};
-
/* Local storage for secondary process data. */
struct mlx4_secondary_data {
struct rte_eth_dev_data data; /* Local device data. */
@@ -335,8 +162,7 @@ struct mlx4_secondary_data {
* @param priv
* Pointer to private structure.
*/
-static void
-priv_lock(struct priv *priv)
+void priv_lock(struct priv *priv)
{
rte_spinlock_lock(&priv->lock);
}
@@ -347,8 +173,7 @@ struct mlx4_secondary_data {
* @param priv
* Pointer to private structure.
*/
-static void
-priv_unlock(struct priv *priv)
+void priv_unlock(struct priv *priv)
{
rte_spinlock_unlock(&priv->lock);
}
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 4c7505e..70c9ecd 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -1,8 +1,8 @@
/*-
* BSD LICENSE
*
- * Copyright 2012-2015 6WIND S.A.
- * Copyright 2012 Mellanox.
+ * Copyright 2012-2017 6WIND S.A.
+ * Copyright 2012-2017 Mellanox.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -39,6 +39,33 @@
#include <limits.h>
/*
+ * Runtime logging through RTE_LOG() is enabled when not in debugging mode.
+ * Intermediate LOG_*() macros add the required end-of-line characters.
+ */
+#ifndef NDEBUG
+#define INFO(...) DEBUG(__VA_ARGS__)
+#define WARN(...) DEBUG(__VA_ARGS__)
+#define ERROR(...) DEBUG(__VA_ARGS__)
+#else
+#define LOG__(level, m, ...) \
+ RTE_LOG(level, PMD, MLX4_DRIVER_NAME ": " m "%c", __VA_ARGS__)
+#define LOG_(level, ...) LOG__(level, __VA_ARGS__, '\n')
+#define INFO(...) LOG_(INFO, __VA_ARGS__)
+#define WARN(...) LOG_(WARNING, __VA_ARGS__)
+#define ERROR(...) LOG_(ERR, __VA_ARGS__)
+#endif
+
+/* Verbs header. */
+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+/*
* Maximum number of simultaneous MAC addresses supported.
*
* According to ConnectX's Programmer Reference Manual:
@@ -160,4 +187,160 @@ enum {
#define claim_positive(...) (__VA_ARGS__)
#endif /* NDEBUG */
+struct mlx4_rxq_stats {
+ unsigned int idx; /**< Mapping index. */
+#ifdef MLX4_PMD_SOFT_COUNTERS
+ uint64_t ipackets; /**< Total of successfully received packets. */
+ uint64_t ibytes; /**< Total of successfully received bytes. */
+#endif
+ uint64_t idropped; /**< Total of packets dropped when RX ring full. */
+ uint64_t rx_nombuf; /**< Total of RX mbuf allocation failures. */
+};
+
+/* RX element (scattered packets). */
+struct rxq_elt_sp {
+ struct ibv_recv_wr wr; /* Work Request. */
+ struct ibv_sge sges[MLX4_PMD_SGE_WR_N]; /* Scatter/Gather Elements. */
+ struct rte_mbuf *bufs[MLX4_PMD_SGE_WR_N]; /* SGEs buffers. */
+};
+
+/* RX element. */
+struct rxq_elt {
+ struct ibv_recv_wr wr; /* Work Request. */
+ struct ibv_sge sge; /* Scatter/Gather Element. */
+ /* mbuf pointer is derived from WR_ID(wr.wr_id).offset. */
+};
+
+/* RX queue descriptor. */
+struct rxq {
+ struct priv *priv; /* Back pointer to private data. */
+ struct rte_mempool *mp; /* Memory Pool for allocations. */
+ struct ibv_mr *mr; /* Memory Region (for mp). */
+ struct ibv_cq *cq; /* Completion Queue. */
+ struct ibv_qp *qp; /* Queue Pair. */
+ struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
+ struct ibv_exp_cq_family *if_cq; /* CQ interface. */
+ /*
+ * Each VLAN ID requires a separate flow steering rule.
+ */
+ BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
+ struct ibv_flow *mac_flow[MLX4_MAX_MAC_ADDRESSES][MLX4_MAX_VLAN_IDS];
+ struct ibv_flow *promisc_flow; /* Promiscuous flow. */
+ struct ibv_flow *allmulti_flow; /* Multicast flow. */
+ unsigned int port_id; /* Port ID for incoming packets. */
+ unsigned int elts_n; /* (*elts)[] length. */
+ unsigned int elts_head; /* Current index in (*elts)[]. */
+ union {
+ struct rxq_elt_sp (*sp)[]; /* Scattered RX elements. */
+ struct rxq_elt (*no_sp)[]; /* RX elements. */
+ } elts;
+ unsigned int sp:1; /* Use scattered RX elements. */
+ unsigned int csum:1; /* Enable checksum offloading. */
+ unsigned int csum_l2tun:1; /* Same for L2 tunnels. */
+ struct mlx4_rxq_stats stats; /* RX queue counters. */
+ unsigned int socket; /* CPU socket ID for allocations. */
+ struct ibv_exp_res_domain *rd; /* Resource Domain. */
+};
+
+/* TX element. */
+struct txq_elt {
+ struct rte_mbuf *buf;
+};
+
+struct mlx4_txq_stats {
+ unsigned int idx; /**< Mapping index. */
+#ifdef MLX4_PMD_SOFT_COUNTERS
+ uint64_t opackets; /**< Total of successfully sent packets. */
+ uint64_t obytes; /**< Total of successfully sent bytes. */
+#endif
+ uint64_t odropped; /**< Total of packets not sent when TX ring full. */
+};
+
+/*
+ * Linear buffer type. It is used when transmitting buffers with too many
+ * segments that do not fit the hardware queue (see max_send_sge).
+ * Extra segments are copied (linearized) in such buffers, replacing the
+ * last SGE during TX.
+ * The size is arbitrary but large enough to hold a jumbo frame with
+ * 8 segments considering mbuf.buf_len is about 2048 bytes.
+ */
+typedef uint8_t linear_t[16384];
+
+/* TX queue descriptor. */
+struct txq {
+ struct priv *priv; /* Back pointer to private data. */
+ struct {
+ const struct rte_mempool *mp; /* Cached Memory Pool. */
+ struct ibv_mr *mr; /* Memory Region (for mp). */
+ uint32_t lkey; /* mr->lkey */
+ } mp2mr[MLX4_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
+ struct ibv_cq *cq; /* Completion Queue. */
+ struct ibv_qp *qp; /* Queue Pair. */
+ struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
+ struct ibv_exp_cq_family *if_cq; /* CQ interface. */
+#if MLX4_PMD_MAX_INLINE > 0
+ uint32_t max_inline; /* Max inline send size <= MLX4_PMD_MAX_INLINE. */
+#endif
+ unsigned int elts_n; /* (*elts)[] length. */
+ struct txq_elt (*elts)[]; /* TX elements. */
+ unsigned int elts_head; /* Current index in (*elts)[]. */
+ unsigned int elts_tail; /* First element awaiting completion. */
+ unsigned int elts_comp; /* Number of completion requests. */
+ unsigned int elts_comp_cd; /* Countdown for next completion request. */
+ unsigned int elts_comp_cd_init; /* Initial value for countdown. */
+ struct mlx4_txq_stats stats; /* TX queue counters. */
+ linear_t (*elts_linear)[]; /* Linearized buffers. */
+ struct ibv_mr *mr_linear; /* Memory Region for linearized buffers. */
+ unsigned int socket; /* CPU socket ID for allocations. */
+ struct ibv_exp_res_domain *rd; /* Resource Domain. */
+};
+
+struct priv {
+ struct rte_eth_dev *dev; /* Ethernet device. */
+ struct ibv_context *ctx; /* Verbs context. */
+ struct ibv_device_attr device_attr; /* Device properties. */
+ struct ibv_pd *pd; /* Protection Domain. */
+ /*
+ * MAC addresses array and configuration bit-field.
+ * An extra entry that cannot be modified by the DPDK is reserved
+ * for broadcast frames (destination MAC address ff:ff:ff:ff:ff:ff).
+ */
+ struct ether_addr mac[MLX4_MAX_MAC_ADDRESSES];
+ BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
+ /* VLAN filters. */
+ struct {
+ unsigned int enabled:1; /* If enabled. */
+ unsigned int id:12; /* VLAN ID (0-4095). */
+ } vlan_filter[MLX4_MAX_VLAN_IDS]; /* VLAN filters table. */
+ /* Device properties. */
+ uint16_t mtu; /* Configured MTU. */
+ uint8_t port; /* Physical port number. */
+ unsigned int started:1; /* Device started, flows enabled. */
+ unsigned int promisc:1; /* Device in promiscuous mode. */
+ unsigned int allmulti:1; /* Device receives all multicast packets. */
+ unsigned int hw_qpg:1; /* QP groups are supported. */
+ unsigned int hw_tss:1; /* TSS is supported. */
+ unsigned int hw_rss:1; /* RSS is supported. */
+ unsigned int hw_csum:1; /* Checksum offload is supported. */
+ unsigned int hw_csum_l2tun:1; /* Same for L2 tunnels. */
+ unsigned int rss:1; /* RSS is enabled. */
+ unsigned int vf:1; /* This is a VF device. */
+ unsigned int pending_alarm:1; /* An alarm is pending. */
+#ifdef INLINE_RECV
+ unsigned int inl_recv_size; /* Inline recv size */
+#endif
+ unsigned int max_rss_tbl_sz; /* Maximum number of RSS queues. */
+ /* RX/TX queues. */
+ struct rxq rxq_parent; /* Parent queue when RSS is enabled. */
+ unsigned int rxqs_n; /* RX queues array size. */
+ unsigned int txqs_n; /* TX queues array size. */
+ struct rxq *(*rxqs)[]; /* RX queues. */
+ struct txq *(*txqs)[]; /* TX queues. */
+ struct rte_intr_handle intr_handle; /* Interrupt handler. */
+ rte_spinlock_t lock; /* Lock for control functions. */
+};
+
+void priv_lock(struct priv *priv);
+void priv_unlock(struct priv *priv);
+
#endif /* RTE_PMD_MLX4_H_ */
--
1.8.3.1
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [dpdk-dev] [PATCH v2 1/2] net/mlx4: split the definitions to the header file
2017-02-22 13:42 ` [dpdk-dev] [PATCH v2 " Vasily Philipov
@ 2017-02-22 19:04 ` Ferruh Yigit
2017-02-23 10:44 ` Vasily Philipov
0 siblings, 1 reply; 15+ messages in thread
From: Ferruh Yigit @ 2017-02-22 19:04 UTC (permalink / raw)
To: Vasily Philipov, dev; +Cc: Adrien Mazarguil, Nelio Laranjeiro
On 2/22/2017 1:42 PM, Vasily Philipov wrote:
> Make some structs/defines visible from different source files by placing
> them into mlx4.h header.
>
> Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
> ---
> drivers/net/mlx4/mlx4.c | 183 ++--------------------------------------------
> drivers/net/mlx4/mlx4.h | 187 +++++++++++++++++++++++++++++++++++++++++++++++-
> 2 files changed, 189 insertions(+), 181 deletions(-)
>
> diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
> index 79efaaa..82ccac8 100644
> --- a/drivers/net/mlx4/mlx4.c
> +++ b/drivers/net/mlx4/mlx4.c
> @@ -1,8 +1,8 @@
> /*-
> * BSD LICENSE
> *
> - * Copyright 2012-2015 6WIND S.A.
> - * Copyright 2012 Mellanox.
> + * Copyright 2012-2017 6WIND S.A.
> + * Copyright 2012-2017 Mellanox.
Can someone knowledgeable about Copyright help please?
What is the year field in Copyright line for?
And above change updates Copyright from 2012 to 2012-2017, is this correct?
> *
> * Redistribution and use in source and binary forms, with or without
> * modification, are permitted provided that the following conditions
> @@ -68,10 +68,6 @@
> #pragma GCC diagnostic error "-Wpedantic"
> #endif
The lines not shown above are "#include <infiniband/verbs.h>" wrapped with
#pragma for pedantic.
That piece was moved to "mlx4.h" [1], which is included a few lines later, so
can these lines be removed from this file?
>
> -/* DPDK headers don't like -pedantic. */
> -#ifdef PEDANTIC
> -#pragma GCC diagnostic ignored "-Wpedantic"
> -#endif
The comment says "DPDK headers don't like -pedantic"; won't removing the
#pragma cause a compile error with the pedantic option?
> #include <rte_ether.h>
> #include <rte_ethdev.h>
> #include <rte_dev.h>
> @@ -86,9 +82,6 @@
> #include <rte_log.h>
> #include <rte_alarm.h>
> #include <rte_memory.h>
> -#ifdef PEDANTIC
> -#pragma GCC diagnostic error "-Wpedantic"
> -#endif
>
> /* Generated configuration header. */
> #include "mlx4_autoconf.h"
> @@ -96,21 +89,6 @@
> /* PMD header. */
> #include "mlx4.h"
>
<...>
> diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
> index 4c7505e..70c9ecd 100644
> --- a/drivers/net/mlx4/mlx4.h
> +++ b/drivers/net/mlx4/mlx4.h
<...>
> +
> +/* Verbs header. */
> +/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
> +#ifdef PEDANTIC
> +#pragma GCC diagnostic ignored "-Wpedantic"
> +#endif
> +#include <infiniband/verbs.h>
> +#ifdef PEDANTIC
> +#pragma GCC diagnostic error "-Wpedantic"
> +#endif
--> [1]
<...>
> +
> +void priv_lock(struct priv *priv);
> +void priv_unlock(struct priv *priv);
It would be good to mention in the commit log that these functions are now
exported.
> +
> #endif /* RTE_PMD_MLX4_H_ */
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [dpdk-dev] [PATCH v2 1/2] net/mlx4: split the definitions to the header file
2017-02-22 19:04 ` Ferruh Yigit
@ 2017-02-23 10:44 ` Vasily Philipov
2017-03-06 9:24 ` Ferruh Yigit
0 siblings, 1 reply; 15+ messages in thread
From: Vasily Philipov @ 2017-02-23 10:44 UTC (permalink / raw)
To: Ferruh Yigit, dev; +Cc: Adrien Mazarguil, Nélio Laranjeiro
Hi Ferruh,
> -----Original Message-----
> From: Ferruh Yigit [mailto:ferruh.yigit@intel.com]
> Sent: Wednesday, February 22, 2017 21:05
> To: Vasily Philipov <vasilyf@mellanox.com>; dev@dpdk.org
> Cc: Adrien Mazarguil <adrien.mazarguil@6wind.com>; Nélio Laranjeiro
> <nelio.laranjeiro@6wind.com>
> Subject: Re: [dpdk-dev] [PATCH v2 1/2] net/mlx4: split the definitions to the
> header file
>
> On 2/22/2017 1:42 PM, Vasily Philipov wrote:
> > Make some structs/defines visible from different source files by
> > placing them into mlx4.h header.
> >
> > Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
> > ---
> > drivers/net/mlx4/mlx4.c | 183
> > ++--------------------------------------------
> > drivers/net/mlx4/mlx4.h | 187
> > +++++++++++++++++++++++++++++++++++++++++++++++-
> > 2 files changed, 189 insertions(+), 181 deletions(-)
> >
> > diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c index
> > 79efaaa..82ccac8 100644
> > --- a/drivers/net/mlx4/mlx4.c
> > +++ b/drivers/net/mlx4/mlx4.c
> > @@ -1,8 +1,8 @@
> > /*-
> > * BSD LICENSE
> > *
> > - * Copyright 2012-2015 6WIND S.A.
> > - * Copyright 2012 Mellanox.
> > + * Copyright 2012-2017 6WIND S.A.
> > + * Copyright 2012-2017 Mellanox.
>
> Can someone knowledgeable about Copyright help please?
>
> What is the year field in Copyright line for?
> And above change updates Copyright from 2012 to 2012-2017, is this correct?
>
The year line was changed in order to show when the file was last modified...
> > *
> > * Redistribution and use in source and binary forms, with or without
> > * modification, are permitted provided that the following conditions
> > @@ -68,10 +68,6 @@
> > #pragma GCC diagnostic error "-Wpedantic"
> > #endif
>
> Above invisible lines are "#include <infiniband/verbs.h>" wrapped with
> #pragma for pedantic.
>
> That piece moved to "mlx4.h" [1], which included a few lines later, so can
> these line be removed from this line?
>
> >
> > -/* DPDK headers don't like -pedantic. */ -#ifdef PEDANTIC -#pragma
> > GCC diagnostic ignored "-Wpedantic"
> > -#endif
>
> Comment says "DPDK headers don't like -pedantic", won't removing
> #pragma cause compile error with pedantic option?
>
It is not necessary anymore; it was fixed by the following commit:
commit c0362128c57a0ad22ea311a9657bb15a44b70793
Author: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Date: Mon Jun 29 11:34:52 2015 +0200
eal: fix pedantic build of mlx4 debug mode
> > #include <rte_ether.h>
> > #include <rte_ethdev.h>
> > #include <rte_dev.h>
> > @@ -86,9 +82,6 @@
> > #include <rte_log.h>
> > #include <rte_alarm.h>
> > #include <rte_memory.h>
> > -#ifdef PEDANTIC
> > -#pragma GCC diagnostic error "-Wpedantic"
> > -#endif
> >
> > /* Generated configuration header. */ #include "mlx4_autoconf.h"
> > @@ -96,21 +89,6 @@
> > /* PMD header. */
> > #include "mlx4.h"
> >
> <...>
>
> > diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h index
> > 4c7505e..70c9ecd 100644
> > --- a/drivers/net/mlx4/mlx4.h
> > +++ b/drivers/net/mlx4/mlx4.h
> <...>
> > +
> > +/* Verbs header. */
> > +/* ISO C doesn't support unnamed structs/unions, disabling -pedantic.
> > +*/ #ifdef PEDANTIC #pragma GCC diagnostic ignored "-Wpedantic"
> > +#endif
> > +#include <infiniband/verbs.h>
> > +#ifdef PEDANTIC
> > +#pragma GCC diagnostic error "-Wpedantic"
> > +#endif
>
> --> [1]
>
> <...>
>
> > +
> > +void priv_lock(struct priv *priv);
> > +void priv_unlock(struct priv *priv);
>
> It can be good to mention in commit log that these functions are now
> exported.
>
> > +
> > #endif /* RTE_PMD_MLX4_H_ */
> >
I will fix the rest of the issues and will send the v3 patches.
Thank you,
Vasily
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [dpdk-dev] [PATCH v2 1/2] net/mlx4: split the definitions to the header file
2017-02-23 10:44 ` Vasily Philipov
@ 2017-03-06 9:24 ` Ferruh Yigit
0 siblings, 0 replies; 15+ messages in thread
From: Ferruh Yigit @ 2017-03-06 9:24 UTC (permalink / raw)
To: Vasily Philipov, dev
Cc: Adrien Mazarguil, Nélio Laranjeiro, Thomas Monjalon, Neil Horman
On 2/23/2017 10:44 AM, Vasily Philipov wrote:
> Hi Ferruh,
>
>> -----Original Message-----
>> From: Ferruh Yigit [mailto:ferruh.yigit@intel.com]
>> Sent: Wednesday, February 22, 2017 21:05
>> To: Vasily Philipov <vasilyf@mellanox.com>; dev@dpdk.org
>> Cc: Adrien Mazarguil <adrien.mazarguil@6wind.com>; Nélio Laranjeiro
>> <nelio.laranjeiro@6wind.com>
>> Subject: Re: [dpdk-dev] [PATCH v2 1/2] net/mlx4: split the definitions to the
>> header file
>>
>> On 2/22/2017 1:42 PM, Vasily Philipov wrote:
>>> Make some structs/defines visible from different source files by
>>> placing them into mlx4.h header.
>>>
>>> Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
>>> ---
>>> drivers/net/mlx4/mlx4.c | 183
>>> ++--------------------------------------------
>>> drivers/net/mlx4/mlx4.h | 187
>>> +++++++++++++++++++++++++++++++++++++++++++++++-
>>> 2 files changed, 189 insertions(+), 181 deletions(-)
>>>
>>> diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c index
>>> 79efaaa..82ccac8 100644
>>> --- a/drivers/net/mlx4/mlx4.c
>>> +++ b/drivers/net/mlx4/mlx4.c
>>> @@ -1,8 +1,8 @@
>>> /*-
>>> * BSD LICENSE
>>> *
>>> - * Copyright 2012-2015 6WIND S.A.
>>> - * Copyright 2012 Mellanox.
>>> + * Copyright 2012-2017 6WIND S.A.
>>> + * Copyright 2012-2017 Mellanox.
>>
>> Can someone knowledgeable about Copyright help please?
>>
>> What is the year field in Copyright line for?
>> And above change updates Copyright from 2012 to 2012-2017, is this correct?
>>
>
> The year line was changes in order to show when the file was changed the last time...
I see, but I don't know whether the year field is meant to be a "last updated"
marker, especially when there are multiple copyright holders.
Overall, I don't know why a second date is required at all, assuming the first
date shows when the work started and sets the copyright coverage date.
A comment from someone who knows more about these issues is welcome.
Thanks,
ferruh
^ permalink raw reply [flat|nested] 15+ messages in thread
* [dpdk-dev] [PATCH v2 2/2] net/mlx4: support basic flow items and actions
2017-02-21 14:07 [dpdk-dev] [PATCH 1/2] net/mlx4: split the definitions to the header file Vasily Philipov
` (2 preceding siblings ...)
2017-02-22 13:42 ` [dpdk-dev] [PATCH v2 " Vasily Philipov
@ 2017-02-22 13:42 ` Vasily Philipov
2017-03-05 7:51 ` [dpdk-dev] [PATCH v3 1/2] net/mlx4: split the definitions to the header file Vasily Philipov
2017-03-05 7:51 ` [dpdk-dev] [PATCH v3 2/2] net/mlx4: support basic flow items and actions Vasily Philipov
5 siblings, 0 replies; 15+ messages in thread
From: Vasily Philipov @ 2017-02-22 13:42 UTC (permalink / raw)
To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro
Add support for the following items: eth, vlan, ipv4, udp, tcp, and for the
following actions: queue, drop.
---
drivers/net/mlx4/Makefile | 3 +-
drivers/net/mlx4/mlx4.c | 103 ++++-
drivers/net/mlx4/mlx4.h | 3 +
drivers/net/mlx4/mlx4_flow.c | 1053 ++++++++++++++++++++++++++++++++++++++++++
drivers/net/mlx4/mlx4_flow.h | 104 +++++
5 files changed, 1248 insertions(+), 18 deletions(-)
create mode 100644 drivers/net/mlx4/mlx4_flow.c
create mode 100644 drivers/net/mlx4/mlx4_flow.h
diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile
index 68c5902..1d463f7 100644
--- a/drivers/net/mlx4/Makefile
+++ b/drivers/net/mlx4/Makefile
@@ -36,6 +36,7 @@ LIB = librte_pmd_mlx4.a
# Sources.
SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_flow.c
# Dependencies.
DEPDIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += lib/librte_ether
@@ -129,7 +130,7 @@ mlx4_autoconf.h: mlx4_autoconf.h.new
cmp '$<' '$@' $(AUTOCONF_OUTPUT) || \
mv '$<' '$@'
-mlx4.o: mlx4_autoconf.h
+$(SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD):.c=.o): mlx4_autoconf.h
clean_mlx4: FORCE
$Q rm -f -- mlx4_autoconf.h mlx4_autoconf.h.new
diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 82ccac8..e892f9c 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -82,12 +82,14 @@
#include <rte_log.h>
#include <rte_alarm.h>
#include <rte_memory.h>
+#include <rte_flow.h>
/* Generated configuration header. */
#include "mlx4_autoconf.h"
-/* PMD header. */
+/* PMD headers. */
#include "mlx4.h"
+#include "mlx4_flow.h"
/* Convenience macros for accessing mbuf fields. */
#define NEXT(m) ((m)->next)
@@ -2351,6 +2353,7 @@ struct txq_mp2mr_mbuf_check_data {
assert(((uint8_t *)attr + sizeof(*attr)) == (uint8_t *)spec);
*attr = (struct ibv_flow_attr){
.type = IBV_FLOW_ATTR_NORMAL,
+ .priority = 3,
.num_of_specs = 1,
.port = priv->port,
.flags = 0
@@ -3938,6 +3941,7 @@ struct txq_mp2mr_mbuf_check_data {
unsigned int i = 0;
unsigned int r;
struct rxq *rxq;
+ int ret;
if (mlx4_is_secondary())
return -E_RTE_SECONDARY;
@@ -3957,36 +3961,50 @@ struct txq_mp2mr_mbuf_check_data {
}
/* Iterate only once when RSS is enabled. */
do {
- int ret;
-
/* Ignore nonexistent RX queues. */
if (rxq == NULL)
continue;
ret = rxq_mac_addrs_add(rxq);
- if (!ret && priv->promisc)
+ if (ret)
+ goto err;
+ if (priv->promisc)
ret = rxq_promiscuous_enable(rxq);
- if (!ret && priv->allmulti)
+ if (ret) {
+ rxq_mac_addrs_del(rxq);
+ goto err;
+ }
+ if (priv->allmulti)
ret = rxq_allmulticast_enable(rxq);
if (!ret)
continue;
WARN("%p: QP flow attachment failed: %s",
(void *)dev, strerror(ret));
- /* Rollback. */
- while (i != 0) {
- rxq = (*priv->rxqs)[--i];
- if (rxq != NULL) {
- rxq_allmulticast_disable(rxq);
- rxq_promiscuous_disable(rxq);
- rxq_mac_addrs_del(rxq);
- }
- }
- priv->started = 0;
- priv_unlock(priv);
- return -ret;
+ rxq_promiscuous_disable(rxq);
+ rxq_mac_addrs_del(rxq);
+ goto err;
} while ((--r) && ((rxq = (*priv->rxqs)[++i]), i));
priv_dev_interrupt_handler_install(priv, dev);
+ ret = mlx4_priv_flow_start(priv);
+ if (ret) {
+ ERROR("%p: flow start failed: %s",
+ (void *)dev, strerror(ret));
+ goto err;
+ }
priv_unlock(priv);
return 0;
+err:
+ /* Rollback. */
+ while (i != 0) {
+ rxq = (*priv->rxqs)[--i];
+ if (rxq != NULL) {
+ rxq_allmulticast_disable(rxq);
+ rxq_promiscuous_disable(rxq);
+ rxq_mac_addrs_del(rxq);
+ }
+ }
+ priv->started = 0;
+ priv_unlock(priv);
+ return -ret;
}
/**
@@ -4021,6 +4039,7 @@ struct txq_mp2mr_mbuf_check_data {
rxq = (*priv->rxqs)[0];
r = priv->rxqs_n;
}
+ mlx4_priv_flow_stop(priv);
/* Iterate only once when RSS is enabled. */
do {
/* Ignore nonexistent RX queues. */
@@ -5022,6 +5041,55 @@ struct txq_mp2mr_mbuf_check_data {
return -ret;
}
+const struct rte_flow_ops mlx4_flow_ops = {
+ .validate = mlx4_flow_validate,
+ .create = mlx4_flow_create,
+ .destroy = mlx4_flow_destroy,
+ .flush = mlx4_flow_flush,
+ .query = NULL,
+};
+
+/**
+ * Manage filter operations.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param filter_type
+ * Filter type.
+ * @param filter_op
+ * Operation to perform.
+ * @param arg
+ * Pointer to operation-specific structure.
+ *
+ * @return
+ * 0 on success, negative errno value on failure.
+ */
+static int
+mlx4_dev_filter_ctrl(struct rte_eth_dev *dev,
+ enum rte_filter_type filter_type,
+ enum rte_filter_op filter_op,
+ void *arg)
+{
+ int ret = EINVAL;
+
+ switch (filter_type) {
+ case RTE_ETH_FILTER_GENERIC:
+ if (filter_op != RTE_ETH_FILTER_GET)
+ return -EINVAL;
+ *(const void **)arg = &mlx4_flow_ops;
+ return 0;
+ case RTE_ETH_FILTER_FDIR:
+ DEBUG("%p: filter type FDIR is not supported by this PMD",
+ (void *)dev);
+ break;
+ default:
+ ERROR("%p: filter type (%d) not supported",
+ (void *)dev, filter_type);
+ break;
+ }
+ return -ret;
+}
+
static const struct eth_dev_ops mlx4_dev_ops = {
.dev_configure = mlx4_dev_configure,
.dev_start = mlx4_dev_start,
@@ -5056,6 +5124,7 @@ struct txq_mp2mr_mbuf_check_data {
.mac_addr_add = mlx4_mac_addr_add,
.mac_addr_set = mlx4_mac_addr_set,
.mtu_set = mlx4_dev_set_mtu,
+ .filter_ctrl = mlx4_dev_filter_ctrl,
};
/**
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 70c9ecd..fac408b 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -295,6 +295,8 @@ struct txq {
struct ibv_exp_res_domain *rd; /* Resource Domain. */
};
+struct rte_flow;
+
struct priv {
struct rte_eth_dev *dev; /* Ethernet device. */
struct ibv_context *ctx; /* Verbs context. */
@@ -337,6 +339,7 @@ struct priv {
struct rxq *(*rxqs)[]; /* RX queues. */
struct txq *(*txqs)[]; /* TX queues. */
struct rte_intr_handle intr_handle; /* Interrupt handler. */
+ LIST_HEAD(mlx4_flows, rte_flow) flows;
rte_spinlock_t lock; /* Lock for control functions. */
};
diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
new file mode 100644
index 0000000..2328a18
--- /dev/null
+++ b/drivers/net/mlx4/mlx4_flow.c
@@ -0,0 +1,1053 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <assert.h>
+
+/* Verbs header. */
+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+#include <rte_malloc.h>
+
+/* Generated configuration header. */
+#include "mlx4_autoconf.h"
+
+/* PMD headers. */
+#include "mlx4.h"
+#include "mlx4_flow.h"
+
+/**
+ * Static initializer for items.
+ *
+ * Expands to a compound literal array of item types made of the given
+ * arguments followed by a terminating RTE_FLOW_ITEM_TYPE_END entry.
+ */
+#define ITEMS(...) \
+ (const enum rte_flow_item_type []){ \
+ __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
+ }
+
+/**
+ * Structure to generate a simple graph of layers supported by the NIC.
+ *
+ * Each entry describes one pattern item type: the masks it supports, how
+ * to validate and convert it, and which item types may follow it.
+ */
+struct mlx4_flow_items {
+ /** List of possible actions for these items. */
+ const enum rte_flow_action_type *const actions;
+ /** Bit-masks corresponding to the possibilities for the item. */
+ const void *mask;
+ /**
+ * Default bit-masks to use when item->mask is not provided. When
+ * \default_mask is also NULL, the full supported bit-mask (\mask) is
+ * used instead.
+ */
+ const void *default_mask;
+ /** Bit-masks size in bytes. */
+ const unsigned int mask_sz;
+ /**
+ * Check support for a given item.
+ *
+ * @param[in] item
+ * Item specification.
+ * @param[in] mask
+ * Bit-masks covering supported fields to compare with spec,
+ * last and mask in
+ * \item.
+ * @param size
+ * Bit-Mask size in bytes.
+ *
+ * @return
+ * 0 on success, negative value otherwise.
+ */
+ int (*validate)(const struct rte_flow_item *item,
+ const uint8_t *mask, unsigned int size);
+ /**
+ * Conversion function from rte_flow to NIC specific flow.
+ *
+ * @param item
+ * rte_flow item to convert.
+ * @param default_mask
+ * Default bit-masks to use when item->mask is not provided.
+ * @param data
+ * Internal structure to store the conversion.
+ *
+ * @return
+ * 0 on success, negative value otherwise.
+ */
+ int (*convert)(const struct rte_flow_item *item,
+ const void *default_mask,
+ void *data);
+ /**
+ * Size in bytes of the destination structure. The flow offset is
+ * advanced by this amount once the item has been processed.
+ */
+ const unsigned int dst_sz;
+ /** List of possible following items. */
+ const enum rte_flow_item_type *const items;
+};
+
+/**
+ * Valid actions for this PMD.
+ *
+ * List terminated by RTE_FLOW_ACTION_TYPE_END, referenced by every entry
+ * of the supported items graph.
+ */
+static const enum rte_flow_action_type valid_actions[] = {
+ RTE_FLOW_ACTION_TYPE_DROP,
+ RTE_FLOW_ACTION_TYPE_QUEUE,
+ RTE_FLOW_ACTION_TYPE_END,
+};
+
+/**
+ * Convert Ethernet item to Verbs specification.
+ *
+ * Writes an Ethernet specification at the current flow offset, counts it
+ * in the Verbs attributes and sets the attribute priority to 2. Without
+ * item spec, the attribute type is switched to IBV_FLOW_ATTR_ALL_DEFAULT
+ * and the specification is left empty.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] default_mask
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param[in, out] data
+ *   User structure (struct mlx4_flow).
+ *
+ * @return
+ *   Always 0.
+ */
+static int
+mlx4_flow_create_eth(const struct rte_flow_item *item,
+		     const void *default_mask,
+		     void *data)
+{
+	struct mlx4_flow *flow = data;
+	const struct rte_flow_item_eth *eth_spec = item->spec;
+	const struct rte_flow_item_eth *eth_mask =
+		item->mask ? item->mask : default_mask;
+	struct ibv_flow_spec_eth *verbs =
+		(void *)((uintptr_t)flow->ibv_attr + flow->offset);
+	unsigned int byte;
+
+	flow->ibv_attr->num_of_specs += 1;
+	flow->ibv_attr->priority = 2;
+	*verbs = (struct ibv_flow_spec_eth) {
+		.type = IBV_FLOW_SPEC_ETH,
+		.size = sizeof(*verbs),
+	};
+	if (eth_spec == NULL) {
+		flow->ibv_attr->type = IBV_FLOW_ATTR_ALL_DEFAULT;
+		return 0;
+	}
+	/* Copy masks and store values with unwanted bits removed. */
+	for (byte = 0; byte != ETHER_ADDR_LEN; ++byte) {
+		const uint8_t dst_bits = eth_mask->dst.addr_bytes[byte];
+		const uint8_t src_bits = eth_mask->src.addr_bytes[byte];
+
+		verbs->mask.dst_mac[byte] = dst_bits;
+		verbs->mask.src_mac[byte] = src_bits;
+		verbs->val.dst_mac[byte] =
+			eth_spec->dst.addr_bytes[byte] & dst_bits;
+		verbs->val.src_mac[byte] =
+			eth_spec->src.addr_bytes[byte] & src_bits;
+	}
+	return 0;
+}
+
+/**
+ * Convert VLAN item to Verbs specification.
+ *
+ * No specification is added: the VLAN tag is stored in the Ethernet
+ * specification located immediately before the current flow offset.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] default_mask
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param[in, out] data
+ *   User structure (struct mlx4_flow).
+ *
+ * @return
+ *   Always 0.
+ */
+static int
+mlx4_flow_create_vlan(const struct rte_flow_item *item,
+		      const void *default_mask,
+		      void *data)
+{
+	struct mlx4_flow *flow = data;
+	const struct rte_flow_item_vlan *vlan_spec = item->spec;
+	const struct rte_flow_item_vlan *vlan_mask =
+		item->mask ? item->mask : default_mask;
+	struct ibv_flow_spec_eth *verbs;
+
+	if (vlan_spec == NULL)
+		return 0;
+	verbs = (void *)((uintptr_t)flow->ibv_attr + flow->offset -
+			 sizeof(struct ibv_flow_spec_eth));
+	verbs->mask.vlan_tag = vlan_mask->tci;
+	/* Store the value with unwanted bits removed. */
+	verbs->val.vlan_tag = vlan_spec->tci & vlan_mask->tci;
+	return 0;
+}
+
+/**
+ * Convert IPv4 item to Verbs specification.
+ *
+ * Writes an IPv4 specification at the current flow offset, counts it in
+ * the Verbs attributes and sets the attribute priority to 1. Only source
+ * and destination addresses are converted.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] default_mask
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param[in, out] data
+ *   User structure (struct mlx4_flow).
+ *
+ * @return
+ *   Always 0.
+ */
+static int
+mlx4_flow_create_ipv4(const struct rte_flow_item *item,
+		      const void *default_mask,
+		      void *data)
+{
+	struct mlx4_flow *flow = data;
+	const struct rte_flow_item_ipv4 *ip_spec = item->spec;
+	const struct rte_flow_item_ipv4 *ip_mask =
+		item->mask ? item->mask : default_mask;
+	struct ibv_flow_spec_ipv4 *verbs =
+		(void *)((uintptr_t)flow->ibv_attr + flow->offset);
+
+	flow->ibv_attr->num_of_specs += 1;
+	flow->ibv_attr->priority = 1;
+	/* Value and mask start zeroed. */
+	*verbs = (struct ibv_flow_spec_ipv4) {
+		.type = IBV_FLOW_SPEC_IPV4,
+		.size = sizeof(*verbs),
+	};
+	if (ip_spec == NULL)
+		return 0;
+	verbs->mask.src_ip = ip_mask->hdr.src_addr;
+	verbs->mask.dst_ip = ip_mask->hdr.dst_addr;
+	/* Store values with unwanted bits removed. */
+	verbs->val.src_ip = ip_spec->hdr.src_addr & ip_mask->hdr.src_addr;
+	verbs->val.dst_ip = ip_spec->hdr.dst_addr & ip_mask->hdr.dst_addr;
+	return 0;
+}
+
+/**
+ * Convert UDP item to Verbs specification.
+ *
+ * Writes a UDP specification at the current flow offset, counts it in
+ * the Verbs attributes and sets the attribute priority to 0. Only source
+ * and destination ports are converted.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] default_mask
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param[in, out] data
+ *   User structure (struct mlx4_flow).
+ *
+ * @return
+ *   Always 0.
+ */
+static int
+mlx4_flow_create_udp(const struct rte_flow_item *item,
+		     const void *default_mask,
+		     void *data)
+{
+	struct mlx4_flow *flow = data;
+	const struct rte_flow_item_udp *udp_spec = item->spec;
+	const struct rte_flow_item_udp *udp_mask =
+		item->mask ? item->mask : default_mask;
+	struct ibv_flow_spec_tcp_udp *verbs =
+		(void *)((uintptr_t)flow->ibv_attr + flow->offset);
+
+	flow->ibv_attr->num_of_specs += 1;
+	flow->ibv_attr->priority = 0;
+	/* Value and mask start zeroed. */
+	*verbs = (struct ibv_flow_spec_tcp_udp) {
+		.type = IBV_FLOW_SPEC_UDP,
+		.size = sizeof(*verbs),
+	};
+	if (udp_spec == NULL)
+		return 0;
+	verbs->mask.dst_port = udp_mask->hdr.dst_port;
+	verbs->mask.src_port = udp_mask->hdr.src_port;
+	/* Store values with unwanted bits removed. */
+	verbs->val.dst_port = udp_spec->hdr.dst_port & udp_mask->hdr.dst_port;
+	verbs->val.src_port = udp_spec->hdr.src_port & udp_mask->hdr.src_port;
+	return 0;
+}
+
+/**
+ * Convert TCP item to Verbs specification.
+ *
+ * Writes a TCP specification at the current flow offset, counts it in
+ * the Verbs attributes and sets the attribute priority to 0. Only source
+ * and destination ports are converted.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] default_mask
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param[in, out] data
+ *   User structure (struct mlx4_flow).
+ *
+ * @return
+ *   Always 0.
+ */
+static int
+mlx4_flow_create_tcp(const struct rte_flow_item *item,
+		     const void *default_mask,
+		     void *data)
+{
+	struct mlx4_flow *flow = data;
+	const struct rte_flow_item_tcp *tcp_spec = item->spec;
+	const struct rte_flow_item_tcp *tcp_mask =
+		item->mask ? item->mask : default_mask;
+	struct ibv_flow_spec_tcp_udp *verbs =
+		(void *)((uintptr_t)flow->ibv_attr + flow->offset);
+
+	flow->ibv_attr->num_of_specs += 1;
+	flow->ibv_attr->priority = 0;
+	/* Value and mask start zeroed. */
+	*verbs = (struct ibv_flow_spec_tcp_udp) {
+		.type = IBV_FLOW_SPEC_TCP,
+		.size = sizeof(*verbs),
+	};
+	if (tcp_spec == NULL)
+		return 0;
+	verbs->mask.dst_port = tcp_mask->hdr.dst_port;
+	verbs->mask.src_port = tcp_mask->hdr.src_port;
+	/* Store values with unwanted bits removed. */
+	verbs->val.dst_port = tcp_spec->hdr.dst_port & tcp_mask->hdr.dst_port;
+	verbs->val.src_port = tcp_spec->hdr.src_port & tcp_mask->hdr.src_port;
+	return 0;
+}
+
+/**
+ * Check support for a given item.
+ *
+ * An item is rejected when:
+ * - mask or last is provided without spec,
+ * - item->mask is provided and selects bits outside of the supported
+ *   mask (such bits would otherwise be silently ignored by conversion),
+ * - item->mask is not provided and spec or last have bits set outside of
+ *   the supported mask,
+ * - spec and last differ once masked (ranges are not supported).
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] mask
+ *   Bit-masks covering supported fields to compare with spec, last and
+ *   mask in \item.
+ * @param size
+ *   Bit-Mask size in bytes.
+ *
+ * @return
+ *   0 on success, negative value otherwise.
+ */
+static int
+mlx4_flow_item_validate(const struct rte_flow_item *item,
+			const uint8_t *mask, unsigned int size)
+{
+	const uint8_t *spec = item->spec;
+	const uint8_t *last = item->last;
+	const uint8_t *user_mask = item->mask;
+	const uint8_t *apply;
+	unsigned int i;
+
+	if (!spec && (user_mask || last))
+		return -1;
+	if (!spec)
+		return 0;
+	if (user_mask) {
+		/* The requested mask must be a subset of what is supported. */
+		for (i = 0; i < size; ++i)
+			if ((user_mask[i] | mask[i]) != mask[i])
+				return -1;
+	} else {
+		/* No mask given: spec and last must fit the supported mask. */
+		for (i = 0; i < size; ++i) {
+			if ((spec[i] | mask[i]) != mask[i])
+				return -1;
+			if (last && (last[i] | mask[i]) != mask[i])
+				return -1;
+		}
+	}
+	if (!last)
+		return 0;
+	/* Ranges are not supported, masked spec and last must be equal. */
+	apply = user_mask ? user_mask : mask;
+	for (i = 0; i < size; ++i)
+		if ((spec[i] & apply[i]) != (last[i] & apply[i]))
+			return -1;
+	return 0;
+}
+
+/**
+ * Check support for an Ethernet item.
+ *
+ * When a mask is provided by the item, every byte of its destination
+ * address mask must be 0xff. Remaining checks are done by
+ * mlx4_flow_item_validate().
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] mask
+ *   Bit-masks covering supported fields.
+ * @param size
+ *   Bit-Mask size in bytes.
+ *
+ * @return
+ *   0 on success, nonzero value otherwise.
+ */
+static int
+mlx4_flow_validate_eth(const struct rte_flow_item *item,
+		       const uint8_t *mask, unsigned int size)
+{
+	const struct rte_flow_item_eth *user_mask = item->mask;
+
+	if (user_mask != NULL) {
+		unsigned int byte;
+
+		for (byte = 0; byte != sizeof(user_mask->dst.addr_bytes);
+		     ++byte)
+			if (user_mask->dst.addr_bytes[byte] != 0xff)
+				return -1;
+	}
+	return mlx4_flow_item_validate(item, mask, size);
+}
+
+/**
+ * Check support for a VLAN item.
+ *
+ * When a mask is provided by the item, its TCI mask must be either zero
+ * or 0x0fff once converted with ntohs(). Remaining checks are done by
+ * mlx4_flow_item_validate().
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] mask
+ *   Bit-masks covering supported fields.
+ * @param size
+ *   Bit-Mask size in bytes.
+ *
+ * @return
+ *   0 on success, nonzero value otherwise.
+ */
+static int
+mlx4_flow_validate_vlan(const struct rte_flow_item *item,
+			const uint8_t *mask, unsigned int size)
+{
+	const struct rte_flow_item_vlan *user_mask = item->mask;
+
+	if (user_mask != NULL &&
+	    user_mask->tci != 0 &&
+	    ntohs(user_mask->tci) != 0x0fff)
+		return -1;
+	return mlx4_flow_item_validate(item, mask, size);
+}
+
+/**
+ * Check support for an IPv4 item.
+ *
+ * When a mask is provided by the item, its source and destination
+ * address masks must each be either zero or 0xffffffff. Remaining checks
+ * are done by mlx4_flow_item_validate().
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] mask
+ *   Bit-masks covering supported fields.
+ * @param size
+ *   Bit-Mask size in bytes.
+ *
+ * @return
+ *   0 on success, nonzero value otherwise.
+ */
+static int
+mlx4_flow_validate_ipv4(const struct rte_flow_item *item,
+			const uint8_t *mask, unsigned int size)
+{
+	const struct rte_flow_item_ipv4 *user_mask = item->mask;
+
+	if (user_mask != NULL) {
+		const uint32_t src = user_mask->hdr.src_addr;
+		const uint32_t dst = user_mask->hdr.dst_addr;
+
+		if (src != 0 && src != 0xffffffff)
+			return -1;
+		if (dst != 0 && dst != 0xffffffff)
+			return -1;
+	}
+	return mlx4_flow_item_validate(item, mask, size);
+}
+
+/**
+ * Check support for a UDP item.
+ *
+ * When a mask is provided by the item, its source and destination port
+ * masks must each be either zero or 0xffff. Remaining checks are done by
+ * mlx4_flow_item_validate().
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] mask
+ *   Bit-masks covering supported fields.
+ * @param size
+ *   Bit-Mask size in bytes.
+ *
+ * @return
+ *   0 on success, nonzero value otherwise.
+ */
+static int
+mlx4_flow_validate_udp(const struct rte_flow_item *item,
+		       const uint8_t *mask, unsigned int size)
+{
+	const struct rte_flow_item_udp *user_mask = item->mask;
+
+	if (user_mask != NULL) {
+		const uint16_t src = user_mask->hdr.src_port;
+		const uint16_t dst = user_mask->hdr.dst_port;
+
+		if (src != 0 && src != 0xffff)
+			return -1;
+		if (dst != 0 && dst != 0xffff)
+			return -1;
+	}
+	return mlx4_flow_item_validate(item, mask, size);
+}
+
+/**
+ * Check support for a TCP item.
+ *
+ * When a mask is provided by the item, its source and destination port
+ * masks must each be either zero or 0xffff. Remaining checks are done by
+ * mlx4_flow_item_validate().
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] mask
+ *   Bit-masks covering supported fields.
+ * @param size
+ *   Bit-Mask size in bytes.
+ *
+ * @return
+ *   0 on success, nonzero value otherwise.
+ */
+static int
+mlx4_flow_validate_tcp(const struct rte_flow_item *item,
+		       const uint8_t *mask, unsigned int size)
+{
+	const struct rte_flow_item_tcp *user_mask = item->mask;
+
+	if (user_mask != NULL) {
+		const uint16_t src = user_mask->hdr.src_port;
+		const uint16_t dst = user_mask->hdr.dst_port;
+
+		if (src != 0 && src != 0xffff)
+			return -1;
+		if (dst != 0 && dst != 0xffff)
+			return -1;
+	}
+	return mlx4_flow_item_validate(item, mask, size);
+}
+
+/**
+ * Graph of supported items and associated actions.
+ *
+ * Indexed by item type. Each entry lists the item types allowed to follow
+ * it; entries without an items list (UDP, TCP) cannot be followed by
+ * another item.
+ */
+static const struct mlx4_flow_items mlx4_flow_items[] = {
+ /*
+ * Starting point used when walking a pattern: only an Ethernet item
+ * may come first (assumes RTE_FLOW_ITEM_TYPE_END is index 0, since
+ * validation starts from the first table entry -- TODO confirm).
+ */
+ [RTE_FLOW_ITEM_TYPE_END] = {
+ .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
+ },
+ [RTE_FLOW_ITEM_TYPE_ETH] = {
+ .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
+ RTE_FLOW_ITEM_TYPE_IPV4),
+ .actions = valid_actions,
+ .mask = &(const struct rte_flow_item_eth){
+ .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+ .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+ },
+ .default_mask = &rte_flow_item_eth_mask,
+ .mask_sz = sizeof(struct rte_flow_item_eth),
+ .validate = mlx4_flow_validate_eth,
+ .convert = mlx4_flow_create_eth,
+ .dst_sz = sizeof(struct ibv_flow_spec_eth),
+ },
+ [RTE_FLOW_ITEM_TYPE_VLAN] = {
+ .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4),
+ .actions = valid_actions,
+ /* No default_mask here: the supported mask below is used instead. */
+ .mask = &(const struct rte_flow_item_vlan){
+ /* rte_flow_item_vlan_mask is invalid for mlx4. */
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+ .tci = 0x0fff,
+#else
+ .tci = 0xff0f,
+#endif
+ },
+ .mask_sz = sizeof(struct rte_flow_item_vlan),
+ .validate = mlx4_flow_validate_vlan,
+ .convert = mlx4_flow_create_vlan,
+ /* The VLAN tag is stored in the preceding Ethernet specification. */
+ .dst_sz = 0,
+ },
+ [RTE_FLOW_ITEM_TYPE_IPV4] = {
+ .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
+ RTE_FLOW_ITEM_TYPE_TCP),
+ .actions = valid_actions,
+ .mask = &(const struct rte_flow_item_ipv4){
+ .hdr = {
+ .src_addr = -1,
+ .dst_addr = -1,
+ },
+ },
+ .default_mask = &rte_flow_item_ipv4_mask,
+ .mask_sz = sizeof(struct rte_flow_item_ipv4),
+ .validate = mlx4_flow_validate_ipv4,
+ .convert = mlx4_flow_create_ipv4,
+ .dst_sz = sizeof(struct ibv_flow_spec_ipv4),
+ },
+ [RTE_FLOW_ITEM_TYPE_UDP] = {
+ .actions = valid_actions,
+ .mask = &(const struct rte_flow_item_udp){
+ .hdr = {
+ .src_port = -1,
+ .dst_port = -1,
+ },
+ },
+ .default_mask = &rte_flow_item_udp_mask,
+ .mask_sz = sizeof(struct rte_flow_item_udp),
+ .validate = mlx4_flow_validate_udp,
+ .convert = mlx4_flow_create_udp,
+ .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
+ },
+ [RTE_FLOW_ITEM_TYPE_TCP] = {
+ .actions = valid_actions,
+ .mask = &(const struct rte_flow_item_tcp){
+ .hdr = {
+ .src_port = -1,
+ .dst_port = -1,
+ },
+ },
+ .default_mask = &rte_flow_item_tcp_mask,
+ .mask_sz = sizeof(struct rte_flow_item_tcp),
+ .validate = mlx4_flow_validate_tcp,
+ .convert = mlx4_flow_create_tcp,
+ .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
+ },
+};
+
+/**
+ * Validate a flow supported by the NIC.
+ *
+ * Checks attributes, walks the pattern through the graph of supported
+ * items, then checks actions. When flow->ibv_attr is set, each item is
+ * also converted to its Verbs specification. In both cases flow->offset
+ * is increased by the size of the specification of each item.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[in] attr
+ *   Flow rule attributes.
+ * @param[in] items
+ *   Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ *   Associated actions (list terminated by the END action).
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ * @param[in, out] flow
+ *   Flow structure to update.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+priv_flow_validate(struct priv *priv,
+		   const struct rte_flow_attr *attr,
+		   const struct rte_flow_item items[],
+		   const struct rte_flow_action actions[],
+		   struct rte_flow_error *error,
+		   struct mlx4_flow *flow)
+{
+	const struct mlx4_flow_items *cur_item = mlx4_flow_items;
+	struct mlx4_flow_action action = {
+		.queue = 0,
+		.drop = 0,
+	};
+
+	if (attr->group) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
+				   NULL,
+				   "groups are not supported");
+		return -rte_errno;
+	}
+	if (attr->priority) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
+				   NULL,
+				   "priorities are not supported");
+		return -rte_errno;
+	}
+	if (attr->egress) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
+				   NULL,
+				   "egress is not supported");
+		return -rte_errno;
+	}
+	if (!attr->ingress) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
+				   NULL,
+				   "only ingress is supported");
+		return -rte_errno;
+	}
+	/* Go over items list. */
+	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
+		const struct mlx4_flow_items *token = NULL;
+		unsigned int i;
+		int err;
+
+		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
+			continue;
+		/*
+		 * The nic can support patterns with NULL eth spec only
+		 * if eth is a single item in a rule.
+		 */
+		if (!items->spec &&
+		    items->type == RTE_FLOW_ITEM_TYPE_ETH) {
+			const struct rte_flow_item *next = items + 1;
+
+			/* VOID items are placeholders, look past them. */
+			while (next->type == RTE_FLOW_ITEM_TYPE_VOID)
+				++next;
+			if (next->type != RTE_FLOW_ITEM_TYPE_END) {
+				rte_flow_error_set(error, ENOTSUP,
+						   RTE_FLOW_ERROR_TYPE_ITEM,
+						   items,
+						   "the rule requires"
+						   " an Ethernet spec");
+				return -rte_errno;
+			}
+		}
+		/* The item must be allowed to follow the previous one. */
+		for (i = 0;
+		     cur_item->items &&
+		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
+		     ++i) {
+			if (cur_item->items[i] == items->type) {
+				token = &mlx4_flow_items[items->type];
+				break;
+			}
+		}
+		if (!token)
+			goto exit_item_not_supported;
+		cur_item = token;
+		err = cur_item->validate(items,
+					 (const uint8_t *)cur_item->mask,
+					 cur_item->mask_sz);
+		if (err)
+			goto exit_item_not_supported;
+		/* Conversion only takes place once a buffer is provided. */
+		if (flow->ibv_attr && cur_item->convert) {
+			err = cur_item->convert(items,
+						(cur_item->default_mask ?
+						 cur_item->default_mask :
+						 cur_item->mask),
+						flow);
+			if (err)
+				goto exit_item_not_supported;
+		}
+		flow->offset += cur_item->dst_sz;
+	}
+	/* Go over actions list */
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
+		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
+			continue;
+		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
+			action.drop = 1;
+		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
+			const struct rte_flow_action_queue *queue =
+				(const struct rte_flow_action_queue *)
+				actions->conf;
+
+			/*
+			 * Compare with >= instead of subtracting one from
+			 * the queues count, which wraps around when no RX
+			 * queue is configured and accepts any index.
+			 */
+			if (!queue || queue->index >= priv->rxqs_n)
+				goto exit_action_not_supported;
+			action.queue = 1;
+		} else {
+			goto exit_action_not_supported;
+		}
+	}
+	if (!action.queue && !action.drop) {
+		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "no valid action");
+		return -rte_errno;
+	}
+	return 0;
+exit_item_not_supported:
+	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
+			   items, "item not supported");
+	return -rte_errno;
+exit_action_not_supported:
+	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+			   actions, "action not supported");
+	return -rte_errno;
+}
+
+/**
+ * Validate a flow supported by the NIC.
+ *
+ * Runs priv_flow_validate() under the private lock without providing a
+ * Verbs attributes buffer, hence nothing is converted.
+ *
+ * @see rte_flow_validate()
+ * @see rte_flow_ops
+ */
+int
+mlx4_flow_validate(struct rte_eth_dev *dev,
+		   const struct rte_flow_attr *attr,
+		   const struct rte_flow_item items[],
+		   const struct rte_flow_action actions[],
+		   struct rte_flow_error *error)
+{
+	struct mlx4_flow flow = { .offset = sizeof(struct ibv_flow_attr) };
+	struct priv *priv = dev->data->dev_private;
+	int status;
+
+	priv_lock(priv);
+	status = priv_flow_validate(priv, attr, items, actions, error, &flow);
+	priv_unlock(priv);
+	return status;
+}
+
+/**
+ * Complete flow rule creation.
+ *
+ * Allocates the flow handle and applies the rule either to the QP of the
+ * target RX queue, or for drop rules to a dedicated QP/CQ pair created
+ * for this flow.
+ *
+ * This function takes ownership of ibv_attr: it is stored in the returned
+ * flow on success and released on failure.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param ibv_attr
+ *   Verbs flow attributes.
+ * @param action
+ *   Target action structure.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   A flow if the rule could be created, NULL otherwise.
+ */
+static struct rte_flow *
+priv_flow_create_action_queue(struct priv *priv,
+			      struct ibv_flow_attr *ibv_attr,
+			      struct mlx4_flow_action *action,
+			      struct rte_flow_error *error)
+{
+	struct rxq *rxq;
+	struct ibv_qp *qp;
+	struct rte_flow *rte_flow;
+
+	assert(priv->pd);
+	assert(priv->ctx);
+	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
+	if (!rte_flow) {
+		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "cannot allocate flow memory");
+		/* Attributes are owned by this function, do not leak them. */
+		rte_free(ibv_attr);
+		return NULL;
+	}
+	/* Attach attributes right away so every error path releases them. */
+	rte_flow->ibv_attr = ibv_attr;
+	/*
+	 * The RX queue provides either the target QP or, for drop rules,
+	 * the resource domain of the dedicated QP. It must exist.
+	 */
+	rxq = (action->queue_id < priv->rxqs_n) ?
+	      (*priv->rxqs)[action->queue_id] : NULL;
+	if (!rxq) {
+		rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "RX queue is not configured");
+		goto error;
+	}
+	if (action->drop) {
+		rte_flow->cq =
+			ibv_exp_create_cq(priv->ctx, 1, NULL, NULL, 0,
+					  &(struct ibv_exp_cq_init_attr){
+						  .comp_mask = 0,
+					  });
+		if (!rte_flow->cq) {
+			rte_flow_error_set(error, ENOMEM,
+					   RTE_FLOW_ERROR_TYPE_HANDLE,
+					   NULL, "cannot allocate CQ");
+			goto error;
+		}
+		rte_flow->qp = ibv_exp_create_qp(
+			priv->ctx,
+			&(struct ibv_exp_qp_init_attr){
+				.send_cq = rte_flow->cq,
+				.recv_cq = rte_flow->cq,
+				.cap = {
+					.max_recv_wr = 1,
+					.max_recv_sge = 1,
+				},
+				.qp_type = IBV_QPT_RAW_PACKET,
+				.comp_mask =
+					IBV_EXP_QP_INIT_ATTR_PD |
+					IBV_EXP_QP_INIT_ATTR_PORT |
+					IBV_EXP_QP_INIT_ATTR_RES_DOMAIN,
+				.pd = priv->pd,
+				.res_domain = rxq->rd,
+				.port_num = priv->port,
+			});
+		if (!rte_flow->qp) {
+			rte_flow_error_set(error, ENOMEM,
+					   RTE_FLOW_ERROR_TYPE_HANDLE,
+					   NULL, "cannot allocate QP");
+			goto error;
+		}
+		qp = rte_flow->qp;
+	} else {
+		rte_flow->rxq = rxq;
+		qp = rxq->qp;
+	}
+	rte_flow->ibv_flow = ibv_create_flow(qp, rte_flow->ibv_attr);
+	if (!rte_flow->ibv_flow) {
+		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "flow rule creation failure");
+		goto error;
+	}
+	return rte_flow;
+
+error:
+	assert(rte_flow);
+	/* The QP references the CQ and must be destroyed first. */
+	if (rte_flow->qp)
+		ibv_destroy_qp(rte_flow->qp);
+	if (rte_flow->cq)
+		ibv_destroy_cq(rte_flow->cq);
+	rte_free(rte_flow->ibv_attr);
+	rte_free(rte_flow);
+	return NULL;
+}
+
+/**
+ * Convert a flow.
+ *
+ * Validation runs twice: a first time without buffer to check the rule
+ * and compute the size of the Verbs attributes, a second time to fill
+ * the allocated buffer. The resulting attributes and the target action
+ * are then handed to priv_flow_create_action_queue().
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[in] attr
+ *   Flow rule attributes.
+ * @param[in] items
+ *   Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ *   Associated actions (list terminated by the END action).
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   A flow on success, NULL otherwise.
+ */
+static struct rte_flow *
+priv_flow_create(struct priv *priv,
+		 const struct rte_flow_attr *attr,
+		 const struct rte_flow_item items[],
+		 const struct rte_flow_action actions[],
+		 struct rte_flow_error *error)
+{
+	struct mlx4_flow flow = { .offset = sizeof(struct ibv_flow_attr), };
+	struct mlx4_flow_action action = {
+		.queue = 0,
+		.drop = 0,
+	};
+
+	/* First pass: check the rule and compute the buffer size. */
+	if (priv_flow_validate(priv, attr, items, actions, error, &flow))
+		return NULL;
+	flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
+	if (flow.ibv_attr == NULL) {
+		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "cannot allocate ibv_attr memory");
+		return NULL;
+	}
+	/* Second pass: specifications are written after the attributes. */
+	flow.offset = sizeof(struct ibv_flow_attr);
+	*flow.ibv_attr = (struct ibv_flow_attr){
+		.comp_mask = 0,
+		.type = IBV_FLOW_ATTR_NORMAL,
+		.size = sizeof(struct ibv_flow_attr),
+		.priority = attr->priority,
+		.num_of_specs = 0,
+		.port = priv->port,
+		.flags = 0,
+	};
+	claim_zero(priv_flow_validate(priv, attr, items, actions,
+				      error, &flow));
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
+		switch (actions->type) {
+		case RTE_FLOW_ACTION_TYPE_VOID:
+			break;
+		case RTE_FLOW_ACTION_TYPE_QUEUE:
+			action.queue = 1;
+			action.queue_id =
+				((const struct rte_flow_action_queue *)
+				 actions->conf)->index;
+			break;
+		case RTE_FLOW_ACTION_TYPE_DROP:
+			action.drop = 1;
+			break;
+		default:
+			rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   actions, "unsupported action");
+			goto exit;
+		}
+	}
+	return priv_flow_create_action_queue(priv, flow.ibv_attr,
+					     &action, error);
+exit:
+	rte_free(flow.ibv_attr);
+	return NULL;
+}
+
+/**
+ * Create a flow.
+ *
+ * The flow is created under the private lock and, on success, inserted
+ * at the head of the list of flows of the device.
+ *
+ * @see rte_flow_create()
+ * @see rte_flow_ops
+ */
+struct rte_flow *
+mlx4_flow_create(struct rte_eth_dev *dev,
+		 const struct rte_flow_attr *attr,
+		 const struct rte_flow_item items[],
+		 const struct rte_flow_action actions[],
+		 struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+	struct rte_flow *created;
+
+	priv_lock(priv);
+	created = priv_flow_create(priv, attr, items, actions, error);
+	if (created != NULL) {
+		LIST_INSERT_HEAD(&priv->flows, created, next);
+		DEBUG("Flow created %p", (void *)created);
+	}
+	priv_unlock(priv);
+	return created;
+}
+
+/**
+ * Destroy a flow.
+ *
+ * Unlinks the flow from its list, then releases the Verbs flow, the
+ * QP and CQ owned by the flow (if any), the Verbs attributes and the
+ * flow structure itself.
+ *
+ * @param priv
+ *   Pointer to private structure (unused).
+ * @param[in] flow
+ *   Flow to destroy.
+ */
+static void
+priv_flow_destroy(struct priv *priv, struct rte_flow *flow)
+{
+	(void)priv;
+	LIST_REMOVE(flow, next);
+	if (flow->ibv_flow != NULL)
+		claim_zero(ibv_destroy_flow(flow->ibv_flow));
+	/* QP goes before the CQ it is attached to. */
+	if (flow->qp != NULL)
+		claim_zero(ibv_destroy_qp(flow->qp));
+	if (flow->cq != NULL)
+		claim_zero(ibv_destroy_cq(flow->cq));
+	rte_free(flow->ibv_attr);
+	DEBUG("Flow destroyed %p", (void *)flow);
+	rte_free(flow);
+}
+
+/**
+ * Destroy a flow.
+ *
+ * Destroys the flow under the private lock. The error structure is not
+ * used since this operation always succeeds.
+ *
+ * @see rte_flow_destroy()
+ * @see rte_flow_ops
+ */
+int
+mlx4_flow_destroy(struct rte_eth_dev *dev,
+		  struct rte_flow *flow,
+		  struct rte_flow_error *error)
+{
+	struct priv *const priv = dev->data->dev_private;
+
+	priv_lock(priv);
+	priv_flow_destroy(priv, flow);
+	priv_unlock(priv);
+	(void)error;
+	return 0;
+}
+
+/**
+ * Destroy all flows.
+ *
+ * Keeps destroying the first flow of the list until the list is empty.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+static void
+priv_flow_flush(struct priv *priv)
+{
+	struct rte_flow *head;
+
+	while ((head = LIST_FIRST(&priv->flows)) != NULL)
+		priv_flow_destroy(priv, head);
+}
+
+/**
+ * Destroy all flows.
+ *
+ * Flushes the list of flows under the private lock. The error structure
+ * is not used since this operation always succeeds.
+ *
+ * @see rte_flow_flush()
+ * @see rte_flow_ops
+ */
+int
+mlx4_flow_flush(struct rte_eth_dev *dev,
+		struct rte_flow_error *error)
+{
+	struct priv *const priv = dev->data->dev_private;
+
+	priv_lock(priv);
+	priv_flow_flush(priv);
+	priv_unlock(priv);
+	(void)error;
+	return 0;
+}
+
+/**
+ * Remove all flows.
+ *
+ * Called by dev_stop() to remove all flows. Flow handles remain in the
+ * list with their Verbs attributes so that mlx4_priv_flow_start() can
+ * apply them again; only the Verbs flows are destroyed.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+void
+mlx4_priv_flow_stop(struct priv *priv)
+{
+	struct rte_flow *flow;
+
+	for (flow = LIST_FIRST(&priv->flows);
+	     flow;
+	     flow = LIST_NEXT(flow, next)) {
+		/* Flows that are not currently applied have no Verbs flow. */
+		if (!flow->ibv_flow)
+			continue;
+		claim_zero(ibv_destroy_flow(flow->ibv_flow));
+		flow->ibv_flow = NULL;
+		DEBUG("Flow %p removed", (void *)flow);
+	}
+}
+
+/**
+ * Add all flows.
+ *
+ * Applies every flow of the list that is not applied yet. If one of them
+ * cannot be applied, all Verbs flows are removed again so that no flow
+ * remains applied after a failure.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ *
+ * @return
+ *   0 on success, an errno value otherwise and rte_errno is set.
+ */
+int
+mlx4_priv_flow_start(struct priv *priv)
+{
+	struct ibv_qp *qp;
+	struct rte_flow *flow;
+
+	for (flow = LIST_FIRST(&priv->flows);
+	     flow;
+	     flow = LIST_NEXT(flow, next)) {
+		/* Do not overwrite (and leak) an existing Verbs flow. */
+		if (flow->ibv_flow)
+			continue;
+		qp = flow->qp ? flow->qp : flow->rxq->qp;
+		flow->ibv_flow = ibv_create_flow(qp, flow->ibv_attr);
+		if (!flow->ibv_flow) {
+			DEBUG("Flow %p cannot be applied", (void *)flow);
+			goto rollback;
+		}
+		DEBUG("Flow %p applied", (void *)flow);
+	}
+	return 0;
+rollback:
+	for (flow = LIST_FIRST(&priv->flows);
+	     flow;
+	     flow = LIST_NEXT(flow, next)) {
+		if (!flow->ibv_flow)
+			continue;
+		claim_zero(ibv_destroy_flow(flow->ibv_flow));
+		flow->ibv_flow = NULL;
+	}
+	rte_errno = EINVAL;
+	return rte_errno;
+}
diff --git a/drivers/net/mlx4/mlx4_flow.h b/drivers/net/mlx4/mlx4_flow.h
new file mode 100644
index 0000000..537ffdf
--- /dev/null
+++ b/drivers/net/mlx4/mlx4_flow.h
@@ -0,0 +1,104 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RTE_PMD_MLX4_FLOW_H_
+#define RTE_PMD_MLX4_FLOW_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/queue.h>
+
+/* Verbs header. */
+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+#include <rte_byteorder.h>
+
+#include "mlx4.h"
+
+/**
+ * PMD-specific definition of a flow rule handle.
+ *
+ * Handles are linked together in the list of flows of the device.
+ */
+struct rte_flow {
+ LIST_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
+ struct rxq *rxq; /**< Pointer to the queue, NULL if drop queue. */
+ struct ibv_flow *ibv_flow; /**< Verbs flow. */
+ struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
+ struct ibv_qp *qp; /**< Verbs queue pair. */
+ struct ibv_cq *cq; /**< Verbs completion queue. */
+};
+
+/* Validate a flow rule, see rte_flow_validate(). */
+int
+mlx4_flow_validate(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error);
+
+/* Create a flow rule, see rte_flow_create(). */
+struct rte_flow *
+mlx4_flow_create(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error);
+
+/* Destroy a flow rule, see rte_flow_destroy(). */
+int
+mlx4_flow_destroy(struct rte_eth_dev *,
+ struct rte_flow *,
+ struct rte_flow_error *);
+
+/* Destroy all flow rules, see rte_flow_flush(). */
+int
+mlx4_flow_flush(struct rte_eth_dev *dev,
+ struct rte_flow_error *error);
+
+/**
+ * Structure to pass to the conversion function.
+ *
+ * Specifications are written into the ibv_attr buffer at the current
+ * offset; ibv_attr is left NULL when only validation is requested.
+ */
+struct mlx4_flow {
+ struct ibv_flow_attr *ibv_attr; /**< Verbs attribute. */
+ unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
+};
+
+/** Target of a flow rule as gathered from its list of actions. */
+struct mlx4_flow_action {
+ uint32_t drop:1; /**< Target is a drop queue. */
+ uint32_t queue:1; /**< Target is a receive queue. */
+ uint32_t queue_id; /**< Identifier of the queue. */
+};
+
+/* Apply all flows of the list (start) or remove them from the NIC (stop). */
+int mlx4_priv_flow_start(struct priv *);
+void mlx4_priv_flow_stop(struct priv *);
+
+#endif /* RTE_PMD_MLX4_FLOW_H_ */
--
1.8.3.1
^ permalink raw reply [flat|nested] 15+ messages in thread
* [dpdk-dev] [PATCH v3 1/2] net/mlx4: split the definitions to the header file
2017-02-21 14:07 [dpdk-dev] [PATCH 1/2] net/mlx4: split the definitions to the header file Vasily Philipov
` (3 preceding siblings ...)
2017-02-22 13:42 ` [dpdk-dev] [PATCH v2 2/2] net/mlx4: support basic flow items and actions Vasily Philipov
@ 2017-03-05 7:51 ` Vasily Philipov
2017-03-20 9:19 ` Nélio Laranjeiro
2017-03-05 7:51 ` [dpdk-dev] [PATCH v3 2/2] net/mlx4: support basic flow items and actions Vasily Philipov
5 siblings, 1 reply; 15+ messages in thread
From: Vasily Philipov @ 2017-03-05 7:51 UTC (permalink / raw)
To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro
Make the priv_lock()/priv_unlock() functions non-static and declare them in
mlx4.h, and move the queue/private data structures and the logging macros from
mlx4.c into the mlx4.h header, so that other source files can use them.
Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
drivers/net/mlx4/mlx4.c | 193 +-----------------------------------------------
drivers/net/mlx4/mlx4.h | 187 +++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 189 insertions(+), 191 deletions(-)
diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 79efaaa..8f6c57f 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -1,8 +1,8 @@
/*-
* BSD LICENSE
*
- * Copyright 2012-2015 6WIND S.A.
- * Copyright 2012 Mellanox.
+ * Copyright 2012-2017 6WIND S.A.
+ * Copyright 2012-2017 Mellanox.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -58,20 +58,6 @@
#include <linux/sockios.h>
#include <fcntl.h>
-/* Verbs header. */
-/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
-#include <infiniband/verbs.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
-
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_dev.h>
@@ -86,9 +72,6 @@
#include <rte_log.h>
#include <rte_alarm.h>
#include <rte_memory.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
/* Generated configuration header. */
#include "mlx4_autoconf.h"
@@ -96,21 +79,6 @@
/* PMD header. */
#include "mlx4.h"
-/* Runtime logging through RTE_LOG() is enabled when not in debugging mode.
- * Intermediate LOG_*() macros add the required end-of-line characters. */
-#ifndef NDEBUG
-#define INFO(...) DEBUG(__VA_ARGS__)
-#define WARN(...) DEBUG(__VA_ARGS__)
-#define ERROR(...) DEBUG(__VA_ARGS__)
-#else
-#define LOG__(level, m, ...) \
- RTE_LOG(level, PMD, MLX4_DRIVER_NAME ": " m "%c", __VA_ARGS__)
-#define LOG_(level, ...) LOG__(level, __VA_ARGS__, '\n')
-#define INFO(...) LOG_(INFO, __VA_ARGS__)
-#define WARN(...) LOG_(WARNING, __VA_ARGS__)
-#define ERROR(...) LOG_(ERR, __VA_ARGS__)
-#endif
-
/* Convenience macros for accessing mbuf fields. */
#define NEXT(m) ((m)->next)
#define DATA_LEN(m) ((m)->data_len)
@@ -137,157 +105,6 @@
(((val) & (from)) / ((from) / (to))) : \
(((val) & (from)) * ((to) / (from))))
-struct mlx4_rxq_stats {
- unsigned int idx; /**< Mapping index. */
-#ifdef MLX4_PMD_SOFT_COUNTERS
- uint64_t ipackets; /**< Total of successfully received packets. */
- uint64_t ibytes; /**< Total of successfully received bytes. */
-#endif
- uint64_t idropped; /**< Total of packets dropped when RX ring full. */
- uint64_t rx_nombuf; /**< Total of RX mbuf allocation failures. */
-};
-
-struct mlx4_txq_stats {
- unsigned int idx; /**< Mapping index. */
-#ifdef MLX4_PMD_SOFT_COUNTERS
- uint64_t opackets; /**< Total of successfully sent packets. */
- uint64_t obytes; /**< Total of successfully sent bytes. */
-#endif
- uint64_t odropped; /**< Total of packets not sent when TX ring full. */
-};
-
-/* RX element (scattered packets). */
-struct rxq_elt_sp {
- struct ibv_recv_wr wr; /* Work Request. */
- struct ibv_sge sges[MLX4_PMD_SGE_WR_N]; /* Scatter/Gather Elements. */
- struct rte_mbuf *bufs[MLX4_PMD_SGE_WR_N]; /* SGEs buffers. */
-};
-
-/* RX element. */
-struct rxq_elt {
- struct ibv_recv_wr wr; /* Work Request. */
- struct ibv_sge sge; /* Scatter/Gather Element. */
- /* mbuf pointer is derived from WR_ID(wr.wr_id).offset. */
-};
-
-/* RX queue descriptor. */
-struct rxq {
- struct priv *priv; /* Back pointer to private data. */
- struct rte_mempool *mp; /* Memory Pool for allocations. */
- struct ibv_mr *mr; /* Memory Region (for mp). */
- struct ibv_cq *cq; /* Completion Queue. */
- struct ibv_qp *qp; /* Queue Pair. */
- struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
- struct ibv_exp_cq_family *if_cq; /* CQ interface. */
- /*
- * Each VLAN ID requires a separate flow steering rule.
- */
- BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
- struct ibv_flow *mac_flow[MLX4_MAX_MAC_ADDRESSES][MLX4_MAX_VLAN_IDS];
- struct ibv_flow *promisc_flow; /* Promiscuous flow. */
- struct ibv_flow *allmulti_flow; /* Multicast flow. */
- unsigned int port_id; /* Port ID for incoming packets. */
- unsigned int elts_n; /* (*elts)[] length. */
- unsigned int elts_head; /* Current index in (*elts)[]. */
- union {
- struct rxq_elt_sp (*sp)[]; /* Scattered RX elements. */
- struct rxq_elt (*no_sp)[]; /* RX elements. */
- } elts;
- unsigned int sp:1; /* Use scattered RX elements. */
- unsigned int csum:1; /* Enable checksum offloading. */
- unsigned int csum_l2tun:1; /* Same for L2 tunnels. */
- struct mlx4_rxq_stats stats; /* RX queue counters. */
- unsigned int socket; /* CPU socket ID for allocations. */
- struct ibv_exp_res_domain *rd; /* Resource Domain. */
-};
-
-/* TX element. */
-struct txq_elt {
- struct rte_mbuf *buf;
-};
-
-/* Linear buffer type. It is used when transmitting buffers with too many
- * segments that do not fit the hardware queue (see max_send_sge).
- * Extra segments are copied (linearized) in such buffers, replacing the
- * last SGE during TX.
- * The size is arbitrary but large enough to hold a jumbo frame with
- * 8 segments considering mbuf.buf_len is about 2048 bytes. */
-typedef uint8_t linear_t[16384];
-
-/* TX queue descriptor. */
-struct txq {
- struct priv *priv; /* Back pointer to private data. */
- struct {
- const struct rte_mempool *mp; /* Cached Memory Pool. */
- struct ibv_mr *mr; /* Memory Region (for mp). */
- uint32_t lkey; /* mr->lkey */
- } mp2mr[MLX4_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
- struct ibv_cq *cq; /* Completion Queue. */
- struct ibv_qp *qp; /* Queue Pair. */
- struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
- struct ibv_exp_cq_family *if_cq; /* CQ interface. */
-#if MLX4_PMD_MAX_INLINE > 0
- uint32_t max_inline; /* Max inline send size <= MLX4_PMD_MAX_INLINE. */
-#endif
- unsigned int elts_n; /* (*elts)[] length. */
- struct txq_elt (*elts)[]; /* TX elements. */
- unsigned int elts_head; /* Current index in (*elts)[]. */
- unsigned int elts_tail; /* First element awaiting completion. */
- unsigned int elts_comp; /* Number of completion requests. */
- unsigned int elts_comp_cd; /* Countdown for next completion request. */
- unsigned int elts_comp_cd_init; /* Initial value for countdown. */
- struct mlx4_txq_stats stats; /* TX queue counters. */
- linear_t (*elts_linear)[]; /* Linearized buffers. */
- struct ibv_mr *mr_linear; /* Memory Region for linearized buffers. */
- unsigned int socket; /* CPU socket ID for allocations. */
- struct ibv_exp_res_domain *rd; /* Resource Domain. */
-};
-
-struct priv {
- struct rte_eth_dev *dev; /* Ethernet device. */
- struct ibv_context *ctx; /* Verbs context. */
- struct ibv_device_attr device_attr; /* Device properties. */
- struct ibv_pd *pd; /* Protection Domain. */
- /*
- * MAC addresses array and configuration bit-field.
- * An extra entry that cannot be modified by the DPDK is reserved
- * for broadcast frames (destination MAC address ff:ff:ff:ff:ff:ff).
- */
- struct ether_addr mac[MLX4_MAX_MAC_ADDRESSES];
- BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
- /* VLAN filters. */
- struct {
- unsigned int enabled:1; /* If enabled. */
- unsigned int id:12; /* VLAN ID (0-4095). */
- } vlan_filter[MLX4_MAX_VLAN_IDS]; /* VLAN filters table. */
- /* Device properties. */
- uint16_t mtu; /* Configured MTU. */
- uint8_t port; /* Physical port number. */
- unsigned int started:1; /* Device started, flows enabled. */
- unsigned int promisc:1; /* Device in promiscuous mode. */
- unsigned int allmulti:1; /* Device receives all multicast packets. */
- unsigned int hw_qpg:1; /* QP groups are supported. */
- unsigned int hw_tss:1; /* TSS is supported. */
- unsigned int hw_rss:1; /* RSS is supported. */
- unsigned int hw_csum:1; /* Checksum offload is supported. */
- unsigned int hw_csum_l2tun:1; /* Same for L2 tunnels. */
- unsigned int rss:1; /* RSS is enabled. */
- unsigned int vf:1; /* This is a VF device. */
- unsigned int pending_alarm:1; /* An alarm is pending. */
-#ifdef INLINE_RECV
- unsigned int inl_recv_size; /* Inline recv size */
-#endif
- unsigned int max_rss_tbl_sz; /* Maximum number of RSS queues. */
- /* RX/TX queues. */
- struct rxq rxq_parent; /* Parent queue when RSS is enabled. */
- unsigned int rxqs_n; /* RX queues array size. */
- unsigned int txqs_n; /* TX queues array size. */
- struct rxq *(*rxqs)[]; /* RX queues. */
- struct txq *(*txqs)[]; /* TX queues. */
- struct rte_intr_handle intr_handle; /* Interrupt handler. */
- rte_spinlock_t lock; /* Lock for control functions. */
-};
-
/* Local storage for secondary process data. */
struct mlx4_secondary_data {
struct rte_eth_dev_data data; /* Local device data. */
@@ -335,8 +152,7 @@ struct mlx4_secondary_data {
* @param priv
* Pointer to private structure.
*/
-static void
-priv_lock(struct priv *priv)
+void priv_lock(struct priv *priv)
{
rte_spinlock_lock(&priv->lock);
}
@@ -347,8 +163,7 @@ struct mlx4_secondary_data {
* @param priv
* Pointer to private structure.
*/
-static void
-priv_unlock(struct priv *priv)
+void priv_unlock(struct priv *priv)
{
rte_spinlock_unlock(&priv->lock);
}
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 4c7505e..70c9ecd 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -1,8 +1,8 @@
/*-
* BSD LICENSE
*
- * Copyright 2012-2015 6WIND S.A.
- * Copyright 2012 Mellanox.
+ * Copyright 2012-2017 6WIND S.A.
+ * Copyright 2012-2017 Mellanox.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -39,6 +39,33 @@
#include <limits.h>
/*
+ * Runtime logging through RTE_LOG() is enabled when not in debugging mode.
+ * Intermediate LOG_*() macros add the required end-of-line characters.
+ */
+#ifndef NDEBUG
+#define INFO(...) DEBUG(__VA_ARGS__)
+#define WARN(...) DEBUG(__VA_ARGS__)
+#define ERROR(...) DEBUG(__VA_ARGS__)
+#else
+#define LOG__(level, m, ...) \
+ RTE_LOG(level, PMD, MLX4_DRIVER_NAME ": " m "%c", __VA_ARGS__)
+#define LOG_(level, ...) LOG__(level, __VA_ARGS__, '\n')
+#define INFO(...) LOG_(INFO, __VA_ARGS__)
+#define WARN(...) LOG_(WARNING, __VA_ARGS__)
+#define ERROR(...) LOG_(ERR, __VA_ARGS__)
+#endif
+
+/* Verbs header. */
+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+/*
* Maximum number of simultaneous MAC addresses supported.
*
* According to ConnectX's Programmer Reference Manual:
@@ -160,4 +187,160 @@ enum {
#define claim_positive(...) (__VA_ARGS__)
#endif /* NDEBUG */
+struct mlx4_rxq_stats {
+ unsigned int idx; /**< Mapping index. */
+#ifdef MLX4_PMD_SOFT_COUNTERS
+ uint64_t ipackets; /**< Total of successfully received packets. */
+ uint64_t ibytes; /**< Total of successfully received bytes. */
+#endif
+ uint64_t idropped; /**< Total of packets dropped when RX ring full. */
+ uint64_t rx_nombuf; /**< Total of RX mbuf allocation failures. */
+};
+
+/* RX element (scattered packets). */
+struct rxq_elt_sp {
+ struct ibv_recv_wr wr; /* Work Request. */
+ struct ibv_sge sges[MLX4_PMD_SGE_WR_N]; /* Scatter/Gather Elements. */
+ struct rte_mbuf *bufs[MLX4_PMD_SGE_WR_N]; /* SGEs buffers. */
+};
+
+/* RX element. */
+struct rxq_elt {
+ struct ibv_recv_wr wr; /* Work Request. */
+ struct ibv_sge sge; /* Scatter/Gather Element. */
+ /* mbuf pointer is derived from WR_ID(wr.wr_id).offset. */
+};
+
+/* RX queue descriptor. */
+struct rxq {
+ struct priv *priv; /* Back pointer to private data. */
+ struct rte_mempool *mp; /* Memory Pool for allocations. */
+ struct ibv_mr *mr; /* Memory Region (for mp). */
+ struct ibv_cq *cq; /* Completion Queue. */
+ struct ibv_qp *qp; /* Queue Pair. */
+ struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
+ struct ibv_exp_cq_family *if_cq; /* CQ interface. */
+ /*
+ * Each VLAN ID requires a separate flow steering rule.
+ */
+ BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
+ struct ibv_flow *mac_flow[MLX4_MAX_MAC_ADDRESSES][MLX4_MAX_VLAN_IDS];
+ struct ibv_flow *promisc_flow; /* Promiscuous flow. */
+ struct ibv_flow *allmulti_flow; /* Multicast flow. */
+ unsigned int port_id; /* Port ID for incoming packets. */
+ unsigned int elts_n; /* (*elts)[] length. */
+ unsigned int elts_head; /* Current index in (*elts)[]. */
+ union {
+ struct rxq_elt_sp (*sp)[]; /* Scattered RX elements. */
+ struct rxq_elt (*no_sp)[]; /* RX elements. */
+ } elts;
+ unsigned int sp:1; /* Use scattered RX elements. */
+ unsigned int csum:1; /* Enable checksum offloading. */
+ unsigned int csum_l2tun:1; /* Same for L2 tunnels. */
+ struct mlx4_rxq_stats stats; /* RX queue counters. */
+ unsigned int socket; /* CPU socket ID for allocations. */
+ struct ibv_exp_res_domain *rd; /* Resource Domain. */
+};
+
+/* TX element. */
+struct txq_elt {
+ struct rte_mbuf *buf;
+};
+
+struct mlx4_txq_stats {
+ unsigned int idx; /**< Mapping index. */
+#ifdef MLX4_PMD_SOFT_COUNTERS
+ uint64_t opackets; /**< Total of successfully sent packets. */
+ uint64_t obytes; /**< Total of successfully sent bytes. */
+#endif
+ uint64_t odropped; /**< Total of packets not sent when TX ring full. */
+};
+
+/*
+ * Linear buffer type. It is used when transmitting buffers with too many
+ * segments that do not fit the hardware queue (see max_send_sge).
+ * Extra segments are copied (linearized) in such buffers, replacing the
+ * last SGE during TX.
+ * The size is arbitrary but large enough to hold a jumbo frame with
+ * 8 segments considering mbuf.buf_len is about 2048 bytes.
+ */
+typedef uint8_t linear_t[16384];
+
+/* TX queue descriptor. */
+struct txq {
+ struct priv *priv; /* Back pointer to private data. */
+ struct {
+ const struct rte_mempool *mp; /* Cached Memory Pool. */
+ struct ibv_mr *mr; /* Memory Region (for mp). */
+ uint32_t lkey; /* mr->lkey */
+ } mp2mr[MLX4_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
+ struct ibv_cq *cq; /* Completion Queue. */
+ struct ibv_qp *qp; /* Queue Pair. */
+ struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
+ struct ibv_exp_cq_family *if_cq; /* CQ interface. */
+#if MLX4_PMD_MAX_INLINE > 0
+ uint32_t max_inline; /* Max inline send size <= MLX4_PMD_MAX_INLINE. */
+#endif
+ unsigned int elts_n; /* (*elts)[] length. */
+ struct txq_elt (*elts)[]; /* TX elements. */
+ unsigned int elts_head; /* Current index in (*elts)[]. */
+ unsigned int elts_tail; /* First element awaiting completion. */
+ unsigned int elts_comp; /* Number of completion requests. */
+ unsigned int elts_comp_cd; /* Countdown for next completion request. */
+ unsigned int elts_comp_cd_init; /* Initial value for countdown. */
+ struct mlx4_txq_stats stats; /* TX queue counters. */
+ linear_t (*elts_linear)[]; /* Linearized buffers. */
+ struct ibv_mr *mr_linear; /* Memory Region for linearized buffers. */
+ unsigned int socket; /* CPU socket ID for allocations. */
+ struct ibv_exp_res_domain *rd; /* Resource Domain. */
+};
+
+struct priv {
+ struct rte_eth_dev *dev; /* Ethernet device. */
+ struct ibv_context *ctx; /* Verbs context. */
+ struct ibv_device_attr device_attr; /* Device properties. */
+ struct ibv_pd *pd; /* Protection Domain. */
+ /*
+ * MAC addresses array and configuration bit-field.
+ * An extra entry that cannot be modified by the DPDK is reserved
+ * for broadcast frames (destination MAC address ff:ff:ff:ff:ff:ff).
+ */
+ struct ether_addr mac[MLX4_MAX_MAC_ADDRESSES];
+ BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
+ /* VLAN filters. */
+ struct {
+ unsigned int enabled:1; /* If enabled. */
+ unsigned int id:12; /* VLAN ID (0-4095). */
+ } vlan_filter[MLX4_MAX_VLAN_IDS]; /* VLAN filters table. */
+ /* Device properties. */
+ uint16_t mtu; /* Configured MTU. */
+ uint8_t port; /* Physical port number. */
+ unsigned int started:1; /* Device started, flows enabled. */
+ unsigned int promisc:1; /* Device in promiscuous mode. */
+ unsigned int allmulti:1; /* Device receives all multicast packets. */
+ unsigned int hw_qpg:1; /* QP groups are supported. */
+ unsigned int hw_tss:1; /* TSS is supported. */
+ unsigned int hw_rss:1; /* RSS is supported. */
+ unsigned int hw_csum:1; /* Checksum offload is supported. */
+ unsigned int hw_csum_l2tun:1; /* Same for L2 tunnels. */
+ unsigned int rss:1; /* RSS is enabled. */
+ unsigned int vf:1; /* This is a VF device. */
+ unsigned int pending_alarm:1; /* An alarm is pending. */
+#ifdef INLINE_RECV
+ unsigned int inl_recv_size; /* Inline recv size */
+#endif
+ unsigned int max_rss_tbl_sz; /* Maximum number of RSS queues. */
+ /* RX/TX queues. */
+ struct rxq rxq_parent; /* Parent queue when RSS is enabled. */
+ unsigned int rxqs_n; /* RX queues array size. */
+ unsigned int txqs_n; /* TX queues array size. */
+ struct rxq *(*rxqs)[]; /* RX queues. */
+ struct txq *(*txqs)[]; /* TX queues. */
+ struct rte_intr_handle intr_handle; /* Interrupt handler. */
+ rte_spinlock_t lock; /* Lock for control functions. */
+};
+
+void priv_lock(struct priv *priv);
+void priv_unlock(struct priv *priv);
+
#endif /* RTE_PMD_MLX4_H_ */
--
1.8.3.1
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [dpdk-dev] [PATCH v3 1/2] net/mlx4: split the definitions to the header file
2017-03-05 7:51 ` [dpdk-dev] [PATCH v3 1/2] net/mlx4: split the definitions to the header file Vasily Philipov
@ 2017-03-20 9:19 ` Nélio Laranjeiro
2017-03-20 14:18 ` Ferruh Yigit
0 siblings, 1 reply; 15+ messages in thread
From: Nélio Laranjeiro @ 2017-03-20 9:19 UTC (permalink / raw)
To: Vasily Philipov; +Cc: dev, Adrien Mazarguil
On Sun, Mar 05, 2017 at 09:51:31AM +0200, Vasily Philipov wrote:
> Make priv_lock/priv_unlock functions and some other structs/defines visible
> from different source files by placing them into mlx4.h header.
>
> Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
> ---
> drivers/net/mlx4/mlx4.c | 193 +-----------------------------------------------
> drivers/net/mlx4/mlx4.h | 187 +++++++++++++++++++++++++++++++++++++++++++++-
> 2 files changed, 189 insertions(+), 191 deletions(-)
>
> diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
> index 79efaaa..8f6c57f 100644
> --- a/drivers/net/mlx4/mlx4.c
> +++ b/drivers/net/mlx4/mlx4.c
> @@ -1,8 +1,8 @@
> /*-
> * BSD LICENSE
> *
> - * Copyright 2012-2015 6WIND S.A.
> - * Copyright 2012 Mellanox.
> + * Copyright 2012-2017 6WIND S.A.
> + * Copyright 2012-2017 Mellanox.
> *
> * Redistribution and use in source and binary forms, with or without
> * modification, are permitted provided that the following conditions
> @@ -58,20 +58,6 @@
> #include <linux/sockios.h>
> #include <fcntl.h>
>
> -/* Verbs header. */
> -/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
> -#ifdef PEDANTIC
> -#pragma GCC diagnostic ignored "-Wpedantic"
> -#endif
> -#include <infiniband/verbs.h>
> -#ifdef PEDANTIC
> -#pragma GCC diagnostic error "-Wpedantic"
> -#endif
> -
> -/* DPDK headers don't like -pedantic. */
> -#ifdef PEDANTIC
> -#pragma GCC diagnostic ignored "-Wpedantic"
> -#endif
> #include <rte_ether.h>
> #include <rte_ethdev.h>
> #include <rte_dev.h>
> @@ -86,9 +72,6 @@
> #include <rte_log.h>
> #include <rte_alarm.h>
> #include <rte_memory.h>
> -#ifdef PEDANTIC
> -#pragma GCC diagnostic error "-Wpedantic"
> -#endif
>
> /* Generated configuration header. */
> #include "mlx4_autoconf.h"
> @@ -96,21 +79,6 @@
> /* PMD header. */
> #include "mlx4.h"
>
> -/* Runtime logging through RTE_LOG() is enabled when not in debugging mode.
> - * Intermediate LOG_*() macros add the required end-of-line characters. */
> -#ifndef NDEBUG
> -#define INFO(...) DEBUG(__VA_ARGS__)
> -#define WARN(...) DEBUG(__VA_ARGS__)
> -#define ERROR(...) DEBUG(__VA_ARGS__)
> -#else
> -#define LOG__(level, m, ...) \
> - RTE_LOG(level, PMD, MLX4_DRIVER_NAME ": " m "%c", __VA_ARGS__)
> -#define LOG_(level, ...) LOG__(level, __VA_ARGS__, '\n')
> -#define INFO(...) LOG_(INFO, __VA_ARGS__)
> -#define WARN(...) LOG_(WARNING, __VA_ARGS__)
> -#define ERROR(...) LOG_(ERR, __VA_ARGS__)
> -#endif
> -
> /* Convenience macros for accessing mbuf fields. */
> #define NEXT(m) ((m)->next)
> #define DATA_LEN(m) ((m)->data_len)
> @@ -137,157 +105,6 @@
> (((val) & (from)) / ((from) / (to))) : \
> (((val) & (from)) * ((to) / (from))))
>
> -struct mlx4_rxq_stats {
> - unsigned int idx; /**< Mapping index. */
> -#ifdef MLX4_PMD_SOFT_COUNTERS
> - uint64_t ipackets; /**< Total of successfully received packets. */
> - uint64_t ibytes; /**< Total of successfully received bytes. */
> -#endif
> - uint64_t idropped; /**< Total of packets dropped when RX ring full. */
> - uint64_t rx_nombuf; /**< Total of RX mbuf allocation failures. */
> -};
> -
> -struct mlx4_txq_stats {
> - unsigned int idx; /**< Mapping index. */
> -#ifdef MLX4_PMD_SOFT_COUNTERS
> - uint64_t opackets; /**< Total of successfully sent packets. */
> - uint64_t obytes; /**< Total of successfully sent bytes. */
> -#endif
> - uint64_t odropped; /**< Total of packets not sent when TX ring full. */
> -};
> -
> -/* RX element (scattered packets). */
> -struct rxq_elt_sp {
> - struct ibv_recv_wr wr; /* Work Request. */
> - struct ibv_sge sges[MLX4_PMD_SGE_WR_N]; /* Scatter/Gather Elements. */
> - struct rte_mbuf *bufs[MLX4_PMD_SGE_WR_N]; /* SGEs buffers. */
> -};
> -
> -/* RX element. */
> -struct rxq_elt {
> - struct ibv_recv_wr wr; /* Work Request. */
> - struct ibv_sge sge; /* Scatter/Gather Element. */
> - /* mbuf pointer is derived from WR_ID(wr.wr_id).offset. */
> -};
> -
> -/* RX queue descriptor. */
> -struct rxq {
> - struct priv *priv; /* Back pointer to private data. */
> - struct rte_mempool *mp; /* Memory Pool for allocations. */
> - struct ibv_mr *mr; /* Memory Region (for mp). */
> - struct ibv_cq *cq; /* Completion Queue. */
> - struct ibv_qp *qp; /* Queue Pair. */
> - struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
> - struct ibv_exp_cq_family *if_cq; /* CQ interface. */
> - /*
> - * Each VLAN ID requires a separate flow steering rule.
> - */
> - BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
> - struct ibv_flow *mac_flow[MLX4_MAX_MAC_ADDRESSES][MLX4_MAX_VLAN_IDS];
> - struct ibv_flow *promisc_flow; /* Promiscuous flow. */
> - struct ibv_flow *allmulti_flow; /* Multicast flow. */
> - unsigned int port_id; /* Port ID for incoming packets. */
> - unsigned int elts_n; /* (*elts)[] length. */
> - unsigned int elts_head; /* Current index in (*elts)[]. */
> - union {
> - struct rxq_elt_sp (*sp)[]; /* Scattered RX elements. */
> - struct rxq_elt (*no_sp)[]; /* RX elements. */
> - } elts;
> - unsigned int sp:1; /* Use scattered RX elements. */
> - unsigned int csum:1; /* Enable checksum offloading. */
> - unsigned int csum_l2tun:1; /* Same for L2 tunnels. */
> - struct mlx4_rxq_stats stats; /* RX queue counters. */
> - unsigned int socket; /* CPU socket ID for allocations. */
> - struct ibv_exp_res_domain *rd; /* Resource Domain. */
> -};
> -
> -/* TX element. */
> -struct txq_elt {
> - struct rte_mbuf *buf;
> -};
> -
> -/* Linear buffer type. It is used when transmitting buffers with too many
> - * segments that do not fit the hardware queue (see max_send_sge).
> - * Extra segments are copied (linearized) in such buffers, replacing the
> - * last SGE during TX.
> - * The size is arbitrary but large enough to hold a jumbo frame with
> - * 8 segments considering mbuf.buf_len is about 2048 bytes. */
> -typedef uint8_t linear_t[16384];
> -
> -/* TX queue descriptor. */
> -struct txq {
> - struct priv *priv; /* Back pointer to private data. */
> - struct {
> - const struct rte_mempool *mp; /* Cached Memory Pool. */
> - struct ibv_mr *mr; /* Memory Region (for mp). */
> - uint32_t lkey; /* mr->lkey */
> - } mp2mr[MLX4_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
> - struct ibv_cq *cq; /* Completion Queue. */
> - struct ibv_qp *qp; /* Queue Pair. */
> - struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
> - struct ibv_exp_cq_family *if_cq; /* CQ interface. */
> -#if MLX4_PMD_MAX_INLINE > 0
> - uint32_t max_inline; /* Max inline send size <= MLX4_PMD_MAX_INLINE. */
> -#endif
> - unsigned int elts_n; /* (*elts)[] length. */
> - struct txq_elt (*elts)[]; /* TX elements. */
> - unsigned int elts_head; /* Current index in (*elts)[]. */
> - unsigned int elts_tail; /* First element awaiting completion. */
> - unsigned int elts_comp; /* Number of completion requests. */
> - unsigned int elts_comp_cd; /* Countdown for next completion request. */
> - unsigned int elts_comp_cd_init; /* Initial value for countdown. */
> - struct mlx4_txq_stats stats; /* TX queue counters. */
> - linear_t (*elts_linear)[]; /* Linearized buffers. */
> - struct ibv_mr *mr_linear; /* Memory Region for linearized buffers. */
> - unsigned int socket; /* CPU socket ID for allocations. */
> - struct ibv_exp_res_domain *rd; /* Resource Domain. */
> -};
> -
> -struct priv {
> - struct rte_eth_dev *dev; /* Ethernet device. */
> - struct ibv_context *ctx; /* Verbs context. */
> - struct ibv_device_attr device_attr; /* Device properties. */
> - struct ibv_pd *pd; /* Protection Domain. */
> - /*
> - * MAC addresses array and configuration bit-field.
> - * An extra entry that cannot be modified by the DPDK is reserved
> - * for broadcast frames (destination MAC address ff:ff:ff:ff:ff:ff).
> - */
> - struct ether_addr mac[MLX4_MAX_MAC_ADDRESSES];
> - BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
> - /* VLAN filters. */
> - struct {
> - unsigned int enabled:1; /* If enabled. */
> - unsigned int id:12; /* VLAN ID (0-4095). */
> - } vlan_filter[MLX4_MAX_VLAN_IDS]; /* VLAN filters table. */
> - /* Device properties. */
> - uint16_t mtu; /* Configured MTU. */
> - uint8_t port; /* Physical port number. */
> - unsigned int started:1; /* Device started, flows enabled. */
> - unsigned int promisc:1; /* Device in promiscuous mode. */
> - unsigned int allmulti:1; /* Device receives all multicast packets. */
> - unsigned int hw_qpg:1; /* QP groups are supported. */
> - unsigned int hw_tss:1; /* TSS is supported. */
> - unsigned int hw_rss:1; /* RSS is supported. */
> - unsigned int hw_csum:1; /* Checksum offload is supported. */
> - unsigned int hw_csum_l2tun:1; /* Same for L2 tunnels. */
> - unsigned int rss:1; /* RSS is enabled. */
> - unsigned int vf:1; /* This is a VF device. */
> - unsigned int pending_alarm:1; /* An alarm is pending. */
> -#ifdef INLINE_RECV
> - unsigned int inl_recv_size; /* Inline recv size */
> -#endif
> - unsigned int max_rss_tbl_sz; /* Maximum number of RSS queues. */
> - /* RX/TX queues. */
> - struct rxq rxq_parent; /* Parent queue when RSS is enabled. */
> - unsigned int rxqs_n; /* RX queues array size. */
> - unsigned int txqs_n; /* TX queues array size. */
> - struct rxq *(*rxqs)[]; /* RX queues. */
> - struct txq *(*txqs)[]; /* TX queues. */
> - struct rte_intr_handle intr_handle; /* Interrupt handler. */
> - rte_spinlock_t lock; /* Lock for control functions. */
> -};
> -
> /* Local storage for secondary process data. */
> struct mlx4_secondary_data {
> struct rte_eth_dev_data data; /* Local device data. */
> @@ -335,8 +152,7 @@ struct mlx4_secondary_data {
> * @param priv
> * Pointer to private structure.
> */
> -static void
> -priv_lock(struct priv *priv)
> +void priv_lock(struct priv *priv)
> {
> rte_spinlock_lock(&priv->lock);
> }
> @@ -347,8 +163,7 @@ struct mlx4_secondary_data {
> * @param priv
> * Pointer to private structure.
> */
> -static void
> -priv_unlock(struct priv *priv)
> +void priv_unlock(struct priv *priv)
> {
> rte_spinlock_unlock(&priv->lock);
> }
> diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
> index 4c7505e..70c9ecd 100644
> --- a/drivers/net/mlx4/mlx4.h
> +++ b/drivers/net/mlx4/mlx4.h
> @@ -1,8 +1,8 @@
> /*-
> * BSD LICENSE
> *
> - * Copyright 2012-2015 6WIND S.A.
> - * Copyright 2012 Mellanox.
> + * Copyright 2012-2017 6WIND S.A.
> + * Copyright 2012-2017 Mellanox.
> *
> * Redistribution and use in source and binary forms, with or without
> * modification, are permitted provided that the following conditions
> @@ -39,6 +39,33 @@
> #include <limits.h>
>
> /*
> + * Runtime logging through RTE_LOG() is enabled when not in debugging mode.
> + * Intermediate LOG_*() macros add the required end-of-line characters.
> + */
> +#ifndef NDEBUG
> +#define INFO(...) DEBUG(__VA_ARGS__)
> +#define WARN(...) DEBUG(__VA_ARGS__)
> +#define ERROR(...) DEBUG(__VA_ARGS__)
> +#else
> +#define LOG__(level, m, ...) \
> + RTE_LOG(level, PMD, MLX4_DRIVER_NAME ": " m "%c", __VA_ARGS__)
> +#define LOG_(level, ...) LOG__(level, __VA_ARGS__, '\n')
> +#define INFO(...) LOG_(INFO, __VA_ARGS__)
> +#define WARN(...) LOG_(WARNING, __VA_ARGS__)
> +#define ERROR(...) LOG_(ERR, __VA_ARGS__)
> +#endif
> +
> +/* Verbs header. */
> +/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
> +#ifdef PEDANTIC
> +#pragma GCC diagnostic ignored "-Wpedantic"
> +#endif
> +#include <infiniband/verbs.h>
> +#ifdef PEDANTIC
> +#pragma GCC diagnostic error "-Wpedantic"
> +#endif
> +
> +/*
> * Maximum number of simultaneous MAC addresses supported.
> *
> * According to ConnectX's Programmer Reference Manual:
> @@ -160,4 +187,160 @@ enum {
> #define claim_positive(...) (__VA_ARGS__)
> #endif /* NDEBUG */
>
> +struct mlx4_rxq_stats {
> + unsigned int idx; /**< Mapping index. */
> +#ifdef MLX4_PMD_SOFT_COUNTERS
> + uint64_t ipackets; /**< Total of successfully received packets. */
> + uint64_t ibytes; /**< Total of successfully received bytes. */
> +#endif
> + uint64_t idropped; /**< Total of packets dropped when RX ring full. */
> + uint64_t rx_nombuf; /**< Total of RX mbuf allocation failures. */
> +};
> +
> +/* RX element (scattered packets). */
> +struct rxq_elt_sp {
> + struct ibv_recv_wr wr; /* Work Request. */
> + struct ibv_sge sges[MLX4_PMD_SGE_WR_N]; /* Scatter/Gather Elements. */
> + struct rte_mbuf *bufs[MLX4_PMD_SGE_WR_N]; /* SGEs buffers. */
> +};
> +
> +/* RX element. */
> +struct rxq_elt {
> + struct ibv_recv_wr wr; /* Work Request. */
> + struct ibv_sge sge; /* Scatter/Gather Element. */
> + /* mbuf pointer is derived from WR_ID(wr.wr_id).offset. */
> +};
> +
> +/* RX queue descriptor. */
> +struct rxq {
> + struct priv *priv; /* Back pointer to private data. */
> + struct rte_mempool *mp; /* Memory Pool for allocations. */
> + struct ibv_mr *mr; /* Memory Region (for mp). */
> + struct ibv_cq *cq; /* Completion Queue. */
> + struct ibv_qp *qp; /* Queue Pair. */
> + struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
> + struct ibv_exp_cq_family *if_cq; /* CQ interface. */
> + /*
> + * Each VLAN ID requires a separate flow steering rule.
> + */
> + BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
> + struct ibv_flow *mac_flow[MLX4_MAX_MAC_ADDRESSES][MLX4_MAX_VLAN_IDS];
> + struct ibv_flow *promisc_flow; /* Promiscuous flow. */
> + struct ibv_flow *allmulti_flow; /* Multicast flow. */
> + unsigned int port_id; /* Port ID for incoming packets. */
> + unsigned int elts_n; /* (*elts)[] length. */
> + unsigned int elts_head; /* Current index in (*elts)[]. */
> + union {
> + struct rxq_elt_sp (*sp)[]; /* Scattered RX elements. */
> + struct rxq_elt (*no_sp)[]; /* RX elements. */
> + } elts;
> + unsigned int sp:1; /* Use scattered RX elements. */
> + unsigned int csum:1; /* Enable checksum offloading. */
> + unsigned int csum_l2tun:1; /* Same for L2 tunnels. */
> + struct mlx4_rxq_stats stats; /* RX queue counters. */
> + unsigned int socket; /* CPU socket ID for allocations. */
> + struct ibv_exp_res_domain *rd; /* Resource Domain. */
> +};
> +
> +/* TX element. */
> +struct txq_elt {
> + struct rte_mbuf *buf;
> +};
> +
> +struct mlx4_txq_stats {
> + unsigned int idx; /**< Mapping index. */
> +#ifdef MLX4_PMD_SOFT_COUNTERS
> + uint64_t opackets; /**< Total of successfully sent packets. */
> + uint64_t obytes; /**< Total of successfully sent bytes. */
> +#endif
> + uint64_t odropped; /**< Total of packets not sent when TX ring full. */
> +};
> +
> +/*
> + * Linear buffer type. It is used when transmitting buffers with too many
> + * segments that do not fit the hardware queue (see max_send_sge).
> + * Extra segments are copied (linearized) in such buffers, replacing the
> + * last SGE during TX.
> + * The size is arbitrary but large enough to hold a jumbo frame with
> + * 8 segments considering mbuf.buf_len is about 2048 bytes.
> + */
> +typedef uint8_t linear_t[16384];
> +
> +/* TX queue descriptor. */
> +struct txq {
> + struct priv *priv; /* Back pointer to private data. */
> + struct {
> + const struct rte_mempool *mp; /* Cached Memory Pool. */
> + struct ibv_mr *mr; /* Memory Region (for mp). */
> + uint32_t lkey; /* mr->lkey */
> + } mp2mr[MLX4_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
> + struct ibv_cq *cq; /* Completion Queue. */
> + struct ibv_qp *qp; /* Queue Pair. */
> + struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
> + struct ibv_exp_cq_family *if_cq; /* CQ interface. */
> +#if MLX4_PMD_MAX_INLINE > 0
> + uint32_t max_inline; /* Max inline send size <= MLX4_PMD_MAX_INLINE. */
> +#endif
> + unsigned int elts_n; /* (*elts)[] length. */
> + struct txq_elt (*elts)[]; /* TX elements. */
> + unsigned int elts_head; /* Current index in (*elts)[]. */
> + unsigned int elts_tail; /* First element awaiting completion. */
> + unsigned int elts_comp; /* Number of completion requests. */
> + unsigned int elts_comp_cd; /* Countdown for next completion request. */
> + unsigned int elts_comp_cd_init; /* Initial value for countdown. */
> + struct mlx4_txq_stats stats; /* TX queue counters. */
> + linear_t (*elts_linear)[]; /* Linearized buffers. */
> + struct ibv_mr *mr_linear; /* Memory Region for linearized buffers. */
> + unsigned int socket; /* CPU socket ID for allocations. */
> + struct ibv_exp_res_domain *rd; /* Resource Domain. */
> +};
> +
> +struct priv {
> + struct rte_eth_dev *dev; /* Ethernet device. */
> + struct ibv_context *ctx; /* Verbs context. */
> + struct ibv_device_attr device_attr; /* Device properties. */
> + struct ibv_pd *pd; /* Protection Domain. */
> + /*
> + * MAC addresses array and configuration bit-field.
> + * An extra entry that cannot be modified by the DPDK is reserved
> + * for broadcast frames (destination MAC address ff:ff:ff:ff:ff:ff).
> + */
> + struct ether_addr mac[MLX4_MAX_MAC_ADDRESSES];
> + BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
> + /* VLAN filters. */
> + struct {
> + unsigned int enabled:1; /* If enabled. */
> + unsigned int id:12; /* VLAN ID (0-4095). */
> + } vlan_filter[MLX4_MAX_VLAN_IDS]; /* VLAN filters table. */
> + /* Device properties. */
> + uint16_t mtu; /* Configured MTU. */
> + uint8_t port; /* Physical port number. */
> + unsigned int started:1; /* Device started, flows enabled. */
> + unsigned int promisc:1; /* Device in promiscuous mode. */
> + unsigned int allmulti:1; /* Device receives all multicast packets. */
> + unsigned int hw_qpg:1; /* QP groups are supported. */
> + unsigned int hw_tss:1; /* TSS is supported. */
> + unsigned int hw_rss:1; /* RSS is supported. */
> + unsigned int hw_csum:1; /* Checksum offload is supported. */
> + unsigned int hw_csum_l2tun:1; /* Same for L2 tunnels. */
> + unsigned int rss:1; /* RSS is enabled. */
> + unsigned int vf:1; /* This is a VF device. */
> + unsigned int pending_alarm:1; /* An alarm is pending. */
> +#ifdef INLINE_RECV
> + unsigned int inl_recv_size; /* Inline recv size */
> +#endif
> + unsigned int max_rss_tbl_sz; /* Maximum number of RSS queues. */
> + /* RX/TX queues. */
> + struct rxq rxq_parent; /* Parent queue when RSS is enabled. */
> + unsigned int rxqs_n; /* RX queues array size. */
> + unsigned int txqs_n; /* TX queues array size. */
> + struct rxq *(*rxqs)[]; /* RX queues. */
> + struct txq *(*txqs)[]; /* TX queues. */
> + struct rte_intr_handle intr_handle; /* Interrupt handler. */
> + rte_spinlock_t lock; /* Lock for control functions. */
> +};
> +
> +void priv_lock(struct priv *priv);
> +void priv_unlock(struct priv *priv);
> +
> #endif /* RTE_PMD_MLX4_H_ */
> --
> 1.8.3.1
>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
--
Nélio Laranjeiro
6WIND
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [dpdk-dev] [PATCH v3 1/2] net/mlx4: split the definitions to the header file
2017-03-20 9:19 ` Nélio Laranjeiro
@ 2017-03-20 14:18 ` Ferruh Yigit
0 siblings, 0 replies; 15+ messages in thread
From: Ferruh Yigit @ 2017-03-20 14:18 UTC (permalink / raw)
To: Nélio Laranjeiro, Vasily Philipov; +Cc: dev, Adrien Mazarguil
On 3/20/2017 9:19 AM, Nélio Laranjeiro wrote:
> On Sun, Mar 05, 2017 at 09:51:31AM +0200, Vasily Philipov wrote:
>> Make priv_lock/priv_unlock functions and some other structs/defines visible
>> from different source files by placing them into mlx4.h header.
>>
>> Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
> Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Series applied to dpdk-next-net/master, thanks.
^ permalink raw reply [flat|nested] 15+ messages in thread
* [dpdk-dev] [PATCH v3 2/2] net/mlx4: support basic flow items and actions
2017-02-21 14:07 [dpdk-dev] [PATCH 1/2] net/mlx4: split the definitions to the header file Vasily Philipov
` (4 preceding siblings ...)
2017-03-05 7:51 ` [dpdk-dev] [PATCH v3 1/2] net/mlx4: split the definitions to the header file Vasily Philipov
@ 2017-03-05 7:51 ` Vasily Philipov
2017-03-20 9:19 ` Nélio Laranjeiro
5 siblings, 1 reply; 15+ messages in thread
From: Vasily Philipov @ 2017-03-05 7:51 UTC (permalink / raw)
To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro
Add support for the following items: eth, vlan, ipv4, udp, tcp, and for the
following actions: queue, drop.
Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
drivers/net/mlx4/Makefile | 3 +-
drivers/net/mlx4/mlx4.c | 91 +++-
drivers/net/mlx4/mlx4.h | 3 +
drivers/net/mlx4/mlx4_flow.c | 1043 ++++++++++++++++++++++++++++++++++++++++++
drivers/net/mlx4/mlx4_flow.h | 104 +++++
5 files changed, 1228 insertions(+), 16 deletions(-)
create mode 100644 drivers/net/mlx4/mlx4_flow.c
create mode 100644 drivers/net/mlx4/mlx4_flow.h
diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile
index 68c5902..1d463f7 100644
--- a/drivers/net/mlx4/Makefile
+++ b/drivers/net/mlx4/Makefile
@@ -36,6 +36,7 @@ LIB = librte_pmd_mlx4.a
# Sources.
SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_flow.c
# Dependencies.
DEPDIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += lib/librte_ether
@@ -129,7 +130,7 @@ mlx4_autoconf.h: mlx4_autoconf.h.new
cmp '$<' '$@' $(AUTOCONF_OUTPUT) || \
mv '$<' '$@'
-mlx4.o: mlx4_autoconf.h
+$(SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD):.c=.o): mlx4_autoconf.h
clean_mlx4: FORCE
$Q rm -f -- mlx4_autoconf.h mlx4_autoconf.h.new
diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 8f6c57f..bb0c647 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -72,12 +72,14 @@
#include <rte_log.h>
#include <rte_alarm.h>
#include <rte_memory.h>
+#include <rte_flow.h>
/* Generated configuration header. */
#include "mlx4_autoconf.h"
-/* PMD header. */
+/* PMD headers. */
#include "mlx4.h"
+#include "mlx4_flow.h"
/* Convenience macros for accessing mbuf fields. */
#define NEXT(m) ((m)->next)
@@ -2341,6 +2343,7 @@ struct txq_mp2mr_mbuf_check_data {
assert(((uint8_t *)attr + sizeof(*attr)) == (uint8_t *)spec);
*attr = (struct ibv_flow_attr){
.type = IBV_FLOW_ATTR_NORMAL,
+ .priority = 3,
.num_of_specs = 1,
.port = priv->port,
.flags = 0
@@ -3928,6 +3931,7 @@ struct txq_mp2mr_mbuf_check_data {
unsigned int i = 0;
unsigned int r;
struct rxq *rxq;
+ int ret;
if (mlx4_is_secondary())
return -E_RTE_SECONDARY;
@@ -3947,8 +3951,6 @@ struct txq_mp2mr_mbuf_check_data {
}
/* Iterate only once when RSS is enabled. */
do {
- int ret;
-
/* Ignore nonexistent RX queues. */
if (rxq == NULL)
continue;
@@ -3961,22 +3963,30 @@ struct txq_mp2mr_mbuf_check_data {
continue;
WARN("%p: QP flow attachment failed: %s",
(void *)dev, strerror(ret));
- /* Rollback. */
- while (i != 0) {
- rxq = (*priv->rxqs)[--i];
- if (rxq != NULL) {
- rxq_allmulticast_disable(rxq);
- rxq_promiscuous_disable(rxq);
- rxq_mac_addrs_del(rxq);
- }
- }
- priv->started = 0;
- priv_unlock(priv);
- return -ret;
+ goto err;
} while ((--r) && ((rxq = (*priv->rxqs)[++i]), i));
priv_dev_interrupt_handler_install(priv, dev);
+ ret = mlx4_priv_flow_start(priv);
+ if (ret) {
+ ERROR("%p: flow start failed: %s",
+ (void *)dev, strerror(ret));
+ goto err;
+ }
priv_unlock(priv);
return 0;
+err:
+ /* Rollback. */
+ while (i != 0) {
+ rxq = (*priv->rxqs)[i--];
+ if (rxq != NULL) {
+ rxq_allmulticast_disable(rxq);
+ rxq_promiscuous_disable(rxq);
+ rxq_mac_addrs_del(rxq);
+ }
+ }
+ priv->started = 0;
+ priv_unlock(priv);
+ return -ret;
}
/**
@@ -4011,6 +4021,7 @@ struct txq_mp2mr_mbuf_check_data {
rxq = (*priv->rxqs)[0];
r = priv->rxqs_n;
}
+ mlx4_priv_flow_stop(priv);
/* Iterate only once when RSS is enabled. */
do {
/* Ignore nonexistent RX queues. */
@@ -5012,6 +5023,55 @@ struct txq_mp2mr_mbuf_check_data {
return -ret;
}
+const struct rte_flow_ops mlx4_flow_ops = { /* Returned by mlx4_dev_filter_ctrl() for the generic filter type. */
+ .validate = mlx4_flow_validate,
+ .create = mlx4_flow_create,
+ .destroy = mlx4_flow_destroy,
+ .flush = mlx4_flow_flush,
+ .query = NULL, /* No query callback is provided. */
+};
+
+/**
+ * Dispatch filter control requests.
+ *
+ * Only the generic (rte_flow) filter type is handled: a GET operation stores
+ * the address of mlx4_flow_ops through arg. Every other request fails.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param filter_type
+ *   Filter type.
+ * @param filter_op
+ *   Operation to perform.
+ * @param arg
+ *   Pointer to operation-specific structure.
+ *
+ * @return
+ *   0 on success, negative errno value on failure.
+ */
+static int
+mlx4_dev_filter_ctrl(struct rte_eth_dev *dev,
+		     enum rte_filter_type filter_type,
+		     enum rte_filter_op filter_op,
+		     void *arg)
+{
+	if (filter_type == RTE_ETH_FILTER_GENERIC) {
+		if (filter_op == RTE_ETH_FILTER_GET) {
+			*(const void **)arg = &mlx4_flow_ops;
+			return 0;
+		}
+		return -EINVAL;
+	}
+	/* Anything else is unsupported; only the log level differs. */
+	if (filter_type == RTE_ETH_FILTER_FDIR) {
+		DEBUG("%p: filter type FDIR is not supported by this PMD",
+		      (void *)dev);
+	} else {
+		ERROR("%p: filter type (%d) not supported",
+		      (void *)dev, filter_type);
+	}
+	return -EINVAL;
+}
+
static const struct eth_dev_ops mlx4_dev_ops = {
.dev_configure = mlx4_dev_configure,
.dev_start = mlx4_dev_start,
@@ -5046,6 +5106,7 @@ struct txq_mp2mr_mbuf_check_data {
.mac_addr_add = mlx4_mac_addr_add,
.mac_addr_set = mlx4_mac_addr_set,
.mtu_set = mlx4_dev_set_mtu,
+ .filter_ctrl = mlx4_dev_filter_ctrl,
};
/**
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 70c9ecd..fac408b 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -295,6 +295,8 @@ struct txq {
struct ibv_exp_res_domain *rd; /* Resource Domain. */
};
+struct rte_flow;
+
struct priv {
struct rte_eth_dev *dev; /* Ethernet device. */
struct ibv_context *ctx; /* Verbs context. */
@@ -337,6 +339,7 @@ struct priv {
struct rxq *(*rxqs)[]; /* RX queues. */
struct txq *(*txqs)[]; /* TX queues. */
struct rte_intr_handle intr_handle; /* Interrupt handler. */
+ LIST_HEAD(mlx4_flows, rte_flow) flows;
rte_spinlock_t lock; /* Lock for control functions. */
};
diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
new file mode 100644
index 0000000..65537c7
--- /dev/null
+++ b/drivers/net/mlx4/mlx4_flow.c
@@ -0,0 +1,1043 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <assert.h>
+
+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+#include <rte_malloc.h>
+
+/* Generated configuration header. */
+#include "mlx4_autoconf.h"
+
+/* PMD headers. */
+#include "mlx4.h"
+#include "mlx4_flow.h"
+
+/**
+ * Static initializer for items.
+ *
+ * Expands to an anonymous constant array holding the given item types
+ * followed by RTE_FLOW_ITEM_TYPE_END, which terminates the list.
+ */
+#define ITEMS(...) \
+ (const enum rte_flow_item_type []){ \
+ __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
+ }
+
+/** Structure to generate a simple graph of layers supported by the NIC. */
+struct mlx4_flow_items {
+ /** List of possible actions for these items. */
+ const enum rte_flow_action_type *const actions;
+ /** Bit-mask covering the fields supported for the item. */
+ const void *mask;
+ /**
+ * Default bit-mask to use when item->mask is not provided. When
+ * \default_mask is also NULL, the full supported bit-mask (\mask) is
+ * used instead.
+ */
+ const void *default_mask;
+ /** Size in bytes of the bit-masks above. */
+ const unsigned int mask_sz;
+ /**
+ * Check support for a given item.
+ *
+ * @param item[in]
+ * Item specification.
+ * @param mask[in]
+ * Bit-mask covering supported fields, to compare with spec,
+ * last and mask in
+ * \item.
+ * @param size
+ * Bit-mask size in bytes.
+ *
+ * @return
+ * 0 on success, negative value otherwise.
+ */
+ int (*validate)(const struct rte_flow_item *item,
+ const uint8_t *mask, unsigned int size);
+ /**
+ * Conversion function from rte_flow to NIC specific flow.
+ *
+ * @param item
+ * rte_flow item to convert.
+ * @param default_mask
+ * Default bit-mask to use when item->mask is not provided.
+ * @param data
+ * Internal structure to store the conversion.
+ *
+ * @return
+ * 0 on success, negative value otherwise.
+ */
+ int (*convert)(const struct rte_flow_item *item,
+ const void *default_mask,
+ void *data);
+ /**
+ * Size in bytes of the destination structure; the flow offset is
+ * advanced by this amount after each item.
+ */
+ const unsigned int dst_sz;
+ /** List of item types allowed to follow this one. */
+ const enum rte_flow_item_type *const items;
+};
+
+/**
+ * Valid actions for this PMD.
+ *
+ * The list is terminated by RTE_FLOW_ACTION_TYPE_END.
+ */
+static const enum rte_flow_action_type valid_actions[] = {
+ RTE_FLOW_ACTION_TYPE_DROP,
+ RTE_FLOW_ACTION_TYPE_QUEUE,
+ RTE_FLOW_ACTION_TYPE_END,
+};
+
+/**
+ * Convert Ethernet item to Verbs specification.
+ *
+ * Appends an Ethernet specification at the current flow offset and sets the
+ * rule priority to 2. Without a spec, the attribute type is switched to
+ * IBV_FLOW_ATTR_ALL_DEFAULT and nothing else is filled in.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param default_mask[in]
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param data[in, out]
+ *   User structure.
+ *
+ * @return
+ *   Always 0.
+ */
+static int
+mlx4_flow_create_eth(const struct rte_flow_item *item,
+		     const void *default_mask,
+		     void *data)
+{
+	struct mlx4_flow *const ctx = data;
+	const struct rte_flow_item_eth *const val = item->spec;
+	const struct rte_flow_item_eth *msk = item->mask;
+	struct ibv_flow_spec_eth *out;
+	unsigned int b;
+
+	out = (void *)((uintptr_t)ctx->ibv_attr + ctx->offset);
+	ctx->ibv_attr->num_of_specs += 1;
+	ctx->ibv_attr->priority = 2;
+	*out = (struct ibv_flow_spec_eth) {
+		.type = IBV_FLOW_SPEC_ETH,
+		.size = sizeof(*out),
+	};
+	if (val == NULL) {
+		ctx->ibv_attr->type = IBV_FLOW_ATTR_ALL_DEFAULT;
+		return 0;
+	}
+	if (msk == NULL)
+		msk = default_mask;
+	/* Copy masks as-is; values are stored with unwanted bits cleared. */
+	for (b = 0; b < ETHER_ADDR_LEN; ++b) {
+		out->mask.dst_mac[b] = msk->dst.addr_bytes[b];
+		out->mask.src_mac[b] = msk->src.addr_bytes[b];
+		out->val.dst_mac[b] =
+			val->dst.addr_bytes[b] & msk->dst.addr_bytes[b];
+		out->val.src_mac[b] =
+			val->src.addr_bytes[b] & msk->src.addr_bytes[b];
+	}
+	return 0;
+}
+
+/**
+ * Convert VLAN item to Verbs specification.
+ *
+ * No specification is appended: the VLAN tag is written into the
+ * Ethernet specification located right before the current flow offset.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param default_mask[in]
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param data[in, out]
+ *   User structure.
+ *
+ * @return
+ *   Always 0.
+ */
+static int
+mlx4_flow_create_vlan(const struct rte_flow_item *item,
+		      const void *default_mask,
+		      void *data)
+{
+	struct mlx4_flow *const ctx = data;
+	const struct rte_flow_item_vlan *const val = item->spec;
+	const struct rte_flow_item_vlan *msk = item->mask;
+	struct ibv_flow_spec_eth *prev_eth;
+
+	if (val == NULL)
+		return 0;
+	if (msk == NULL)
+		msk = default_mask;
+	/* Step back over the Ethernet specification written just before. */
+	prev_eth = (void *)((uintptr_t)ctx->ibv_attr + ctx->offset -
+			    sizeof(*prev_eth));
+	prev_eth->mask.vlan_tag = msk->tci;
+	prev_eth->val.vlan_tag = val->tci & msk->tci;
+	return 0;
+}
+
+/**
+ * Convert IPv4 item to Verbs specification.
+ *
+ * Appends an IPv4 specification at the current flow offset and sets the
+ * rule priority to 1. Only source and destination addresses are converted.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param default_mask[in]
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param data[in, out]
+ *   User structure.
+ *
+ * @return
+ *   Always 0.
+ */
+static int
+mlx4_flow_create_ipv4(const struct rte_flow_item *item,
+		      const void *default_mask,
+		      void *data)
+{
+	struct mlx4_flow *const ctx = data;
+	const struct rte_flow_item_ipv4 *const val = item->spec;
+	const struct rte_flow_item_ipv4 *msk = item->mask;
+	struct ibv_flow_spec_ipv4 *out;
+
+	out = (void *)((uintptr_t)ctx->ibv_attr + ctx->offset);
+	ctx->ibv_attr->num_of_specs += 1;
+	ctx->ibv_attr->priority = 1;
+	/* Value and mask both start zeroed. */
+	*out = (struct ibv_flow_spec_ipv4) {
+		.type = IBV_FLOW_SPEC_IPV4,
+		.size = sizeof(*out),
+	};
+	if (val == NULL)
+		return 0;
+	if (msk == NULL)
+		msk = default_mask;
+	out->mask.src_ip = msk->hdr.src_addr;
+	out->mask.dst_ip = msk->hdr.dst_addr;
+	/* Store values with unwanted bits already removed. */
+	out->val.src_ip = val->hdr.src_addr & msk->hdr.src_addr;
+	out->val.dst_ip = val->hdr.dst_addr & msk->hdr.dst_addr;
+	return 0;
+}
+
+/**
+ * Convert UDP item to Verbs specification.
+ *
+ * Appends a UDP specification at the current flow offset and sets the
+ * rule priority to 0. Only source and destination ports are converted.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param default_mask[in]
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param data[in, out]
+ *   User structure.
+ *
+ * @return
+ *   Always 0.
+ */
+static int
+mlx4_flow_create_udp(const struct rte_flow_item *item,
+		     const void *default_mask,
+		     void *data)
+{
+	struct mlx4_flow *const ctx = data;
+	const struct rte_flow_item_udp *const val = item->spec;
+	const struct rte_flow_item_udp *msk = item->mask;
+	struct ibv_flow_spec_tcp_udp *out;
+
+	out = (void *)((uintptr_t)ctx->ibv_attr + ctx->offset);
+	ctx->ibv_attr->num_of_specs += 1;
+	ctx->ibv_attr->priority = 0;
+	*out = (struct ibv_flow_spec_tcp_udp) {
+		.type = IBV_FLOW_SPEC_UDP,
+		.size = sizeof(*out),
+	};
+	if (val == NULL)
+		return 0;
+	if (msk == NULL)
+		msk = default_mask;
+	out->mask.src_port = msk->hdr.src_port;
+	out->mask.dst_port = msk->hdr.dst_port;
+	/* Store values with unwanted bits already removed. */
+	out->val.src_port = val->hdr.src_port & msk->hdr.src_port;
+	out->val.dst_port = val->hdr.dst_port & msk->hdr.dst_port;
+	return 0;
+}
+
+/**
+ * Convert TCP item to Verbs specification.
+ *
+ * Appends a TCP specification at the current flow offset and sets the
+ * rule priority to 0. Only source and destination ports are converted.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param default_mask[in]
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param data[in, out]
+ *   User structure.
+ *
+ * @return
+ *   Always 0.
+ */
+static int
+mlx4_flow_create_tcp(const struct rte_flow_item *item,
+		     const void *default_mask,
+		     void *data)
+{
+	struct mlx4_flow *const ctx = data;
+	const struct rte_flow_item_tcp *const val = item->spec;
+	const struct rte_flow_item_tcp *msk = item->mask;
+	struct ibv_flow_spec_tcp_udp *out;
+
+	out = (void *)((uintptr_t)ctx->ibv_attr + ctx->offset);
+	ctx->ibv_attr->num_of_specs += 1;
+	ctx->ibv_attr->priority = 0;
+	*out = (struct ibv_flow_spec_tcp_udp) {
+		.type = IBV_FLOW_SPEC_TCP,
+		.size = sizeof(*out),
+	};
+	if (val == NULL)
+		return 0;
+	if (msk == NULL)
+		msk = default_mask;
+	out->mask.src_port = msk->hdr.src_port;
+	out->mask.dst_port = msk->hdr.dst_port;
+	/* Store values with unwanted bits already removed. */
+	out->val.src_port = val->hdr.src_port & msk->hdr.src_port;
+	out->val.dst_port = val->hdr.dst_port & msk->hdr.dst_port;
+	return 0;
+}
+
+/**
+ * Check support for a given item.
+ *
+ * An item is accepted when:
+ * - it carries no mask or last without a spec,
+ * - its own mask, if any, does not select bits outside the supported mask
+ *   (such bits would otherwise be accepted and silently ignored),
+ * - without its own mask, spec and last do not set bits outside the
+ *   supported mask,
+ * - spec and last are equal once masked, i.e. no range is requested.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param mask[in]
+ *   Bit-masks covering supported fields to compare with spec, last and mask in
+ *   \item.
+ * @param size
+ *   Bit-Mask size in bytes.
+ *
+ * @return
+ *   0 on success, negative value otherwise.
+ */
+static int
+mlx4_flow_item_validate(const struct rte_flow_item *item,
+			const uint8_t *mask, unsigned int size)
+{
+	const uint8_t *spec = item->spec;
+	const uint8_t *last = item->last;
+	const uint8_t *user_mask = item->mask;
+	/* Mask applied when comparing spec and last. */
+	const uint8_t *apply = user_mask ? user_mask : mask;
+	unsigned int i;
+
+	if (!spec && (user_mask || last))
+		return -1;
+	for (i = 0; i < size; ++i) {
+		if (user_mask) {
+			/* User mask must be a subset of the supported one. */
+			if (user_mask[i] & ~mask[i])
+				return -1;
+		} else {
+			if (spec && (spec[i] & ~mask[i]))
+				return -1;
+			if (last && (last[i] & ~mask[i]))
+				return -1;
+		}
+		/* Ranges are not supported: masked spec must equal last. */
+		if (spec && last && ((spec[i] ^ last[i]) & apply[i]))
+			return -1;
+	}
+	return 0;
+}
+
+/**
+ * Check support for an Ethernet item.
+ *
+ * A user-provided mask must select the whole destination MAC address; the
+ * remaining checks are done by mlx4_flow_item_validate().
+ */
+static int
+mlx4_flow_validate_eth(const struct rte_flow_item *item,
+		       const uint8_t *mask, unsigned int size)
+{
+	const struct rte_flow_item_eth *user = item->mask;
+	unsigned int b;
+
+	if (user != NULL) {
+		for (b = 0; b < ETHER_ADDR_LEN; ++b) {
+			if (user->dst.addr_bytes[b] != 0xff)
+				return -1;
+		}
+	}
+	return mlx4_flow_item_validate(item, mask, size);
+}
+
+/**
+ * Check support for a VLAN item.
+ *
+ * A user-provided TCI mask must be either empty or exactly 0x0fff once
+ * converted with ntohs(); the remaining checks are done by
+ * mlx4_flow_item_validate().
+ */
+static int
+mlx4_flow_validate_vlan(const struct rte_flow_item *item,
+			const uint8_t *mask, unsigned int size)
+{
+	const struct rte_flow_item_vlan *user = item->mask;
+
+	if (user != NULL && user->tci != 0 && ntohs(user->tci) != 0x0fff)
+		return -1;
+	return mlx4_flow_item_validate(item, mask, size);
+}
+
+/**
+ * Check support for an IPv4 item.
+ *
+ * Each address in a user-provided mask must be either fully masked or fully
+ * unmasked; the remaining checks are done by mlx4_flow_item_validate().
+ */
+static int
+mlx4_flow_validate_ipv4(const struct rte_flow_item *item,
+			const uint8_t *mask, unsigned int size)
+{
+	const struct rte_flow_item_ipv4 *user = item->mask;
+
+	if (user != NULL) {
+		const int src_ok = user->hdr.src_addr == 0 ||
+				   user->hdr.src_addr == 0xffffffff;
+		const int dst_ok = user->hdr.dst_addr == 0 ||
+				   user->hdr.dst_addr == 0xffffffff;
+
+		if (!src_ok || !dst_ok)
+			return -1;
+	}
+	return mlx4_flow_item_validate(item, mask, size);
+}
+
+/**
+ * Check support for a UDP item.
+ *
+ * Each port in a user-provided mask must be either fully masked or fully
+ * unmasked; the remaining checks are done by mlx4_flow_item_validate().
+ */
+static int
+mlx4_flow_validate_udp(const struct rte_flow_item *item,
+		       const uint8_t *mask, unsigned int size)
+{
+	const struct rte_flow_item_udp *user = item->mask;
+
+	if (user != NULL) {
+		const int src_ok = user->hdr.src_port == 0 ||
+				   user->hdr.src_port == 0xffff;
+		const int dst_ok = user->hdr.dst_port == 0 ||
+				   user->hdr.dst_port == 0xffff;
+
+		if (!src_ok || !dst_ok)
+			return -1;
+	}
+	return mlx4_flow_item_validate(item, mask, size);
+}
+
+/**
+ * Check support for a TCP item.
+ *
+ * Each port in a user-provided mask must be either fully masked or fully
+ * unmasked; the remaining checks are done by mlx4_flow_item_validate().
+ */
+static int
+mlx4_flow_validate_tcp(const struct rte_flow_item *item,
+		       const uint8_t *mask, unsigned int size)
+{
+	const struct rte_flow_item_tcp *user = item->mask;
+
+	if (user != NULL) {
+		const int src_ok = user->hdr.src_port == 0 ||
+				   user->hdr.src_port == 0xffff;
+		const int dst_ok = user->hdr.dst_port == 0 ||
+				   user->hdr.dst_port == 0xffff;
+
+		if (!src_ok || !dst_ok)
+			return -1;
+	}
+	return mlx4_flow_item_validate(item, mask, size);
+}
+
+/**
+ * Graph of supported items and associated actions.
+ *
+ * Entries are indexed by item type. The .items list of an entry names the
+ * item types allowed to come next in a pattern; an entry without .items
+ * cannot be followed by another item. priv_flow_validate() starts its walk
+ * from the first element of this array.
+ */
+static const struct mlx4_flow_items mlx4_flow_items[] = {
+ [RTE_FLOW_ITEM_TYPE_END] = {
+ .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
+ },
+ [RTE_FLOW_ITEM_TYPE_ETH] = {
+ .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
+ RTE_FLOW_ITEM_TYPE_IPV4),
+ .actions = valid_actions,
+ .mask = &(const struct rte_flow_item_eth){
+ .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+ .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+ },
+ .default_mask = &rte_flow_item_eth_mask,
+ .mask_sz = sizeof(struct rte_flow_item_eth),
+ .validate = mlx4_flow_validate_eth,
+ .convert = mlx4_flow_create_eth,
+ .dst_sz = sizeof(struct ibv_flow_spec_eth),
+ },
+ [RTE_FLOW_ITEM_TYPE_VLAN] = {
+ .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4),
+ .actions = valid_actions,
+ .mask = &(const struct rte_flow_item_vlan){
+ /* rte_flow_item_vlan_mask is invalid for mlx4. */
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+ .tci = 0x0fff,
+#else
+ .tci = 0xff0f, /* 0x0fff stored in network byte order. */
+#endif
+ },
+ .mask_sz = sizeof(struct rte_flow_item_vlan),
+ .validate = mlx4_flow_validate_vlan,
+ .convert = mlx4_flow_create_vlan,
+ .dst_sz = 0, /* No spec of its own: updates the preceding Ethernet spec. */
+ },
+ [RTE_FLOW_ITEM_TYPE_IPV4] = {
+ .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
+ RTE_FLOW_ITEM_TYPE_TCP),
+ .actions = valid_actions,
+ .mask = &(const struct rte_flow_item_ipv4){
+ .hdr = {
+ .src_addr = -1,
+ .dst_addr = -1,
+ },
+ },
+ .default_mask = &rte_flow_item_ipv4_mask,
+ .mask_sz = sizeof(struct rte_flow_item_ipv4),
+ .validate = mlx4_flow_validate_ipv4,
+ .convert = mlx4_flow_create_ipv4,
+ .dst_sz = sizeof(struct ibv_flow_spec_ipv4),
+ },
+ [RTE_FLOW_ITEM_TYPE_UDP] = {
+ .actions = valid_actions,
+ .mask = &(const struct rte_flow_item_udp){
+ .hdr = {
+ .src_port = -1,
+ .dst_port = -1,
+ },
+ },
+ .default_mask = &rte_flow_item_udp_mask,
+ .mask_sz = sizeof(struct rte_flow_item_udp),
+ .validate = mlx4_flow_validate_udp,
+ .convert = mlx4_flow_create_udp,
+ .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
+ },
+ [RTE_FLOW_ITEM_TYPE_TCP] = {
+ .actions = valid_actions,
+ .mask = &(const struct rte_flow_item_tcp){
+ .hdr = {
+ .src_port = -1,
+ .dst_port = -1,
+ },
+ },
+ .default_mask = &rte_flow_item_tcp_mask,
+ .mask_sz = sizeof(struct rte_flow_item_tcp),
+ .validate = mlx4_flow_validate_tcp,
+ .convert = mlx4_flow_create_tcp,
+ .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
+ },
+};
+
+/**
+ * Validate a flow supported by the NIC.
+ *
+ * Rejects unsupported attributes, then walks the pattern through the
+ * mlx4_flow_items graph, checking every item with its validate() callback.
+ * When flow->ibv_attr is set, each item is also converted into its Verbs
+ * specification at flow->offset. flow->offset is advanced by the size of
+ * every specification whether or not conversion took place. Finally the
+ * action list is checked: only VOID, DROP and QUEUE are accepted and at
+ * least one of DROP or QUEUE is required.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[in] attr
+ *   Flow rule attributes.
+ * @param[in] items
+ *   Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ *   Associated actions (list terminated by the END action).
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ * @param[in, out] flow
+ *   Flow structure to update.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+priv_flow_validate(struct priv *priv,
+		   const struct rte_flow_attr *attr,
+		   const struct rte_flow_item items[],
+		   const struct rte_flow_action actions[],
+		   struct rte_flow_error *error,
+		   struct mlx4_flow *flow)
+{
+	const struct mlx4_flow_items *cur_item = mlx4_flow_items;
+	struct mlx4_flow_action action = {
+		.queue = 0,
+		.drop = 0,
+	};
+
+	if (attr->group) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
+				   NULL,
+				   "groups are not supported");
+		return -rte_errno;
+	}
+	if (attr->priority) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
+				   NULL,
+				   "priorities are not supported");
+		return -rte_errno;
+	}
+	if (attr->egress) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
+				   NULL,
+				   "egress is not supported");
+		return -rte_errno;
+	}
+	if (!attr->ingress) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
+				   NULL,
+				   "only ingress is supported");
+		return -rte_errno;
+	}
+	/* Go over items list. */
+	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
+		const struct mlx4_flow_items *token = NULL;
+		unsigned int i;
+		int err;
+
+		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
+			continue;
+		/*
+		 * The nic can support patterns with NULL eth spec only
+		 * if eth is a single item in a rule.
+		 */
+		if (!items->spec &&
+		    items->type == RTE_FLOW_ITEM_TYPE_ETH) {
+			const struct rte_flow_item *next = items + 1;
+
+			if (next->type != RTE_FLOW_ITEM_TYPE_END) {
+				rte_flow_error_set(error, ENOTSUP,
+						   RTE_FLOW_ERROR_TYPE_ITEM,
+						   items,
+						   "the rule requires"
+						   " an Ethernet spec");
+				return -rte_errno;
+			}
+		}
+		/* The item must be a valid successor of the previous one. */
+		for (i = 0;
+		     cur_item->items &&
+		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
+		     ++i) {
+			if (cur_item->items[i] == items->type) {
+				token = &mlx4_flow_items[items->type];
+				break;
+			}
+		}
+		if (!token)
+			goto exit_item_not_supported;
+		cur_item = token;
+		err = cur_item->validate(items,
+					 (const uint8_t *)cur_item->mask,
+					 cur_item->mask_sz);
+		if (err)
+			goto exit_item_not_supported;
+		/* Conversion only happens once a Verbs buffer is attached. */
+		if (flow->ibv_attr && cur_item->convert) {
+			err = cur_item->convert(items,
+						(cur_item->default_mask ?
+						 cur_item->default_mask :
+						 cur_item->mask),
+						flow);
+			if (err)
+				goto exit_item_not_supported;
+		}
+		flow->offset += cur_item->dst_sz;
+	}
+	/* Go over actions list */
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
+		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
+			continue;
+		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
+			action.drop = 1;
+		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
+			const struct rte_flow_action_queue *queue =
+				(const struct rte_flow_action_queue *)
+				actions->conf;
+
+			/*
+			 * Compare with >= instead of > (rxqs_n - 1): the
+			 * subtraction wraps around when no RX queue is
+			 * configured and would accept any index.
+			 */
+			if (!queue || queue->index >= priv->rxqs_n)
+				goto exit_action_not_supported;
+			action.queue = 1;
+		} else {
+			goto exit_action_not_supported;
+		}
+	}
+	if (!action.queue && !action.drop) {
+		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "no valid action");
+		return -rte_errno;
+	}
+	return 0;
+exit_item_not_supported:
+	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
+			   items, "item not supported");
+	return -rte_errno;
+exit_action_not_supported:
+	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+			   actions, "action not supported");
+	return -rte_errno;
+}
+
+/**
+ * Validate a flow supported by the NIC.
+ *
+ * Runs priv_flow_validate() under the private lock with a scratch flow
+ * structure that has no Verbs attribute buffer attached.
+ *
+ * @see rte_flow_validate()
+ * @see rte_flow_ops
+ */
+int
+mlx4_flow_validate(struct rte_eth_dev *dev,
+		   const struct rte_flow_attr *attr,
+		   const struct rte_flow_item items[],
+		   const struct rte_flow_action actions[],
+		   struct rte_flow_error *error)
+{
+	struct mlx4_flow scratch = { .offset = sizeof(struct ibv_flow_attr) };
+	struct priv *const priv = dev->data->dev_private;
+	int status;
+
+	priv_lock(priv);
+	status = priv_flow_validate(priv, attr, items, actions, error,
+				    &scratch);
+	priv_unlock(priv);
+	return status;
+}
+
+/**
+ * Complete flow rule creation.
+ *
+ * Allocates the rte_flow handle and attaches the Verbs flow either to the
+ * QP of the target RX queue or, for drop rules, to a dedicated QP/CQ pair
+ * created here.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param ibv_attr
+ *   Verbs flow attributes.
+ * @param action
+ *   Target action structure.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   A flow if the rule could be created, NULL otherwise.
+ */
+static struct rte_flow *
+priv_flow_create_action_queue(struct priv *priv,
+			      struct ibv_flow_attr *ibv_attr,
+			      struct mlx4_flow_action *action,
+			      struct rte_flow_error *error)
+{
+	struct rxq *rxq;
+	struct ibv_qp *qp;
+	struct rte_flow *rte_flow;
+
+	assert(priv->pd);
+	assert(priv->ctx);
+	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
+	if (!rte_flow) {
+		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "cannot allocate flow memory");
+		return NULL;
+	}
+	rxq = (*priv->rxqs)[action->queue_id];
+	/* Entries of the RX queues array may be NULL (queue not set up). */
+	if (!rxq) {
+		rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "target RX queue is not configured");
+		goto error;
+	}
+	if (action->drop) {
+		rte_flow->cq =
+			ibv_exp_create_cq(priv->ctx, 1, NULL, NULL, 0,
+					  &(struct ibv_exp_cq_init_attr){
+						  .comp_mask = 0,
+					  });
+		if (!rte_flow->cq) {
+			rte_flow_error_set(error, ENOMEM,
+					   RTE_FLOW_ERROR_TYPE_HANDLE,
+					   NULL, "cannot allocate CQ");
+			goto error;
+		}
+		rte_flow->qp = ibv_exp_create_qp(
+			priv->ctx,
+			&(struct ibv_exp_qp_init_attr){
+				.send_cq = rte_flow->cq,
+				.recv_cq = rte_flow->cq,
+				.cap = {
+					.max_recv_wr = 1,
+					.max_recv_sge = 1,
+				},
+				.qp_type = IBV_QPT_RAW_PACKET,
+				.comp_mask =
+					IBV_EXP_QP_INIT_ATTR_PD |
+					IBV_EXP_QP_INIT_ATTR_PORT |
+					IBV_EXP_QP_INIT_ATTR_RES_DOMAIN,
+				.pd = priv->pd,
+				.res_domain = rxq->rd,
+				.port_num = priv->port,
+			});
+		if (!rte_flow->qp) {
+			rte_flow_error_set(error, ENOMEM,
+					   RTE_FLOW_ERROR_TYPE_HANDLE,
+					   NULL, "cannot allocate QP");
+			goto error;
+		}
+		qp = rte_flow->qp;
+	} else {
+		rte_flow->rxq = rxq;
+		qp = rxq->qp;
+	}
+	rte_flow->ibv_attr = ibv_attr;
+	rte_flow->ibv_flow = ibv_create_flow(qp, rte_flow->ibv_attr);
+	if (!rte_flow->ibv_flow) {
+		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "flow rule creation failure");
+		goto error;
+	}
+	return rte_flow;
+
+error:
+	assert(rte_flow);
+	/*
+	 * The QP must go first: a CQ cannot be destroyed while a QP is
+	 * still associated with it.
+	 */
+	if (rte_flow->qp)
+		ibv_destroy_qp(rte_flow->qp);
+	if (rte_flow->cq)
+		ibv_destroy_cq(rte_flow->cq);
+	/*
+	 * NOTE(review): once attached, the caller-supplied ibv_attr is
+	 * released here; confirm the caller does not free it again on
+	 * failure.
+	 */
+	rte_free(rte_flow->ibv_attr);
+	rte_free(rte_flow);
+	return NULL;
+}
+
+/**
+ * Convert a generic flow rule into a Verbs flow and create it.
+ *
+ * priv_flow_validate() is run twice. The first run leaves the number of
+ * bytes required in flow.offset, which is used as the allocation size of the
+ * Verbs attribute buffer. The offset is then rewound to just past the
+ * attribute header and the second run is made with that buffer attached
+ * (presumably so that the item conversion callbacks fill it in; the call
+ * site is outside this view -- to be confirmed). The action list is finally
+ * scanned to build the target description handed to
+ * priv_flow_create_action_queue().
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param[in] attr
+ * Flow rule attributes.
+ * @param[in] items
+ * Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ * Associated actions (list terminated by the END action).
+ * @param[out] error
+ * Perform verbose error reporting if not NULL.
+ *
+ * @return
+ * A flow on success, NULL otherwise.
+ */
+static struct rte_flow *
+priv_flow_create(struct priv *priv,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error)
+{
+ struct rte_flow *rte_flow;
+ struct mlx4_flow_action action;
+ struct mlx4_flow flow = { .offset = sizeof(struct ibv_flow_attr), };
+ int err;
+
+ err = priv_flow_validate(priv, attr, items, actions, error, &flow);
+ if (err)
+ return NULL;
+ flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
+ if (!flow.ibv_attr) {
+ rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+ NULL, "cannot allocate ibv_attr memory");
+ return NULL;
+ }
+ flow.offset = sizeof(struct ibv_flow_attr);
+ *flow.ibv_attr = (struct ibv_flow_attr){
+ .comp_mask = 0,
+ .type = IBV_FLOW_ATTR_NORMAL,
+ .size = sizeof(struct ibv_flow_attr),
+ .priority = attr->priority,
+ .num_of_specs = 0,
+ .port = priv->port,
+ .flags = 0,
+ };
+ claim_zero(priv_flow_validate(priv, attr, items, actions,
+ error, &flow));
+ action = (struct mlx4_flow_action){
+ .queue = 0,
+ .drop = 0,
+ };
+ for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
+ if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
+ continue;
+ } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
+ action.queue = 1;
+ action.queue_id =
+ ((const struct rte_flow_action_queue *)
+ actions->conf)->index;
+ } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
+ action.drop = 1;
+ } else {
+ rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ actions, "unsupported action");
+ goto exit;
+ }
+ }
+ rte_flow = priv_flow_create_action_queue(priv, flow.ibv_attr,
+ &action, error);
+ return rte_flow;
+exit:
+ rte_free(flow.ibv_attr);
+ return NULL;
+}
+
+/**
+ * Create a flow rule and register it with the port.
+ *
+ * The rule is built by priv_flow_create() with the private lock held; when
+ * that succeeds the new flow is linked at the head of the port's flow list
+ * before the lock is released.
+ *
+ * @see rte_flow_create()
+ * @see rte_flow_ops
+ */
+struct rte_flow *
+mlx4_flow_create(struct rte_eth_dev *dev,
+		 const struct rte_flow_attr *attr,
+		 const struct rte_flow_item items[],
+		 const struct rte_flow_action actions[],
+		 struct rte_flow_error *error)
+{
+	struct priv *const priv = dev->data->dev_private;
+	struct rte_flow *created;
+
+	priv_lock(priv);
+	created = priv_flow_create(priv, attr, items, actions, error);
+	if (created != NULL) {
+		LIST_INSERT_HEAD(&priv->flows, created, next);
+		DEBUG("Flow created %p", (void *)created);
+	}
+	priv_unlock(priv);
+	return created;
+}
+
+/**
+ * Destroy a flow.
+ *
+ * The flow is first unlinked from the list it belongs to. Its Verbs flow,
+ * queue pair and completion queue are then destroyed, in that order and only
+ * when present, before the attribute buffer and the flow structure itself
+ * are freed. The flow pointer must not be used afterwards.
+ *
+ * @param priv
+ * Pointer to private structure (currently unused).
+ * @param[in] flow
+ * Flow to destroy.
+ */
+static void
+priv_flow_destroy(struct priv *priv, struct rte_flow *flow)
+{
+ (void)priv;
+ LIST_REMOVE(flow, next);
+ if (flow->ibv_flow)
+ claim_zero(ibv_destroy_flow(flow->ibv_flow));
+ if (flow->qp)
+ claim_zero(ibv_destroy_qp(flow->qp));
+ if (flow->cq)
+ claim_zero(ibv_destroy_cq(flow->cq));
+ rte_free(flow->ibv_attr);
+ DEBUG("Flow destroyed %p", (void *)flow);
+ rte_free(flow);
+}
+
+/**
+ * Destroy a single flow rule.
+ *
+ * Runs priv_flow_destroy() on the flow with the private lock held. The
+ * error argument is not used and the call always reports success.
+ *
+ * @see rte_flow_destroy()
+ * @see rte_flow_ops
+ */
+int
+mlx4_flow_destroy(struct rte_eth_dev *dev,
+		  struct rte_flow *flow,
+		  struct rte_flow_error *error)
+{
+	struct priv *const port_priv = dev->data->dev_private;
+
+	priv_lock(port_priv);
+	priv_flow_destroy(port_priv, flow);
+	priv_unlock(port_priv);
+	(void)error;
+	return 0;
+}
+
+/**
+ * Destroy all flows.
+ *
+ * Keeps destroying the flow at the head of the list until the list is
+ * empty; priv_flow_destroy() unlinks each flow before freeing it.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+static void
+priv_flow_flush(struct priv *priv)
+{
+	struct rte_flow *head;
+
+	for (head = LIST_FIRST(&priv->flows);
+	     head != NULL;
+	     head = LIST_FIRST(&priv->flows))
+		priv_flow_destroy(priv, head);
+}
+
+/**
+ * Destroy every flow rule of the port.
+ *
+ * Runs priv_flow_flush() with the private lock held. The error argument is
+ * not used and the call always reports success.
+ *
+ * @see rte_flow_flush()
+ * @see rte_flow_ops
+ */
+int
+mlx4_flow_flush(struct rte_eth_dev *dev,
+		struct rte_flow_error *error)
+{
+	struct priv *const port_priv = dev->data->dev_private;
+
+	priv_lock(port_priv);
+	priv_flow_flush(port_priv);
+	priv_unlock(port_priv);
+	(void)error;
+	return 0;
+}
+
+/**
+ * Remove all flows.
+ *
+ * Called by dev_stop() to remove all flows. Only the Verbs flow of each rule
+ * is destroyed; the flow structures stay in the list, with ibv_flow reset to
+ * NULL, so that mlx4_priv_flow_start() can apply them again.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+void
+mlx4_priv_flow_stop(struct priv *priv)
+{
+	struct rte_flow *flow;
+
+	for (flow = LIST_FIRST(&priv->flows);
+	     flow;
+	     flow = LIST_NEXT(flow, next)) {
+		/*
+		 * A flow may already be detached (previous stop, or a start
+		 * that failed on it): nothing to destroy in that case.
+		 */
+		if (!flow->ibv_flow)
+			continue;
+		claim_zero(ibv_destroy_flow(flow->ibv_flow));
+		flow->ibv_flow = NULL;
+		DEBUG("Flow %p removed", (void *)flow);
+	}
+}
+
+/**
+ * Add all flows.
+ *
+ * Applies every flow of the list on its QP: the dedicated QP of the flow
+ * when it has one, the QP of its Rx queue otherwise. If one flow cannot be
+ * applied, the flows applied before it are removed again so that no Verbs
+ * flow is left attached after a failed start.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ *
+ * @return
+ *   0 on success, a positive errno value otherwise and rte_errno is set.
+ */
+int
+mlx4_priv_flow_start(struct priv *priv)
+{
+	struct ibv_qp *qp;
+	struct rte_flow *flow;
+	struct rte_flow *undo;
+
+	for (flow = LIST_FIRST(&priv->flows);
+	     flow;
+	     flow = LIST_NEXT(flow, next)) {
+		qp = flow->qp ? flow->qp : flow->rxq->qp;
+		flow->ibv_flow = ibv_create_flow(qp, flow->ibv_attr);
+		if (!flow->ibv_flow) {
+			DEBUG("Flow %p cannot be applied", (void *)flow);
+			goto rollback;
+		}
+		DEBUG("Flow %p applied", (void *)flow);
+	}
+	return 0;
+rollback:
+	/* Detach everything applied before the failing flow. */
+	for (undo = LIST_FIRST(&priv->flows);
+	     undo != flow;
+	     undo = LIST_NEXT(undo, next)) {
+		if (!undo->ibv_flow)
+			continue;
+		claim_zero(ibv_destroy_flow(undo->ibv_flow));
+		undo->ibv_flow = NULL;
+	}
+	rte_errno = EINVAL;
+	return rte_errno;
+}
diff --git a/drivers/net/mlx4/mlx4_flow.h b/drivers/net/mlx4/mlx4_flow.h
new file mode 100644
index 0000000..66c5be6
--- /dev/null
+++ b/drivers/net/mlx4/mlx4_flow.h
@@ -0,0 +1,104 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RTE_PMD_MLX4_FLOW_H_
+#define RTE_PMD_MLX4_FLOW_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/queue.h>
+
+/* Verbs header. */
+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+#include <rte_byteorder.h>
+
+#include "mlx4.h"
+
+struct rte_flow {
+ LIST_ENTRY(rte_flow) next; /**< Linkage in the per-port flow list. */
+ struct rxq *rxq; /**< Target Rx queue, left NULL for drop rules. */
+ struct ibv_flow *ibv_flow; /**< Verbs flow, NULL while not applied. */
+ struct ibv_flow_attr *ibv_attr; /**< Verbs attributes, freed with the flow. */
+ struct ibv_qp *qp; /**< Dedicated Verbs queue pair (drop rules only). */
+ struct ibv_cq *cq; /**< Dedicated Verbs completion queue (drop rules only). */
+};
+
+int
+mlx4_flow_validate(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error);
+
+struct rte_flow *
+mlx4_flow_create(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error);
+
+int
+mlx4_flow_destroy(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ struct rte_flow_error *error);
+
+int
+mlx4_flow_flush(struct rte_eth_dev *dev,
+ struct rte_flow_error *error);
+
+/**
+ * Structure to pass to the conversion function.
+ *
+ * The same structure is handed to flow validation. When ibv_attr is NULL
+ * only the offset is advanced, which leaves in it the number of bytes
+ * needed for the Verbs attribute buffer.
+ */
+struct mlx4_flow {
+ struct ibv_flow_attr *ibv_attr; /**< Verbs attribute buffer, may be NULL. */
+ unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
+};
+
+struct mlx4_flow_action {
+ uint32_t drop:1; /**< Target is a drop queue (wins over queue if both set). */
+ uint32_t queue:1; /**< Target is a receive queue. */
+ uint32_t queue_id; /**< Identifier of the queue, written for QUEUE actions. */
+};
+
+int mlx4_priv_flow_start(struct priv *priv);
+void mlx4_priv_flow_stop(struct priv *priv);
+
+#endif /* RTE_PMD_MLX4_FLOW_H_ */
--
1.8.3.1
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [dpdk-dev] [PATCH v3 2/2] net/mlx4: support basic flow items and actions
2017-03-05 7:51 ` [dpdk-dev] [PATCH v3 2/2] net/mlx4: support basic flow items and actions Vasily Philipov
@ 2017-03-20 9:19 ` Nélio Laranjeiro
0 siblings, 0 replies; 15+ messages in thread
From: Nélio Laranjeiro @ 2017-03-20 9:19 UTC (permalink / raw)
To: Vasily Philipov; +Cc: dev, Adrien Mazarguil
On Sun, Mar 05, 2017 at 09:51:32AM +0200, Vasily Philipov wrote:
> Adding support for the next items: eth, vlan, ipv4, udp, tcp and for the
> next actions: queue, drop
>
> Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
> ---
> drivers/net/mlx4/Makefile | 3 +-
> drivers/net/mlx4/mlx4.c | 91 +++-
> drivers/net/mlx4/mlx4.h | 3 +
> drivers/net/mlx4/mlx4_flow.c | 1043 ++++++++++++++++++++++++++++++++++++++++++
> drivers/net/mlx4/mlx4_flow.h | 104 +++++
> 5 files changed, 1228 insertions(+), 16 deletions(-)
> create mode 100644 drivers/net/mlx4/mlx4_flow.c
> create mode 100644 drivers/net/mlx4/mlx4_flow.h
>
> diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile
> index 68c5902..1d463f7 100644
> --- a/drivers/net/mlx4/Makefile
> +++ b/drivers/net/mlx4/Makefile
> @@ -36,6 +36,7 @@ LIB = librte_pmd_mlx4.a
>
> # Sources.
> SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4.c
> +SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_flow.c
>
> # Dependencies.
> DEPDIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += lib/librte_ether
> @@ -129,7 +130,7 @@ mlx4_autoconf.h: mlx4_autoconf.h.new
> cmp '$<' '$@' $(AUTOCONF_OUTPUT) || \
> mv '$<' '$@'
>
> -mlx4.o: mlx4_autoconf.h
> +$(SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD):.c=.o): mlx4_autoconf.h
>
> clean_mlx4: FORCE
> $Q rm -f -- mlx4_autoconf.h mlx4_autoconf.h.new
> diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
> index 8f6c57f..bb0c647 100644
> --- a/drivers/net/mlx4/mlx4.c
> +++ b/drivers/net/mlx4/mlx4.c
> @@ -72,12 +72,14 @@
> #include <rte_log.h>
> #include <rte_alarm.h>
> #include <rte_memory.h>
> +#include <rte_flow.h>
>
> /* Generated configuration header. */
> #include "mlx4_autoconf.h"
>
> -/* PMD header. */
> +/* PMD headers. */
> #include "mlx4.h"
> +#include "mlx4_flow.h"
>
> /* Convenience macros for accessing mbuf fields. */
> #define NEXT(m) ((m)->next)
> @@ -2341,6 +2343,7 @@ struct txq_mp2mr_mbuf_check_data {
> assert(((uint8_t *)attr + sizeof(*attr)) == (uint8_t *)spec);
> *attr = (struct ibv_flow_attr){
> .type = IBV_FLOW_ATTR_NORMAL,
> + .priority = 3,
> .num_of_specs = 1,
> .port = priv->port,
> .flags = 0
> @@ -3928,6 +3931,7 @@ struct txq_mp2mr_mbuf_check_data {
> unsigned int i = 0;
> unsigned int r;
> struct rxq *rxq;
> + int ret;
>
> if (mlx4_is_secondary())
> return -E_RTE_SECONDARY;
> @@ -3947,8 +3951,6 @@ struct txq_mp2mr_mbuf_check_data {
> }
> /* Iterate only once when RSS is enabled. */
> do {
> - int ret;
> -
> /* Ignore nonexistent RX queues. */
> if (rxq == NULL)
> continue;
> @@ -3961,22 +3963,30 @@ struct txq_mp2mr_mbuf_check_data {
> continue;
> WARN("%p: QP flow attachment failed: %s",
> (void *)dev, strerror(ret));
> - /* Rollback. */
> - while (i != 0) {
> - rxq = (*priv->rxqs)[--i];
> - if (rxq != NULL) {
> - rxq_allmulticast_disable(rxq);
> - rxq_promiscuous_disable(rxq);
> - rxq_mac_addrs_del(rxq);
> - }
> - }
> - priv->started = 0;
> - priv_unlock(priv);
> - return -ret;
> + goto err;
> } while ((--r) && ((rxq = (*priv->rxqs)[++i]), i));
> priv_dev_interrupt_handler_install(priv, dev);
> + ret = mlx4_priv_flow_start(priv);
> + if (ret) {
> + ERROR("%p: flow start failed: %s",
> + (void *)dev, strerror(ret));
> + goto err;
> + }
> priv_unlock(priv);
> return 0;
> +err:
> + /* Rollback. */
> + while (i != 0) {
> + rxq = (*priv->rxqs)[i--];
> + if (rxq != NULL) {
> + rxq_allmulticast_disable(rxq);
> + rxq_promiscuous_disable(rxq);
> + rxq_mac_addrs_del(rxq);
> + }
> + }
> + priv->started = 0;
> + priv_unlock(priv);
> + return -ret;
> }
>
> /**
> @@ -4011,6 +4021,7 @@ struct txq_mp2mr_mbuf_check_data {
> rxq = (*priv->rxqs)[0];
> r = priv->rxqs_n;
> }
> + mlx4_priv_flow_stop(priv);
> /* Iterate only once when RSS is enabled. */
> do {
> /* Ignore nonexistent RX queues. */
> @@ -5012,6 +5023,55 @@ struct txq_mp2mr_mbuf_check_data {
> return -ret;
> }
>
> +const struct rte_flow_ops mlx4_flow_ops = {
> + .validate = mlx4_flow_validate,
> + .create = mlx4_flow_create,
> + .destroy = mlx4_flow_destroy,
> + .flush = mlx4_flow_flush,
> + .query = NULL,
> +};
> +
> +/**
> + * Manage filter operations.
> + *
> + * @param dev
> + * Pointer to Ethernet device structure.
> + * @param filter_type
> + * Filter type.
> + * @param filter_op
> + * Operation to perform.
> + * @param arg
> + * Pointer to operation-specific structure.
> + *
> + * @return
> + * 0 on success, negative errno value on failure.
> + */
> +static int
> +mlx4_dev_filter_ctrl(struct rte_eth_dev *dev,
> + enum rte_filter_type filter_type,
> + enum rte_filter_op filter_op,
> + void *arg)
> +{
> + int ret = EINVAL;
> +
> + switch (filter_type) {
> + case RTE_ETH_FILTER_GENERIC:
> + if (filter_op != RTE_ETH_FILTER_GET)
> + return -EINVAL;
> + *(const void **)arg = &mlx4_flow_ops;
> + return 0;
> + case RTE_ETH_FILTER_FDIR:
> + DEBUG("%p: filter type FDIR is not supported by this PMD",
> + (void *)dev);
> + break;
> + default:
> + ERROR("%p: filter type (%d) not supported",
> + (void *)dev, filter_type);
> + break;
> + }
> + return -ret;
> +}
> +
> static const struct eth_dev_ops mlx4_dev_ops = {
> .dev_configure = mlx4_dev_configure,
> .dev_start = mlx4_dev_start,
> @@ -5046,6 +5106,7 @@ struct txq_mp2mr_mbuf_check_data {
> .mac_addr_add = mlx4_mac_addr_add,
> .mac_addr_set = mlx4_mac_addr_set,
> .mtu_set = mlx4_dev_set_mtu,
> + .filter_ctrl = mlx4_dev_filter_ctrl,
> };
>
> /**
> diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
> index 70c9ecd..fac408b 100644
> --- a/drivers/net/mlx4/mlx4.h
> +++ b/drivers/net/mlx4/mlx4.h
> @@ -295,6 +295,8 @@ struct txq {
> struct ibv_exp_res_domain *rd; /* Resource Domain. */
> };
>
> +struct rte_flow;
> +
> struct priv {
> struct rte_eth_dev *dev; /* Ethernet device. */
> struct ibv_context *ctx; /* Verbs context. */
> @@ -337,6 +339,7 @@ struct priv {
> struct rxq *(*rxqs)[]; /* RX queues. */
> struct txq *(*txqs)[]; /* TX queues. */
> struct rte_intr_handle intr_handle; /* Interrupt handler. */
> + LIST_HEAD(mlx4_flows, rte_flow) flows;
> rte_spinlock_t lock; /* Lock for control functions. */
> };
>
> diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
> new file mode 100644
> index 0000000..65537c7
> --- /dev/null
> +++ b/drivers/net/mlx4/mlx4_flow.c
> @@ -0,0 +1,1043 @@
> +/*-
> + * BSD LICENSE
> + *
> + * Copyright 2017 6WIND S.A.
> + * Copyright 2017 Mellanox.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in
> + * the documentation and/or other materials provided with the
> + * distribution.
> + * * Neither the name of 6WIND S.A. nor the names of its
> + * contributors may be used to endorse or promote products derived
> + * from this software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <assert.h>
> +
> +#include <rte_flow.h>
> +#include <rte_flow_driver.h>
> +#include <rte_malloc.h>
> +
> +/* Generated configuration header. */
> +#include "mlx4_autoconf.h"
> +
> +/* PMD headers. */
> +#include "mlx4.h"
> +#include "mlx4_flow.h"
> +
> +/** Static initializer for items. */
> +#define ITEMS(...) \
> + (const enum rte_flow_item_type []){ \
> + __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
> + }
> +
> +/** Structure to generate a simple graph of layers supported by the NIC. */
> +struct mlx4_flow_items {
> + /** List of possible actions for these items. */
> + const enum rte_flow_action_type *const actions;
> + /** Bit-masks corresponding to the possibilities for the item. */
> + const void *mask;
> + /**
> + * Default bit-masks to use when item->mask is not provided. When
> + * \default_mask is also NULL, the full supported bit-mask (\mask) is
> + * used instead.
> + */
> + const void *default_mask;
> + /** Bit-masks size in bytes. */
> + const unsigned int mask_sz;
> + /**
> + * Check support for a given item.
> + *
> + * @param item[in]
> + * Item specification.
> + * @param mask[in]
> + * Bit-masks covering supported fields to compare with spec,
> + * last and mask in
> + * \item.
> + * @param size
> + * Bit-Mask size in bytes.
> + *
> + * @return
> + * 0 on success, negative value otherwise.
> + */
> + int (*validate)(const struct rte_flow_item *item,
> + const uint8_t *mask, unsigned int size);
> + /**
> + * Conversion function from rte_flow to NIC specific flow.
> + *
> + * @param item
> + * rte_flow item to convert.
> + * @param default_mask
> + * Default bit-masks to use when item->mask is not provided.
> + * @param data
> + * Internal structure to store the conversion.
> + *
> + * @return
> + * 0 on success, negative value otherwise.
> + */
> + int (*convert)(const struct rte_flow_item *item,
> + const void *default_mask,
> + void *data);
> + /** Size in bytes of the destination structure. */
> + const unsigned int dst_sz;
> + /** List of possible following items. */
> + const enum rte_flow_item_type *const items;
> +};
> +
> +/** Valid action for this PMD. */
> +static const enum rte_flow_action_type valid_actions[] = {
> + RTE_FLOW_ACTION_TYPE_DROP,
> + RTE_FLOW_ACTION_TYPE_QUEUE,
> + RTE_FLOW_ACTION_TYPE_END,
> +};
> +
> +/**
> + * Convert Ethernet item to Verbs specification.
> + *
> + * @param item[in]
> + * Item specification.
> + * @param default_mask[in]
> + * Default bit-masks to use when item->mask is not provided.
> + * @param data[in, out]
> + * User structure.
> + */
> +static int
> +mlx4_flow_create_eth(const struct rte_flow_item *item,
> + const void *default_mask,
> + void *data)
> +{
> + const struct rte_flow_item_eth *spec = item->spec;
> + const struct rte_flow_item_eth *mask = item->mask;
> + struct mlx4_flow *flow = (struct mlx4_flow *)data;
> + struct ibv_flow_spec_eth *eth;
> + const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
> + unsigned int i;
> +
> + ++flow->ibv_attr->num_of_specs;
> + flow->ibv_attr->priority = 2;
> + eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
> + *eth = (struct ibv_flow_spec_eth) {
> + .type = IBV_FLOW_SPEC_ETH,
> + .size = eth_size,
> + };
> + if (!spec) {
> + flow->ibv_attr->type = IBV_FLOW_ATTR_ALL_DEFAULT;
> + return 0;
> + }
> + if (!mask)
> + mask = default_mask;
> + memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
> + memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
> + memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
> + memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
> + /* Remove unwanted bits from values. */
> + for (i = 0; i < ETHER_ADDR_LEN; ++i) {
> + eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
> + eth->val.src_mac[i] &= eth->mask.src_mac[i];
> + }
> + return 0;
> +}
> +
> +/**
> + * Convert VLAN item to Verbs specification.
> + *
> + * @param item[in]
> + * Item specification.
> + * @param default_mask[in]
> + * Default bit-masks to use when item->mask is not provided.
> + * @param data[in, out]
> + * User structure.
> + */
> +static int
> +mlx4_flow_create_vlan(const struct rte_flow_item *item,
> + const void *default_mask,
> + void *data)
> +{
> + const struct rte_flow_item_vlan *spec = item->spec;
> + const struct rte_flow_item_vlan *mask = item->mask;
> + struct mlx4_flow *flow = (struct mlx4_flow *)data;
> + struct ibv_flow_spec_eth *eth;
> + const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
> +
> + eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
> + if (!spec)
> + return 0;
> + if (!mask)
> + mask = default_mask;
> + eth->val.vlan_tag = spec->tci;
> + eth->mask.vlan_tag = mask->tci;
> + eth->val.vlan_tag &= eth->mask.vlan_tag;
> + return 0;
> +}
> +
> +/**
> + * Convert IPv4 item to Verbs specification.
> + *
> + * @param item[in]
> + * Item specification.
> + * @param default_mask[in]
> + * Default bit-masks to use when item->mask is not provided.
> + * @param data[in, out]
> + * User structure.
> + */
> +static int
> +mlx4_flow_create_ipv4(const struct rte_flow_item *item,
> + const void *default_mask,
> + void *data)
> +{
> + const struct rte_flow_item_ipv4 *spec = item->spec;
> + const struct rte_flow_item_ipv4 *mask = item->mask;
> + struct mlx4_flow *flow = (struct mlx4_flow *)data;
> + struct ibv_flow_spec_ipv4 *ipv4;
> + unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4);
> +
> + ++flow->ibv_attr->num_of_specs;
> + flow->ibv_attr->priority = 1;
> + ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
> + *ipv4 = (struct ibv_flow_spec_ipv4) {
> + .type = IBV_FLOW_SPEC_IPV4,
> + .size = ipv4_size,
> + };
> + if (!spec)
> + return 0;
> + ipv4->val = (struct ibv_flow_ipv4_filter) {
> + .src_ip = spec->hdr.src_addr,
> + .dst_ip = spec->hdr.dst_addr,
> + };
> + if (!mask)
> + mask = default_mask;
> + ipv4->mask = (struct ibv_flow_ipv4_filter) {
> + .src_ip = mask->hdr.src_addr,
> + .dst_ip = mask->hdr.dst_addr,
> + };
> + /* Remove unwanted bits from values. */
> + ipv4->val.src_ip &= ipv4->mask.src_ip;
> + ipv4->val.dst_ip &= ipv4->mask.dst_ip;
> + return 0;
> +}
> +
> +/**
> + * Convert UDP item to Verbs specification.
> + *
> + * @param item[in]
> + * Item specification.
> + * @param default_mask[in]
> + * Default bit-masks to use when item->mask is not provided.
> + * @param data[in, out]
> + * User structure.
> + */
> +static int
> +mlx4_flow_create_udp(const struct rte_flow_item *item,
> + const void *default_mask,
> + void *data)
> +{
> + const struct rte_flow_item_udp *spec = item->spec;
> + const struct rte_flow_item_udp *mask = item->mask;
> + struct mlx4_flow *flow = (struct mlx4_flow *)data;
> + struct ibv_flow_spec_tcp_udp *udp;
> + unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
> +
> + ++flow->ibv_attr->num_of_specs;
> + flow->ibv_attr->priority = 0;
> + udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
> + *udp = (struct ibv_flow_spec_tcp_udp) {
> + .type = IBV_FLOW_SPEC_UDP,
> + .size = udp_size,
> + };
> + if (!spec)
> + return 0;
> + udp->val.dst_port = spec->hdr.dst_port;
> + udp->val.src_port = spec->hdr.src_port;
> + if (!mask)
> + mask = default_mask;
> + udp->mask.dst_port = mask->hdr.dst_port;
> + udp->mask.src_port = mask->hdr.src_port;
> + /* Remove unwanted bits from values. */
> + udp->val.src_port &= udp->mask.src_port;
> + udp->val.dst_port &= udp->mask.dst_port;
> + return 0;
> +}
> +
> +/**
> + * Convert TCP item to Verbs specification.
> + *
> + * @param item[in]
> + * Item specification.
> + * @param default_mask[in]
> + * Default bit-masks to use when item->mask is not provided.
> + * @param data[in, out]
> + * User structure.
> + */
> +static int
> +mlx4_flow_create_tcp(const struct rte_flow_item *item,
> + const void *default_mask,
> + void *data)
> +{
> + const struct rte_flow_item_tcp *spec = item->spec;
> + const struct rte_flow_item_tcp *mask = item->mask;
> + struct mlx4_flow *flow = (struct mlx4_flow *)data;
> + struct ibv_flow_spec_tcp_udp *tcp;
> + unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
> +
> + ++flow->ibv_attr->num_of_specs;
> + flow->ibv_attr->priority = 0;
> + tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
> + *tcp = (struct ibv_flow_spec_tcp_udp) {
> + .type = IBV_FLOW_SPEC_TCP,
> + .size = tcp_size,
> + };
> + if (!spec)
> + return 0;
> + tcp->val.dst_port = spec->hdr.dst_port;
> + tcp->val.src_port = spec->hdr.src_port;
> + if (!mask)
> + mask = default_mask;
> + tcp->mask.dst_port = mask->hdr.dst_port;
> + tcp->mask.src_port = mask->hdr.src_port;
> + /* Remove unwanted bits from values. */
> + tcp->val.src_port &= tcp->mask.src_port;
> + tcp->val.dst_port &= tcp->mask.dst_port;
> + return 0;
> +}
> +
> +/**
> + * Check support for a given item.
> + *
> + * @param item[in]
> + * Item specification.
> + * @param mask[in]
> + * Bit-masks covering supported fields to compare with spec, last and mask in
> + * \item.
> + * @param size
> + * Bit-Mask size in bytes.
> + *
> + * @return
> + * 0 on success, negative value otherwise.
> + */
> +static int
> +mlx4_flow_item_validate(const struct rte_flow_item *item,
> + const uint8_t *mask, unsigned int size)
> +{
> + int ret = 0;
> +
> + if (!item->spec && (item->mask || item->last))
> + return -1;
> + if (item->spec && !item->mask) {
> + unsigned int i;
> + const uint8_t *spec = item->spec;
> +
> + for (i = 0; i < size; ++i)
> + if ((spec[i] | mask[i]) != mask[i])
> + return -1;
> + }
> + if (item->last && !item->mask) {
> + unsigned int i;
> + const uint8_t *spec = item->last;
> +
> + for (i = 0; i < size; ++i)
> + if ((spec[i] | mask[i]) != mask[i])
> + return -1;
> + }
> + if (item->spec && item->last) {
> + uint8_t spec[size];
> + uint8_t last[size];
> + const uint8_t *apply = mask;
> + unsigned int i;
> +
> + if (item->mask)
> + apply = item->mask;
> + for (i = 0; i < size; ++i) {
> + spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
> + last[i] = ((const uint8_t *)item->last)[i] & apply[i];
> + }
> + ret = memcmp(spec, last, size);
> + }
> + return ret;
> +}
> +
> +static int
> +mlx4_flow_validate_eth(const struct rte_flow_item *item,
> + const uint8_t *mask, unsigned int size)
> +{
> + if (item->mask) {
> + const struct rte_flow_item_eth *mask = item->mask;
> +
> + if (mask->dst.addr_bytes[0] != 0xff ||
> + mask->dst.addr_bytes[1] != 0xff ||
> + mask->dst.addr_bytes[2] != 0xff ||
> + mask->dst.addr_bytes[3] != 0xff ||
> + mask->dst.addr_bytes[4] != 0xff ||
> + mask->dst.addr_bytes[5] != 0xff)
> + return -1;
> + }
> + return mlx4_flow_item_validate(item, mask, size);
> +}
> +
> +static int
> +mlx4_flow_validate_vlan(const struct rte_flow_item *item,
> + const uint8_t *mask, unsigned int size)
> +{
> + if (item->mask) {
> + const struct rte_flow_item_vlan *mask = item->mask;
> +
> + if (mask->tci != 0 &&
> + ntohs(mask->tci) != 0x0fff)
> + return -1;
> + }
> + return mlx4_flow_item_validate(item, mask, size);
> +}
> +
> +static int
> +mlx4_flow_validate_ipv4(const struct rte_flow_item *item,
> + const uint8_t *mask, unsigned int size)
> +{
> + if (item->mask) {
> + const struct rte_flow_item_ipv4 *mask = item->mask;
> +
> + if (mask->hdr.src_addr != 0 &&
> + mask->hdr.src_addr != 0xffffffff)
> + return -1;
> + if (mask->hdr.dst_addr != 0 &&
> + mask->hdr.dst_addr != 0xffffffff)
> + return -1;
> + }
> + return mlx4_flow_item_validate(item, mask, size);
> +}
> +
> +static int
> +mlx4_flow_validate_udp(const struct rte_flow_item *item,
> + const uint8_t *mask, unsigned int size)
> +{
> + if (item->mask) {
> + const struct rte_flow_item_udp *mask = item->mask;
> +
> + if (mask->hdr.src_port != 0 &&
> + mask->hdr.src_port != 0xffff)
> + return -1;
> + if (mask->hdr.dst_port != 0 &&
> + mask->hdr.dst_port != 0xffff)
> + return -1;
> + }
> + return mlx4_flow_item_validate(item, mask, size);
> +}
> +
> +static int
> +mlx4_flow_validate_tcp(const struct rte_flow_item *item,
> + const uint8_t *mask, unsigned int size)
> +{
> + if (item->mask) {
> + const struct rte_flow_item_tcp *mask = item->mask;
> +
> + if (mask->hdr.src_port != 0 &&
> + mask->hdr.src_port != 0xffff)
> + return -1;
> + if (mask->hdr.dst_port != 0 &&
> + mask->hdr.dst_port != 0xffff)
> + return -1;
> + }
> + return mlx4_flow_item_validate(item, mask, size);
> +}
> +
> +/**
> + * Graph of supported items and associated actions.
> + *
> + * Indexed by item type. For each entry, priv_flow_validate() uses:
> + * - .items: item types accepted right after this one,
> + * - .mask and .mask_sz: mask and size handed to the .validate callback,
> + * - .default_mask: mask handed to .convert when set (.mask otherwise),
> + * - .dst_sz: number of bytes added to the Verbs attribute offset.
> + */
> +static const struct mlx4_flow_items mlx4_flow_items[] = {
> + [RTE_FLOW_ITEM_TYPE_END] = {
> + .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH), /* Only ETH is listed here. */
> + },
> + [RTE_FLOW_ITEM_TYPE_ETH] = {
> + .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
> + RTE_FLOW_ITEM_TYPE_IPV4),
> + .actions = valid_actions,
> + .mask = &(const struct rte_flow_item_eth){
> + .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
> + .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
> + },
> + .default_mask = &rte_flow_item_eth_mask,
> + .mask_sz = sizeof(struct rte_flow_item_eth),
> + .validate = mlx4_flow_validate_eth,
> + .convert = mlx4_flow_create_eth,
> + .dst_sz = sizeof(struct ibv_flow_spec_eth),
> + },
> + [RTE_FLOW_ITEM_TYPE_VLAN] = {
> + .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4),
> + .actions = valid_actions,
> + .mask = &(const struct rte_flow_item_vlan){
> + /* rte_flow_item_vlan_mask is invalid for mlx4. */
> +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
> + .tci = 0x0fff,
> +#else
> + .tci = 0xff0f, /* 0x0fff with its two bytes swapped. */
> +#endif
> + },
> + .mask_sz = sizeof(struct rte_flow_item_vlan), /* No .default_mask: .convert gets .mask. */
> + .validate = mlx4_flow_validate_vlan,
> + .convert = mlx4_flow_create_vlan,
> + .dst_sz = 0, /* VLAN does not grow the Verbs attribute offset. */
> + },
> + [RTE_FLOW_ITEM_TYPE_IPV4] = {
> + .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
> + RTE_FLOW_ITEM_TYPE_TCP),
> + .actions = valid_actions,
> + .mask = &(const struct rte_flow_item_ipv4){
> + .hdr = {
> + .src_addr = -1,
> + .dst_addr = -1,
> + },
> + },
> + .default_mask = &rte_flow_item_ipv4_mask,
> + .mask_sz = sizeof(struct rte_flow_item_ipv4),
> + .validate = mlx4_flow_validate_ipv4,
> + .convert = mlx4_flow_create_ipv4,
> + .dst_sz = sizeof(struct ibv_flow_spec_ipv4),
> + },
> + [RTE_FLOW_ITEM_TYPE_UDP] = { /* No .items: no item may follow UDP. */
> + .actions = valid_actions,
> + .mask = &(const struct rte_flow_item_udp){
> + .hdr = {
> + .src_port = -1,
> + .dst_port = -1,
> + },
> + },
> + .default_mask = &rte_flow_item_udp_mask,
> + .mask_sz = sizeof(struct rte_flow_item_udp),
> + .validate = mlx4_flow_validate_udp,
> + .convert = mlx4_flow_create_udp,
> + .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
> + },
> + [RTE_FLOW_ITEM_TYPE_TCP] = { /* No .items: no item may follow TCP. */
> + .actions = valid_actions,
> + .mask = &(const struct rte_flow_item_tcp){
> + .hdr = {
> + .src_port = -1,
> + .dst_port = -1,
> + },
> + },
> + .default_mask = &rte_flow_item_tcp_mask,
> + .mask_sz = sizeof(struct rte_flow_item_tcp),
> + .validate = mlx4_flow_validate_tcp,
> + .convert = mlx4_flow_create_tcp,
> + .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
> + },
> +};
> +
> +/**
> + * Validate a flow supported by the NIC.
> + *
> + * Rejects unsupported attributes, walks the pattern through the
> + * mlx4_flow_items graph, then checks the action list. When flow->ibv_attr
> + * is set, every accepted item is also converted into it. In all cases
> + * flow->offset is increased by the size each accepted item requires, so a
> + * call with a NULL flow->ibv_attr can be used to size the buffer.
> + *
> + * @param priv
> + *   Pointer to private structure (used to bound-check queue indices).
> + * @param[in] attr
> + *   Flow rule attributes.
> + * @param[in] items
> + *   Pattern specification (list terminated by the END pattern item).
> + * @param[in] actions
> + *   Associated actions (list terminated by the END action).
> + * @param[out] error
> + *   Perform verbose error reporting if not NULL.
> + * @param[in, out] flow
> + *   Flow structure to update.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +static int
> +priv_flow_validate(struct priv *priv,
> +		   const struct rte_flow_attr *attr,
> +		   const struct rte_flow_item items[],
> +		   const struct rte_flow_action actions[],
> +		   struct rte_flow_error *error,
> +		   struct mlx4_flow *flow)
> +{
> +	const struct mlx4_flow_items *cur_item = mlx4_flow_items;
> +	struct mlx4_flow_action action = {
> +		.queue = 0,
> +		.drop = 0,
> +	};
> +
> +	if (attr->group) {
> +		rte_flow_error_set(error, ENOTSUP,
> +				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
> +				   NULL,
> +				   "groups are not supported");
> +		return -rte_errno;
> +	}
> +	if (attr->priority) {
> +		rte_flow_error_set(error, ENOTSUP,
> +				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
> +				   NULL,
> +				   "priorities are not supported");
> +		return -rte_errno;
> +	}
> +	if (attr->egress) {
> +		rte_flow_error_set(error, ENOTSUP,
> +				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
> +				   NULL,
> +				   "egress is not supported");
> +		return -rte_errno;
> +	}
> +	if (!attr->ingress) {
> +		rte_flow_error_set(error, ENOTSUP,
> +				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
> +				   NULL,
> +				   "only ingress is supported");
> +		return -rte_errno;
> +	}
> +	/* Go over items list. */
> +	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
> +		const struct mlx4_flow_items *token = NULL;
> +		unsigned int i;
> +		int err;
> +
> +		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
> +			continue;
> +		/*
> +		 * The nic can support patterns with NULL eth spec only
> +		 * if eth is a single item in a rule.
> +		 */
> +		if (!items->spec &&
> +		    items->type == RTE_FLOW_ITEM_TYPE_ETH) {
> +			const struct rte_flow_item *next = items + 1;
> +
> +			if (next->type != RTE_FLOW_ITEM_TYPE_END) {
> +				rte_flow_error_set(error, ENOTSUP,
> +						   RTE_FLOW_ERROR_TYPE_ITEM,
> +						   items,
> +						   "the rule requires"
> +						   " an Ethernet spec");
> +				return -rte_errno;
> +			}
> +		}
> +		/* The item must be a valid successor of the previous one. */
> +		for (i = 0;
> +		     cur_item->items &&
> +		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
> +		     ++i) {
> +			if (cur_item->items[i] == items->type) {
> +				token = &mlx4_flow_items[items->type];
> +				break;
> +			}
> +		}
> +		if (!token)
> +			goto exit_item_not_supported;
> +		cur_item = token;
> +		err = cur_item->validate(items,
> +					 (const uint8_t *)cur_item->mask,
> +					 cur_item->mask_sz);
> +		if (err)
> +			goto exit_item_not_supported;
> +		/* Conversion only happens once a buffer has been provided. */
> +		if (flow->ibv_attr && cur_item->convert) {
> +			err = cur_item->convert(items,
> +						(cur_item->default_mask ?
> +						 cur_item->default_mask :
> +						 cur_item->mask),
> +						flow);
> +			if (err)
> +				goto exit_item_not_supported;
> +		}
> +		flow->offset += cur_item->dst_sz;
> +	}
> +	/* Go over actions list */
> +	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
> +		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
> +			continue;
> +		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
> +			action.drop = 1;
> +		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
> +			const struct rte_flow_action_queue *queue =
> +				(const struct rte_flow_action_queue *)
> +				actions->conf;
> +
> +			/*
> +			 * Compare with >= rather than against rxqs_n - 1,
> +			 * which wraps around when no Rx queue is configured
> +			 * and would then accept any index.
> +			 */
> +			if (!queue || queue->index >= priv->rxqs_n)
> +				goto exit_action_not_supported;
> +			action.queue = 1;
> +		} else {
> +			goto exit_action_not_supported;
> +		}
> +	}
> +	if (!action.queue && !action.drop) {
> +		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
> +				   NULL, "no valid action");
> +		return -rte_errno;
> +	}
> +	return 0;
> +exit_item_not_supported:
> +	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
> +			   items, "item not supported");
> +	return -rte_errno;
> +exit_action_not_supported:
> +	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
> +			   actions, "action not supported");
> +	return -rte_errno;
> +}
> +
> +/**
> + * Validate a flow supported by the NIC.
> + *
> + * Runs priv_flow_validate() under the private lock with a scratch
> + * mlx4_flow that has no Verbs attribute buffer attached.
> + *
> + * @see rte_flow_validate()
> + * @see rte_flow_ops
> + */
> +int
> +mlx4_flow_validate(struct rte_eth_dev *dev,
> +		   const struct rte_flow_attr *attr,
> +		   const struct rte_flow_item items[],
> +		   const struct rte_flow_action actions[],
> +		   struct rte_flow_error *error)
> +{
> +	struct mlx4_flow scratch = { .offset = sizeof(struct ibv_flow_attr) };
> +	struct priv *priv = dev->data->dev_private;
> +	int status;
> +
> +	priv_lock(priv);
> +	status = priv_flow_validate(priv, attr, items, actions, error,
> +				    &scratch);
> +	priv_unlock(priv);
> +	return status;
> +}
> +
> +/**
> + * Complete flow rule creation.
> + *
> + * For a drop action, a dedicated CQ and QP are created and the rule is
> + * attached to that QP; otherwise the rule is attached to the QP of the
> + * target receive queue.
> + *
> + * @param priv
> + *   Pointer to private structure.
> + * @param ibv_attr
> + *   Verbs flow attributes. Ownership is transferred to this function: the
> + *   buffer is attached to the returned flow on success and freed on every
> + *   failure path.
> + * @param action
> + *   Target action structure.
> + * @param[out] error
> + *   Perform verbose error reporting if not NULL.
> + *
> + * @return
> + *   A flow if the rule could be created, NULL otherwise.
> + */
> +static struct rte_flow *
> +priv_flow_create_action_queue(struct priv *priv,
> +			      struct ibv_flow_attr *ibv_attr,
> +			      struct mlx4_flow_action *action,
> +			      struct rte_flow_error *error)
> +{
> +	struct rxq *rxq;
> +	struct ibv_qp *qp;
> +	struct rte_flow *rte_flow;
> +
> +	assert(priv->pd);
> +	assert(priv->ctx);
> +	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
> +	if (!rte_flow) {
> +		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
> +				   NULL, "cannot allocate flow memory");
> +		/* The caller does not release ibv_attr, do it here. */
> +		rte_free(ibv_attr);
> +		return NULL;
> +	}
> +	/* Attach early so that the error path below always frees it. */
> +	rte_flow->ibv_attr = ibv_attr;
> +	rxq = (*priv->rxqs)[action->queue_id];
> +	if (action->drop) {
> +		rte_flow->cq =
> +			ibv_exp_create_cq(priv->ctx, 1, NULL, NULL, 0,
> +					  &(struct ibv_exp_cq_init_attr){
> +						  .comp_mask = 0,
> +					  });
> +		if (!rte_flow->cq) {
> +			rte_flow_error_set(error, ENOMEM,
> +					   RTE_FLOW_ERROR_TYPE_HANDLE,
> +					   NULL, "cannot allocate CQ");
> +			goto error;
> +		}
> +		rte_flow->qp = ibv_exp_create_qp(
> +			priv->ctx,
> +			&(struct ibv_exp_qp_init_attr){
> +				.send_cq = rte_flow->cq,
> +				.recv_cq = rte_flow->cq,
> +				.cap = {
> +					.max_recv_wr = 1,
> +					.max_recv_sge = 1,
> +				},
> +				.qp_type = IBV_QPT_RAW_PACKET,
> +				.comp_mask =
> +					IBV_EXP_QP_INIT_ATTR_PD |
> +					IBV_EXP_QP_INIT_ATTR_PORT |
> +					IBV_EXP_QP_INIT_ATTR_RES_DOMAIN,
> +				.pd = priv->pd,
> +				.res_domain = rxq->rd,
> +				.port_num = priv->port,
> +			});
> +		if (!rte_flow->qp) {
> +			rte_flow_error_set(error, ENOMEM,
> +					   RTE_FLOW_ERROR_TYPE_HANDLE,
> +					   NULL, "cannot allocate QP");
> +			goto error;
> +		}
> +		qp = rte_flow->qp;
> +	} else {
> +		rte_flow->rxq = rxq;
> +		qp = rxq->qp;
> +	}
> +	rte_flow->ibv_flow = ibv_create_flow(qp, rte_flow->ibv_attr);
> +	if (!rte_flow->ibv_flow) {
> +		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
> +				   NULL, "flow rule creation failure");
> +		goto error;
> +	}
> +	return rte_flow;
> +
> +error:
> +	assert(rte_flow);
> +	/* The QP references the CQ and must be destroyed first. */
> +	if (rte_flow->qp)
> +		ibv_destroy_qp(rte_flow->qp);
> +	if (rte_flow->cq)
> +		ibv_destroy_cq(rte_flow->cq);
> +	rte_free(rte_flow->ibv_attr);
> +	rte_free(rte_flow);
> +	return NULL;
> +}
> +
> +/**
> + * Convert a flow.
> + *
> + * Runs priv_flow_validate() twice: first without a buffer, which only
> + * accumulates the required size in flow.offset, then again once
> + * flow.ibv_attr has been allocated so that the items are converted into
> + * it. The action list is then scanned to fill the target action and the
> + * rule is completed by priv_flow_create_action_queue().
> + *
> + * NOTE(review): when priv_flow_create_action_queue() returns NULL,
> + * flow.ibv_attr is not released here; confirm the callee frees it on all
> + * of its failure paths.
> + *
> + * @param priv
> + * Pointer to private structure.
> + * @param[in] attr
> + * Flow rule attributes.
> + * @param[in] items
> + * Pattern specification (list terminated by the END pattern item).
> + * @param[in] actions
> + * Associated actions (list terminated by the END action).
> + * @param[out] error
> + * Perform verbose error reporting if not NULL.
> + *
> + * @return
> + * A flow on success, NULL otherwise.
> + */
> +static struct rte_flow *
> +priv_flow_create(struct priv *priv,
> + const struct rte_flow_attr *attr,
> + const struct rte_flow_item items[],
> + const struct rte_flow_action actions[],
> + struct rte_flow_error *error)
> +{
> + struct rte_flow *rte_flow;
> + struct mlx4_flow_action action;
> + struct mlx4_flow flow = { .offset = sizeof(struct ibv_flow_attr), };
> + int err;
> +
> + err = priv_flow_validate(priv, attr, items, actions, error, &flow); /* Sizing pass: flow.ibv_attr is NULL. */
> + if (err)
> + return NULL;
> + flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
> + if (!flow.ibv_attr) {
> + rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
> + NULL, "cannot allocate ibv_attr memory");
> + return NULL;
> + }
> + flow.offset = sizeof(struct ibv_flow_attr); /* Rewind before the conversion pass. */
> + *flow.ibv_attr = (struct ibv_flow_attr){
> + .comp_mask = 0,
> + .type = IBV_FLOW_ATTR_NORMAL,
> + .size = sizeof(struct ibv_flow_attr),
> + .priority = attr->priority, /* Always 0: validation rejects other values. */
> + .num_of_specs = 0,
> + .port = priv->port,
> + .flags = 0,
> + };
> + claim_zero(priv_flow_validate(priv, attr, items, actions,
> + error, &flow)); /* Conversion pass, expected to succeed. */
> + action = (struct mlx4_flow_action){
> + .queue = 0,
> + .drop = 0,
> + };
> + for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
> + if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
> + continue;
> + } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
> + action.queue = 1;
> + action.queue_id =
> + ((const struct rte_flow_action_queue *)
> + actions->conf)->index;
> + } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
> + action.drop = 1;
> + } else {
> + rte_flow_error_set(error, ENOTSUP,
> + RTE_FLOW_ERROR_TYPE_ACTION,
> + actions, "unsupported action");
> + goto exit;
> + }
> + }
> + rte_flow = priv_flow_create_action_queue(priv, flow.ibv_attr,
> + &action, error);
> + return rte_flow;
> +exit:
> + rte_free(flow.ibv_attr);
> + return NULL;
> +}
> +
> +/**
> + * Create a flow.
> + *
> + * Builds the rule with priv_flow_create() under the private lock and, on
> + * success, links it at the head of priv->flows.
> + *
> + * @see rte_flow_create()
> + * @see rte_flow_ops
> + */
> +struct rte_flow *
> +mlx4_flow_create(struct rte_eth_dev *dev,
> +		 const struct rte_flow_attr *attr,
> +		 const struct rte_flow_item items[],
> +		 const struct rte_flow_action actions[],
> +		 struct rte_flow_error *error)
> +{
> +	struct priv *priv = dev->data->dev_private;
> +	struct rte_flow *new_flow;
> +
> +	priv_lock(priv);
> +	new_flow = priv_flow_create(priv, attr, items, actions, error);
> +	if (!new_flow)
> +		goto unlock;
> +	LIST_INSERT_HEAD(&priv->flows, new_flow, next);
> +	DEBUG("Flow created %p", (void *)new_flow);
> +unlock:
> +	priv_unlock(priv);
> +	return new_flow;
> +}
> +
> +/**
> + * Destroy a flow.
> + *
> + * Unlinks the flow from its list, releases the Verbs flow, QP and CQ it
> + * holds (in that order), then frees its attribute buffer and the flow
> + * itself.
> + *
> + * @param priv
> + *   Pointer to private structure.
> + * @param[in] flow
> + *   Flow to destroy.
> + */
> +static void
> +priv_flow_destroy(struct priv *priv, struct rte_flow *flow)
> +{
> +	struct ibv_flow *const verbs_flow = flow->ibv_flow;
> +	struct ibv_qp *const own_qp = flow->qp;
> +	struct ibv_cq *const own_cq = flow->cq;
> +
> +	(void)priv;
> +	LIST_REMOVE(flow, next);
> +	if (verbs_flow != NULL)
> +		claim_zero(ibv_destroy_flow(verbs_flow));
> +	if (own_qp != NULL)
> +		claim_zero(ibv_destroy_qp(own_qp));
> +	if (own_cq != NULL)
> +		claim_zero(ibv_destroy_cq(own_cq));
> +	rte_free(flow->ibv_attr);
> +	DEBUG("Flow destroyed %p", (void *)flow);
> +	rte_free(flow);
> +}
> +
> +/**
> + * Destroy a flow.
> + *
> + * Calls priv_flow_destroy() under the private lock; always reports
> + * success and leaves @p error untouched.
> + *
> + * @see rte_flow_destroy()
> + * @see rte_flow_ops
> + */
> +int
> +mlx4_flow_destroy(struct rte_eth_dev *dev,
> +		  struct rte_flow *flow,
> +		  struct rte_flow_error *error)
> +{
> +	struct priv *const owner = dev->data->dev_private;
> +
> +	(void)error;
> +	priv_lock(owner);
> +	priv_flow_destroy(owner, flow);
> +	priv_unlock(owner);
> +	return 0;
> +}
> +
> +/**
> + * Destroy all flows.
> + *
> + * Repeatedly destroys the head of priv->flows until the list is empty.
> + *
> + * @param priv
> + *   Pointer to private structure.
> + */
> +static void
> +priv_flow_flush(struct priv *priv)
> +{
> +	struct rte_flow *head;
> +
> +	/* priv_flow_destroy() unlinks the flow, so the head advances. */
> +	while ((head = LIST_FIRST(&priv->flows)) != NULL)
> +		priv_flow_destroy(priv, head);
> +}
> +
> +/**
> + * Destroy all flows.
> + *
> + * Calls priv_flow_flush() under the private lock; always reports success
> + * and leaves @p error untouched.
> + *
> + * @see rte_flow_flush()
> + * @see rte_flow_ops
> + */
> +int
> +mlx4_flow_flush(struct rte_eth_dev *dev,
> +		struct rte_flow_error *error)
> +{
> +	struct priv *const owner = dev->data->dev_private;
> +
> +	(void)error;
> +	priv_lock(owner);
> +	priv_flow_flush(owner);
> +	priv_unlock(owner);
> +	return 0;
> +}
> +
> +/**
> + * Remove all flows.
> + *
> + * Called by dev_stop() to remove all flows. Only the Verbs flow of each
> + * rule is destroyed; the rules themselves stay in priv->flows. Rules that
> + * currently have no Verbs flow (for instance after a partially failed
> + * mlx4_priv_flow_start() or a previous stop) are skipped.
> + *
> + * @param priv
> + *   Pointer to private structure.
> + */
> +void
> +mlx4_priv_flow_stop(struct priv *priv)
> +{
> +	struct rte_flow *flow;
> +
> +	LIST_FOREACH(flow, &priv->flows, next) {
> +		/* Nothing to remove, and ibv_destroy_flow(NULL) is invalid. */
> +		if (!flow->ibv_flow)
> +			continue;
> +		claim_zero(ibv_destroy_flow(flow->ibv_flow));
> +		flow->ibv_flow = NULL;
> +		DEBUG("Flow %p removed", (void *)flow);
> +	}
> +}
> +
> +/**
> + * Add all flows.
> + *
> + * Creates a Verbs flow for every rule in priv->flows, on the rule's own QP
> + * when it has one and on the QP of its receive queue otherwise. Stops at
> + * the first failure.
> + *
> + * @param priv
> + *   Pointer to private structure.
> + *
> + * @return
> + *   0 on success, an errno value otherwise and rte_errno is set.
> + */
> +int
> +mlx4_priv_flow_start(struct priv *priv)
> +{
> +	struct rte_flow *flow;
> +
> +	LIST_FOREACH(flow, &priv->flows, next) {
> +		struct ibv_qp *target = flow->qp;
> +
> +		if (!target)
> +			target = flow->rxq->qp;
> +		flow->ibv_flow = ibv_create_flow(target, flow->ibv_attr);
> +		if (flow->ibv_flow) {
> +			DEBUG("Flow %p applied", (void *)flow);
> +			continue;
> +		}
> +		DEBUG("Flow %p cannot be applied", (void *)flow);
> +		rte_errno = EINVAL;
> +		return rte_errno;
> +	}
> +	return 0;
> +}
> diff --git a/drivers/net/mlx4/mlx4_flow.h b/drivers/net/mlx4/mlx4_flow.h
> new file mode 100644
> index 0000000..66c5be6
> --- /dev/null
> +++ b/drivers/net/mlx4/mlx4_flow.h
> @@ -0,0 +1,104 @@
> +/*-
> + * BSD LICENSE
> + *
> + * Copyright 2017 6WIND S.A.
> + * Copyright 2017 Mellanox.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in
> + * the documentation and/or other materials provided with the
> + * distribution.
> + * * Neither the name of 6WIND S.A. nor the names of its
> + * contributors may be used to endorse or promote products derived
> + * from this software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#ifndef RTE_PMD_MLX4_FLOW_H_
> +#define RTE_PMD_MLX4_FLOW_H_
> +
> +#include <stddef.h>
> +#include <stdint.h>
> +#include <sys/queue.h>
> +
> +/* Verbs header. */
> +/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
> +#ifdef PEDANTIC
> +#pragma GCC diagnostic ignored "-Wpedantic"
> +#endif
> +#include <infiniband/verbs.h>
> +#ifdef PEDANTIC
> +#pragma GCC diagnostic error "-Wpedantic"
> +#endif
> +
> +#include <rte_flow.h>
> +#include <rte_flow_driver.h>
> +#include <rte_byteorder.h>
> +
> +#include "mlx4.h"
> +
> +struct rte_flow { /* Driver-side flow rule, linked into priv->flows. */
> + LIST_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
> + struct rxq *rxq; /**< Pointer to the queue, NULL if drop queue. */
> + struct ibv_flow *ibv_flow; /**< Verbs flow, set to NULL by mlx4_priv_flow_stop(). */
> + struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes, freed with the flow. */
> + struct ibv_qp *qp; /**< Verbs queue pair, only created for drop flows. */
> + struct ibv_cq *cq; /**< Verbs completion queue, only created for drop flows. */
> +};
> +
> +int
> +mlx4_flow_validate(struct rte_eth_dev *dev,
> + const struct rte_flow_attr *attr,
> + const struct rte_flow_item items[],
> + const struct rte_flow_action actions[],
> + struct rte_flow_error *error); /* See rte_flow_validate(). */
> +
> +struct rte_flow *
> +mlx4_flow_create(struct rte_eth_dev *dev,
> + const struct rte_flow_attr *attr,
> + const struct rte_flow_item items[],
> + const struct rte_flow_action actions[],
> + struct rte_flow_error *error); /* See rte_flow_create(). */
> +
> +int
> +mlx4_flow_destroy(struct rte_eth_dev *dev,
> + struct rte_flow *flow,
> + struct rte_flow_error *error); /* See rte_flow_destroy(). */
> +
> +int
> +mlx4_flow_flush(struct rte_eth_dev *dev,
> + struct rte_flow_error *error); /* See rte_flow_flush(). */
> +
> +/**
> + * Structure to pass to the conversion function.
> + *
> + * Also used by the validation code to accumulate the buffer size needed
> + * for the Verbs attribute before that buffer is allocated.
> + */
> +struct mlx4_flow {
> + struct ibv_flow_attr *ibv_attr; /**< Verbs attribute, NULL while only validating or sizing. */
> + unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
> +};
> +
> +struct mlx4_flow_action { /* Target of a rule as gathered from its action list. */
> + uint32_t drop:1; /**< Target is a drop queue. */
> + uint32_t queue:1; /**< Target is a receive queue. */
> + uint32_t queue_id; /**< Identifier of the queue, taken from the QUEUE action index. */
> +};
> +
> +int mlx4_priv_flow_start(struct priv *priv); /* Apply every flow of priv->flows. */
> +void mlx4_priv_flow_stop(struct priv *priv); /* Remove the Verbs flow of every rule. */
> +
> +#endif /* RTE_PMD_MLX4_FLOW_H_ */
> --
> 1.8.3.1
>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
--
Nélio Laranjeiro
6WIND
^ permalink raw reply [flat|nested] 15+ messages in thread