DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH 1/2] net/mlx4: split the definitions to the header file
@ 2017-02-21 14:07 Vasily Philipov
  2017-02-21 14:07 ` [dpdk-dev] [PATCH 2/2] net/mlx4: support basic flow items and actions Vasily Philipov
                   ` (5 more replies)
  0 siblings, 6 replies; 15+ messages in thread
From: Vasily Philipov @ 2017-02-21 14:07 UTC (permalink / raw)
  To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro

Make some structs and defines visible to other source files by moving
them into the mlx4.h header. priv_lock() and priv_unlock() are made
non-static and declared in the header for the same reason.

Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
 drivers/net/mlx4/mlx4.c | 183 ++--------------------------------------------
 drivers/net/mlx4/mlx4.h | 187 +++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 189 insertions(+), 181 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 79efaaa..82ccac8 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -1,8 +1,8 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright 2012-2015 6WIND S.A.
- *   Copyright 2012 Mellanox.
+ *   Copyright 2012-2017 6WIND S.A.
+ *   Copyright 2012-2017 Mellanox.
  *
  *   Redistribution and use in source and binary forms, with or without
  *   modification, are permitted provided that the following conditions
@@ -68,10 +68,6 @@
 #pragma GCC diagnostic error "-Wpedantic"
 #endif
 
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
 #include <rte_ether.h>
 #include <rte_ethdev.h>
 #include <rte_dev.h>
@@ -86,9 +82,6 @@
 #include <rte_log.h>
 #include <rte_alarm.h>
 #include <rte_memory.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
 
 /* Generated configuration header. */
 #include "mlx4_autoconf.h"
@@ -96,21 +89,6 @@
 /* PMD header. */
 #include "mlx4.h"
 
-/* Runtime logging through RTE_LOG() is enabled when not in debugging mode.
- * Intermediate LOG_*() macros add the required end-of-line characters. */
-#ifndef NDEBUG
-#define INFO(...) DEBUG(__VA_ARGS__)
-#define WARN(...) DEBUG(__VA_ARGS__)
-#define ERROR(...) DEBUG(__VA_ARGS__)
-#else
-#define LOG__(level, m, ...) \
-	RTE_LOG(level, PMD, MLX4_DRIVER_NAME ": " m "%c", __VA_ARGS__)
-#define LOG_(level, ...) LOG__(level, __VA_ARGS__, '\n')
-#define INFO(...) LOG_(INFO, __VA_ARGS__)
-#define WARN(...) LOG_(WARNING, __VA_ARGS__)
-#define ERROR(...) LOG_(ERR, __VA_ARGS__)
-#endif
-
 /* Convenience macros for accessing mbuf fields. */
 #define NEXT(m) ((m)->next)
 #define DATA_LEN(m) ((m)->data_len)
@@ -137,157 +115,6 @@
 	 (((val) & (from)) / ((from) / (to))) : \
 	 (((val) & (from)) * ((to) / (from))))
 
-struct mlx4_rxq_stats {
-	unsigned int idx; /**< Mapping index. */
-#ifdef MLX4_PMD_SOFT_COUNTERS
-	uint64_t ipackets;  /**< Total of successfully received packets. */
-	uint64_t ibytes;    /**< Total of successfully received bytes. */
-#endif
-	uint64_t idropped;  /**< Total of packets dropped when RX ring full. */
-	uint64_t rx_nombuf; /**< Total of RX mbuf allocation failures. */
-};
-
-struct mlx4_txq_stats {
-	unsigned int idx; /**< Mapping index. */
-#ifdef MLX4_PMD_SOFT_COUNTERS
-	uint64_t opackets; /**< Total of successfully sent packets. */
-	uint64_t obytes;   /**< Total of successfully sent bytes. */
-#endif
-	uint64_t odropped; /**< Total of packets not sent when TX ring full. */
-};
-
-/* RX element (scattered packets). */
-struct rxq_elt_sp {
-	struct ibv_recv_wr wr; /* Work Request. */
-	struct ibv_sge sges[MLX4_PMD_SGE_WR_N]; /* Scatter/Gather Elements. */
-	struct rte_mbuf *bufs[MLX4_PMD_SGE_WR_N]; /* SGEs buffers. */
-};
-
-/* RX element. */
-struct rxq_elt {
-	struct ibv_recv_wr wr; /* Work Request. */
-	struct ibv_sge sge; /* Scatter/Gather Element. */
-	/* mbuf pointer is derived from WR_ID(wr.wr_id).offset. */
-};
-
-/* RX queue descriptor. */
-struct rxq {
-	struct priv *priv; /* Back pointer to private data. */
-	struct rte_mempool *mp; /* Memory Pool for allocations. */
-	struct ibv_mr *mr; /* Memory Region (for mp). */
-	struct ibv_cq *cq; /* Completion Queue. */
-	struct ibv_qp *qp; /* Queue Pair. */
-	struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
-	struct ibv_exp_cq_family *if_cq; /* CQ interface. */
-	/*
-	 * Each VLAN ID requires a separate flow steering rule.
-	 */
-	BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
-	struct ibv_flow *mac_flow[MLX4_MAX_MAC_ADDRESSES][MLX4_MAX_VLAN_IDS];
-	struct ibv_flow *promisc_flow; /* Promiscuous flow. */
-	struct ibv_flow *allmulti_flow; /* Multicast flow. */
-	unsigned int port_id; /* Port ID for incoming packets. */
-	unsigned int elts_n; /* (*elts)[] length. */
-	unsigned int elts_head; /* Current index in (*elts)[]. */
-	union {
-		struct rxq_elt_sp (*sp)[]; /* Scattered RX elements. */
-		struct rxq_elt (*no_sp)[]; /* RX elements. */
-	} elts;
-	unsigned int sp:1; /* Use scattered RX elements. */
-	unsigned int csum:1; /* Enable checksum offloading. */
-	unsigned int csum_l2tun:1; /* Same for L2 tunnels. */
-	struct mlx4_rxq_stats stats; /* RX queue counters. */
-	unsigned int socket; /* CPU socket ID for allocations. */
-	struct ibv_exp_res_domain *rd; /* Resource Domain. */
-};
-
-/* TX element. */
-struct txq_elt {
-	struct rte_mbuf *buf;
-};
-
-/* Linear buffer type. It is used when transmitting buffers with too many
- * segments that do not fit the hardware queue (see max_send_sge).
- * Extra segments are copied (linearized) in such buffers, replacing the
- * last SGE during TX.
- * The size is arbitrary but large enough to hold a jumbo frame with
- * 8 segments considering mbuf.buf_len is about 2048 bytes. */
-typedef uint8_t linear_t[16384];
-
-/* TX queue descriptor. */
-struct txq {
-	struct priv *priv; /* Back pointer to private data. */
-	struct {
-		const struct rte_mempool *mp; /* Cached Memory Pool. */
-		struct ibv_mr *mr; /* Memory Region (for mp). */
-		uint32_t lkey; /* mr->lkey */
-	} mp2mr[MLX4_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
-	struct ibv_cq *cq; /* Completion Queue. */
-	struct ibv_qp *qp; /* Queue Pair. */
-	struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
-	struct ibv_exp_cq_family *if_cq; /* CQ interface. */
-#if MLX4_PMD_MAX_INLINE > 0
-	uint32_t max_inline; /* Max inline send size <= MLX4_PMD_MAX_INLINE. */
-#endif
-	unsigned int elts_n; /* (*elts)[] length. */
-	struct txq_elt (*elts)[]; /* TX elements. */
-	unsigned int elts_head; /* Current index in (*elts)[]. */
-	unsigned int elts_tail; /* First element awaiting completion. */
-	unsigned int elts_comp; /* Number of completion requests. */
-	unsigned int elts_comp_cd; /* Countdown for next completion request. */
-	unsigned int elts_comp_cd_init; /* Initial value for countdown. */
-	struct mlx4_txq_stats stats; /* TX queue counters. */
-	linear_t (*elts_linear)[]; /* Linearized buffers. */
-	struct ibv_mr *mr_linear; /* Memory Region for linearized buffers. */
-	unsigned int socket; /* CPU socket ID for allocations. */
-	struct ibv_exp_res_domain *rd; /* Resource Domain. */
-};
-
-struct priv {
-	struct rte_eth_dev *dev; /* Ethernet device. */
-	struct ibv_context *ctx; /* Verbs context. */
-	struct ibv_device_attr device_attr; /* Device properties. */
-	struct ibv_pd *pd; /* Protection Domain. */
-	/*
-	 * MAC addresses array and configuration bit-field.
-	 * An extra entry that cannot be modified by the DPDK is reserved
-	 * for broadcast frames (destination MAC address ff:ff:ff:ff:ff:ff).
-	 */
-	struct ether_addr mac[MLX4_MAX_MAC_ADDRESSES];
-	BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
-	/* VLAN filters. */
-	struct {
-		unsigned int enabled:1; /* If enabled. */
-		unsigned int id:12; /* VLAN ID (0-4095). */
-	} vlan_filter[MLX4_MAX_VLAN_IDS]; /* VLAN filters table. */
-	/* Device properties. */
-	uint16_t mtu; /* Configured MTU. */
-	uint8_t port; /* Physical port number. */
-	unsigned int started:1; /* Device started, flows enabled. */
-	unsigned int promisc:1; /* Device in promiscuous mode. */
-	unsigned int allmulti:1; /* Device receives all multicast packets. */
-	unsigned int hw_qpg:1; /* QP groups are supported. */
-	unsigned int hw_tss:1; /* TSS is supported. */
-	unsigned int hw_rss:1; /* RSS is supported. */
-	unsigned int hw_csum:1; /* Checksum offload is supported. */
-	unsigned int hw_csum_l2tun:1; /* Same for L2 tunnels. */
-	unsigned int rss:1; /* RSS is enabled. */
-	unsigned int vf:1; /* This is a VF device. */
-	unsigned int pending_alarm:1; /* An alarm is pending. */
-#ifdef INLINE_RECV
-	unsigned int inl_recv_size; /* Inline recv size */
-#endif
-	unsigned int max_rss_tbl_sz; /* Maximum number of RSS queues. */
-	/* RX/TX queues. */
-	struct rxq rxq_parent; /* Parent queue when RSS is enabled. */
-	unsigned int rxqs_n; /* RX queues array size. */
-	unsigned int txqs_n; /* TX queues array size. */
-	struct rxq *(*rxqs)[]; /* RX queues. */
-	struct txq *(*txqs)[]; /* TX queues. */
-	struct rte_intr_handle intr_handle; /* Interrupt handler. */
-	rte_spinlock_t lock; /* Lock for control functions. */
-};
-
 /* Local storage for secondary process data. */
 struct mlx4_secondary_data {
 	struct rte_eth_dev_data data; /* Local device data. */
@@ -335,8 +162,7 @@ struct mlx4_secondary_data {
  * @param priv
  *   Pointer to private structure.
  */
-static void
-priv_lock(struct priv *priv)
+void priv_lock(struct priv *priv)
 {
 	rte_spinlock_lock(&priv->lock);
 }
@@ -347,8 +173,7 @@ struct mlx4_secondary_data {
  * @param priv
  *   Pointer to private structure.
  */
-static void
-priv_unlock(struct priv *priv)
+void priv_unlock(struct priv *priv)
 {
 	rte_spinlock_unlock(&priv->lock);
 }
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 4c7505e..70c9ecd 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -1,8 +1,8 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright 2012-2015 6WIND S.A.
- *   Copyright 2012 Mellanox.
+ *   Copyright 2012-2017 6WIND S.A.
+ *   Copyright 2012-2017 Mellanox.
  *
  *   Redistribution and use in source and binary forms, with or without
  *   modification, are permitted provided that the following conditions
@@ -39,6 +39,33 @@
 #include <limits.h>
 
 /*
+ * Runtime logging through RTE_LOG() is enabled when not in debugging mode.
+ * Intermediate LOG_*() macros add the required end-of-line characters.
+ */
+#ifndef NDEBUG
+#define INFO(...) DEBUG(__VA_ARGS__)
+#define WARN(...) DEBUG(__VA_ARGS__)
+#define ERROR(...) DEBUG(__VA_ARGS__)
+#else
+#define LOG__(level, m, ...) \
+	RTE_LOG(level, PMD, MLX4_DRIVER_NAME ": " m "%c", __VA_ARGS__)
+#define LOG_(level, ...) LOG__(level, __VA_ARGS__, '\n')
+#define INFO(...) LOG_(INFO, __VA_ARGS__)
+#define WARN(...) LOG_(WARNING, __VA_ARGS__)
+#define ERROR(...) LOG_(ERR, __VA_ARGS__)
+#endif
+
+/* Verbs header. */
+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+/*
  * Maximum number of simultaneous MAC addresses supported.
  *
  * According to ConnectX's Programmer Reference Manual:
@@ -160,4 +187,160 @@ enum {
 #define claim_positive(...) (__VA_ARGS__)
 #endif /* NDEBUG */
 
+struct mlx4_rxq_stats {
+	unsigned int idx; /**< Mapping index. */
+#ifdef MLX4_PMD_SOFT_COUNTERS
+	uint64_t ipackets; /**< Total of successfully received packets. */
+	uint64_t ibytes; /**< Total of successfully received bytes. */
+#endif
+	uint64_t idropped; /**< Total of packets dropped when RX ring full. */
+	uint64_t rx_nombuf; /**< Total of RX mbuf allocation failures. */
+};
+
+/* RX element (scattered packets). */
+struct rxq_elt_sp {
+	struct ibv_recv_wr wr; /* Work Request. */
+	struct ibv_sge sges[MLX4_PMD_SGE_WR_N]; /* Scatter/Gather Elements. */
+	struct rte_mbuf *bufs[MLX4_PMD_SGE_WR_N]; /* SGEs buffers. */
+};
+
+/* RX element. */
+struct rxq_elt {
+	struct ibv_recv_wr wr; /* Work Request. */
+	struct ibv_sge sge; /* Scatter/Gather Element. */
+	/* mbuf pointer is derived from WR_ID(wr.wr_id).offset. */
+};
+
+/* RX queue descriptor. */
+struct rxq {
+	struct priv *priv; /* Back pointer to private data. */
+	struct rte_mempool *mp; /* Memory Pool for allocations. */
+	struct ibv_mr *mr; /* Memory Region (for mp). */
+	struct ibv_cq *cq; /* Completion Queue. */
+	struct ibv_qp *qp; /* Queue Pair. */
+	struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
+	struct ibv_exp_cq_family *if_cq; /* CQ interface. */
+	/*
+	 * Each VLAN ID requires a separate flow steering rule.
+	 */
+	BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
+	struct ibv_flow *mac_flow[MLX4_MAX_MAC_ADDRESSES][MLX4_MAX_VLAN_IDS];
+	struct ibv_flow *promisc_flow; /* Promiscuous flow. */
+	struct ibv_flow *allmulti_flow; /* Multicast flow. */
+	unsigned int port_id; /* Port ID for incoming packets. */
+	unsigned int elts_n; /* (*elts)[] length. */
+	unsigned int elts_head; /* Current index in (*elts)[]. */
+	union {
+		struct rxq_elt_sp (*sp)[]; /* Scattered RX elements. */
+		struct rxq_elt (*no_sp)[]; /* RX elements. */
+	} elts;
+	unsigned int sp:1; /* Use scattered RX elements. */
+	unsigned int csum:1; /* Enable checksum offloading. */
+	unsigned int csum_l2tun:1; /* Same for L2 tunnels. */
+	struct mlx4_rxq_stats stats; /* RX queue counters. */
+	unsigned int socket; /* CPU socket ID for allocations. */
+	struct ibv_exp_res_domain *rd; /* Resource Domain. */
+};
+
+/* TX element. */
+struct txq_elt {
+	struct rte_mbuf *buf;
+};
+
+struct mlx4_txq_stats {
+	unsigned int idx; /**< Mapping index. */
+#ifdef MLX4_PMD_SOFT_COUNTERS
+	uint64_t opackets; /**< Total of successfully sent packets. */
+	uint64_t obytes;   /**< Total of successfully sent bytes. */
+#endif
+	uint64_t odropped; /**< Total of packets not sent when TX ring full. */
+};
+
+/*
+ * Linear buffer type. It is used when transmitting buffers with too many
+ * segments that do not fit the hardware queue (see max_send_sge).
+ * Extra segments are copied (linearized) in such buffers, replacing the
+ * last SGE during TX.
+ * The size is arbitrary but large enough to hold a jumbo frame with
+ * 8 segments considering mbuf.buf_len is about 2048 bytes.
+ */
+typedef uint8_t linear_t[16384];
+
+/* TX queue descriptor. */
+struct txq {
+	struct priv *priv; /* Back pointer to private data. */
+	struct {
+		const struct rte_mempool *mp; /* Cached Memory Pool. */
+		struct ibv_mr *mr; /* Memory Region (for mp). */
+		uint32_t lkey; /* mr->lkey */
+	} mp2mr[MLX4_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
+	struct ibv_cq *cq; /* Completion Queue. */
+	struct ibv_qp *qp; /* Queue Pair. */
+	struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
+	struct ibv_exp_cq_family *if_cq; /* CQ interface. */
+#if MLX4_PMD_MAX_INLINE > 0
+	uint32_t max_inline; /* Max inline send size <= MLX4_PMD_MAX_INLINE. */
+#endif
+	unsigned int elts_n; /* (*elts)[] length. */
+	struct txq_elt (*elts)[]; /* TX elements. */
+	unsigned int elts_head; /* Current index in (*elts)[]. */
+	unsigned int elts_tail; /* First element awaiting completion. */
+	unsigned int elts_comp; /* Number of completion requests. */
+	unsigned int elts_comp_cd; /* Countdown for next completion request. */
+	unsigned int elts_comp_cd_init; /* Initial value for countdown. */
+	struct mlx4_txq_stats stats; /* TX queue counters. */
+	linear_t (*elts_linear)[]; /* Linearized buffers. */
+	struct ibv_mr *mr_linear; /* Memory Region for linearized buffers. */
+	unsigned int socket; /* CPU socket ID for allocations. */
+	struct ibv_exp_res_domain *rd; /* Resource Domain. */
+};
+
+struct priv {
+	struct rte_eth_dev *dev; /* Ethernet device. */
+	struct ibv_context *ctx; /* Verbs context. */
+	struct ibv_device_attr device_attr; /* Device properties. */
+	struct ibv_pd *pd; /* Protection Domain. */
+	/*
+	 * MAC addresses array and configuration bit-field.
+	 * An extra entry that cannot be modified by the DPDK is reserved
+	 * for broadcast frames (destination MAC address ff:ff:ff:ff:ff:ff).
+	 */
+	struct ether_addr mac[MLX4_MAX_MAC_ADDRESSES];
+	BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
+	/* VLAN filters. */
+	struct {
+		unsigned int enabled:1; /* If enabled. */
+		unsigned int id:12; /* VLAN ID (0-4095). */
+	} vlan_filter[MLX4_MAX_VLAN_IDS]; /* VLAN filters table. */
+	/* Device properties. */
+	uint16_t mtu; /* Configured MTU. */
+	uint8_t port; /* Physical port number. */
+	unsigned int started:1; /* Device started, flows enabled. */
+	unsigned int promisc:1; /* Device in promiscuous mode. */
+	unsigned int allmulti:1; /* Device receives all multicast packets. */
+	unsigned int hw_qpg:1; /* QP groups are supported. */
+	unsigned int hw_tss:1; /* TSS is supported. */
+	unsigned int hw_rss:1; /* RSS is supported. */
+	unsigned int hw_csum:1; /* Checksum offload is supported. */
+	unsigned int hw_csum_l2tun:1; /* Same for L2 tunnels. */
+	unsigned int rss:1; /* RSS is enabled. */
+	unsigned int vf:1; /* This is a VF device. */
+	unsigned int pending_alarm:1; /* An alarm is pending. */
+#ifdef INLINE_RECV
+	unsigned int inl_recv_size; /* Inline recv size */
+#endif
+	unsigned int max_rss_tbl_sz; /* Maximum number of RSS queues. */
+	/* RX/TX queues. */
+	struct rxq rxq_parent; /* Parent queue when RSS is enabled. */
+	unsigned int rxqs_n; /* RX queues array size. */
+	unsigned int txqs_n; /* TX queues array size. */
+	struct rxq *(*rxqs)[]; /* RX queues. */
+	struct txq *(*txqs)[]; /* TX queues. */
+	struct rte_intr_handle intr_handle; /* Interrupt handler. */
+	rte_spinlock_t lock; /* Lock for control functions. */
+};
+
+void priv_lock(struct priv *priv);
+void priv_unlock(struct priv *priv);
+
 #endif /* RTE_PMD_MLX4_H_ */
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [dpdk-dev] [PATCH 2/2] net/mlx4: support basic flow items and actions
  2017-02-21 14:07 [dpdk-dev] [PATCH 1/2] net/mlx4: split the definitions to the header file Vasily Philipov
@ 2017-02-21 14:07 ` Vasily Philipov
  2017-02-22  8:37   ` Nélio Laranjeiro
  2017-02-22  8:37 ` [dpdk-dev] [PATCH 1/2] net/mlx4: split the definitions to the header file Nélio Laranjeiro
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 15+ messages in thread
From: Vasily Philipov @ 2017-02-21 14:07 UTC (permalink / raw)
  To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro

Add support for the following items: eth, vlan, ipv4, udp and tcp, and
for the following actions: queue and drop.

Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
 drivers/net/mlx4/Makefile    |    3 +-
 drivers/net/mlx4/mlx4.c      |   60 ++-
 drivers/net/mlx4/mlx4.h      |    3 +
 drivers/net/mlx4/mlx4_flow.c | 1053 ++++++++++++++++++++++++++++++++++++++++++
 drivers/net/mlx4/mlx4_flow.h |  104 +++++
 5 files changed, 1220 insertions(+), 3 deletions(-)
 create mode 100644 drivers/net/mlx4/mlx4_flow.c
 create mode 100644 drivers/net/mlx4/mlx4_flow.h

diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile
index 68c5902..1d463f7 100644
--- a/drivers/net/mlx4/Makefile
+++ b/drivers/net/mlx4/Makefile
@@ -36,6 +36,7 @@ LIB = librte_pmd_mlx4.a
 
 # Sources.
 SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_flow.c
 
 # Dependencies.
 DEPDIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += lib/librte_ether
@@ -129,7 +130,7 @@ mlx4_autoconf.h: mlx4_autoconf.h.new
 		cmp '$<' '$@' $(AUTOCONF_OUTPUT) || \
 		mv '$<' '$@'
 
-mlx4.o: mlx4_autoconf.h
+$(SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD):.c=.o): mlx4_autoconf.h
 
 clean_mlx4: FORCE
 	$Q rm -f -- mlx4_autoconf.h mlx4_autoconf.h.new
diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 82ccac8..cc2ebfa 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -82,12 +82,14 @@
 #include <rte_log.h>
 #include <rte_alarm.h>
 #include <rte_memory.h>
+#include <rte_flow.h>
 
 /* Generated configuration header. */
 #include "mlx4_autoconf.h"
 
-/* PMD header. */
+/* PMD headers. */
 #include "mlx4.h"
+#include "mlx4_flow.h"
 
 /* Convenience macros for accessing mbuf fields. */
 #define NEXT(m) ((m)->next)
@@ -2351,6 +2353,7 @@ struct txq_mp2mr_mbuf_check_data {
 	assert(((uint8_t *)attr + sizeof(*attr)) == (uint8_t *)spec);
 	*attr = (struct ibv_flow_attr){
 		.type = IBV_FLOW_ATTR_NORMAL,
+		.priority = 3,
 		.num_of_specs = 1,
 		.port = priv->port,
 		.flags = 0
@@ -3936,6 +3939,7 @@ struct txq_mp2mr_mbuf_check_data {
 {
 	struct priv *priv = dev->data->dev_private;
 	unsigned int i = 0;
+	unsigned int err = 0;
 	unsigned int r;
 	struct rxq *rxq;
 
@@ -3985,8 +3989,9 @@ struct txq_mp2mr_mbuf_check_data {
 		return -ret;
 	} while ((--r) && ((rxq = (*priv->rxqs)[++i]), i));
 	priv_dev_interrupt_handler_install(priv, dev);
+	err = mlx4_priv_flow_start(priv);
 	priv_unlock(priv);
-	return 0;
+	return -err;
 }
 
 /**
@@ -4021,6 +4026,7 @@ struct txq_mp2mr_mbuf_check_data {
 		rxq = (*priv->rxqs)[0];
 		r = priv->rxqs_n;
 	}
+	mlx4_priv_flow_stop(priv);
 	/* Iterate only once when RSS is enabled. */
 	do {
 		/* Ignore nonexistent RX queues. */
@@ -5022,6 +5028,55 @@ struct txq_mp2mr_mbuf_check_data {
 	return -ret;
 }
 
+const struct rte_flow_ops mlx4_flow_ops = {
+	.validate = mlx4_flow_validate,
+	.create = mlx4_flow_create,
+	.destroy = mlx4_flow_destroy,
+	.flush = mlx4_flow_flush,
+	.query = NULL,
+};
+
+/**
+ * Manage filter operations.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param filter_type
+ *   Filter type.
+ * @param filter_op
+ *   Operation to perform.
+ * @param arg
+ *   Pointer to operation-specific structure.
+ *
+ * @return
+ *   0 on success, negative errno value on failure.
+ */
+static int
+mlx4_dev_filter_ctrl(struct rte_eth_dev *dev,
+		     enum rte_filter_type filter_type,
+		     enum rte_filter_op filter_op,
+		     void *arg)
+{
+	int ret = EINVAL;
+
+	switch (filter_type) {
+	case RTE_ETH_FILTER_GENERIC:
+		if (filter_op != RTE_ETH_FILTER_GET)
+			return -EINVAL;
+		*(const void **)arg = &mlx4_flow_ops;
+		return 0;
+	case RTE_ETH_FILTER_FDIR:
+		DEBUG("%p: filter type FDIR is not supported by this PMD",
+		      (void *)dev);
+		break;
+	default:
+		ERROR("%p: filter type (%d) not supported",
+		      (void *)dev, filter_type);
+		break;
+	}
+	return -ret;
+}
+
 static const struct eth_dev_ops mlx4_dev_ops = {
 	.dev_configure = mlx4_dev_configure,
 	.dev_start = mlx4_dev_start,
@@ -5056,6 +5111,7 @@ struct txq_mp2mr_mbuf_check_data {
 	.mac_addr_add = mlx4_mac_addr_add,
 	.mac_addr_set = mlx4_mac_addr_set,
 	.mtu_set = mlx4_dev_set_mtu,
+	.filter_ctrl = mlx4_dev_filter_ctrl,
 };
 
 /**
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 70c9ecd..fac408b 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -295,6 +295,8 @@ struct txq {
 	struct ibv_exp_res_domain *rd; /* Resource Domain. */
 };
 
+struct rte_flow;
+
 struct priv {
 	struct rte_eth_dev *dev; /* Ethernet device. */
 	struct ibv_context *ctx; /* Verbs context. */
@@ -337,6 +339,7 @@ struct priv {
 	struct rxq *(*rxqs)[]; /* RX queues. */
 	struct txq *(*txqs)[]; /* TX queues. */
 	struct rte_intr_handle intr_handle; /* Interrupt handler. */
+	LIST_HEAD(mlx4_flows, rte_flow) flows;
 	rte_spinlock_t lock; /* Lock for control functions. */
 };
 
diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
new file mode 100644
index 0000000..2328a18
--- /dev/null
+++ b/drivers/net/mlx4/mlx4_flow.c
@@ -0,0 +1,1053 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2017 6WIND S.A.
+ *   Copyright 2017 Mellanox.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of 6WIND S.A. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <assert.h>
+
+/* Verbs header. */
+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+#include <rte_malloc.h>
+
+/* Generated configuration header. */
+#include "mlx4_autoconf.h"
+
+/* PMD headers. */
+#include "mlx4.h"
+#include "mlx4_flow.h"
+
+/** Static initializer for items. */
+#define ITEMS(...) \
+	(const enum rte_flow_item_type []){ \
+		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
+	}
+
+/** Structure to generate a simple graph of layers supported by the NIC. */
+struct mlx4_flow_items {
+	/** List of possible actions for these items. */
+	const enum rte_flow_action_type *const actions;
+	/** Bit-masks corresponding to the possibilities for the item. */
+	const void *mask;
+	/**
+	 * Default bit-masks to use when item->mask is not provided. When
+	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
+	 * used instead.
+	 */
+	const void *default_mask;
+	/** Bit-masks size in bytes. */
+	const unsigned int mask_sz;
+	/**
+	 * Check support for a given item.
+	 *
+	 * @param item[in]
+	 *   Item specification.
+	 * @param mask[in]
+	 *   Bit-masks covering supported fields to compare with spec,
+	 *   last and mask in
+	 *   \item.
+	 * @param size
+	 *   Bit-Mask size in bytes.
+	 *
+	 * @return
+	 *   0 on success, negative value otherwise.
+	 */
+	int (*validate)(const struct rte_flow_item *item,
+			const uint8_t *mask, unsigned int size);
+	/**
+	 * Conversion function from rte_flow to NIC specific flow.
+	 *
+	 * @param item
+	 *   rte_flow item to convert.
+	 * @param default_mask
+	 *   Default bit-masks to use when item->mask is not provided.
+	 * @param data
+	 *   Internal structure to store the conversion.
+	 *
+	 * @return
+	 *   0 on success, negative value otherwise.
+	 */
+	int (*convert)(const struct rte_flow_item *item,
+		       const void *default_mask,
+		       void *data);
+	/** Size in bytes of the destination structure. */
+	const unsigned int dst_sz;
+	/** List of possible following items.  */
+	const enum rte_flow_item_type *const items;
+};
+
+/** Valid action for this PMD. */
+static const enum rte_flow_action_type valid_actions[] = {
+	RTE_FLOW_ACTION_TYPE_DROP,
+	RTE_FLOW_ACTION_TYPE_QUEUE,
+	RTE_FLOW_ACTION_TYPE_END,
+};
+
+/**
+ * Convert Ethernet item to Verbs specification.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param default_mask[in]
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param data[in, out]
+ *   User structure.
+ */
+static int
+mlx4_flow_create_eth(const struct rte_flow_item *item,
+		     const void *default_mask,
+		     void *data)
+{
+	const struct rte_flow_item_eth *spec = item->spec;
+	const struct rte_flow_item_eth *mask = item->mask;
+	struct mlx4_flow *flow = (struct mlx4_flow *)data;
+	struct ibv_flow_spec_eth *eth;
+	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
+	unsigned int i;
+
+	++flow->ibv_attr->num_of_specs;
+	flow->ibv_attr->priority = 2;
+	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+	*eth = (struct ibv_flow_spec_eth) {
+		.type = IBV_FLOW_SPEC_ETH,
+		.size = eth_size,
+	};
+	if (!spec) {
+		flow->ibv_attr->type = IBV_FLOW_ATTR_ALL_DEFAULT;
+		return 0;
+	}
+	if (!mask)
+		mask = default_mask;
+	memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
+	memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
+	memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
+	memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
+	/* Remove unwanted bits from values. */
+	for (i = 0; i < ETHER_ADDR_LEN; ++i) {
+		eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
+		eth->val.src_mac[i] &= eth->mask.src_mac[i];
+	}
+	return 0;
+}
+
+/**
+ * Convert VLAN item to Verbs specification.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param default_mask[in]
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param data[in, out]
+ *   User structure.
+ */
+static int
+mlx4_flow_create_vlan(const struct rte_flow_item *item,
+		      const void *default_mask,
+		      void *data)
+{
+	const struct rte_flow_item_vlan *spec = item->spec;
+	const struct rte_flow_item_vlan *mask = item->mask;
+	struct mlx4_flow *flow = (struct mlx4_flow *)data;
+	struct ibv_flow_spec_eth *eth;
+	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
+
+	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
+	if (!spec)
+		return 0;
+	if (!mask)
+		mask = default_mask;
+	eth->val.vlan_tag = spec->tci;
+	eth->mask.vlan_tag = mask->tci;
+	eth->val.vlan_tag &= eth->mask.vlan_tag;
+	return 0;
+}
+
+/**
+ * Convert IPv4 item to Verbs specification.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param default_mask[in]
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param data[in, out]
+ *   User structure.
+ */
+static int
+mlx4_flow_create_ipv4(const struct rte_flow_item *item,
+		      const void *default_mask,
+		      void *data)
+{
+	const struct rte_flow_item_ipv4 *spec = item->spec;
+	const struct rte_flow_item_ipv4 *mask = item->mask;
+	struct mlx4_flow *flow = (struct mlx4_flow *)data;
+	struct ibv_flow_spec_ipv4 *ipv4;
+	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4);
+
+	++flow->ibv_attr->num_of_specs;
+	flow->ibv_attr->priority = 1;
+	ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+	*ipv4 = (struct ibv_flow_spec_ipv4) {
+		.type = IBV_FLOW_SPEC_IPV4,
+		.size = ipv4_size,
+	};
+	if (!spec)
+		return 0;
+	ipv4->val = (struct ibv_flow_ipv4_filter) {
+		.src_ip = spec->hdr.src_addr,
+		.dst_ip = spec->hdr.dst_addr,
+	};
+	if (!mask)
+		mask = default_mask;
+	ipv4->mask = (struct ibv_flow_ipv4_filter) {
+		.src_ip = mask->hdr.src_addr,
+		.dst_ip = mask->hdr.dst_addr,
+	};
+	/* Remove unwanted bits from values. */
+	ipv4->val.src_ip &= ipv4->mask.src_ip;
+	ipv4->val.dst_ip &= ipv4->mask.dst_ip;
+	return 0;
+}
+
+/**
+ * Convert UDP item to Verbs specification.
+ *
+ * Writes a UDP Verbs specification at the current offset of the attribute
+ * buffer, counts it in num_of_specs and sets the attribute priority to 0.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] default_mask
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param[in, out] data
+ *   User structure (struct mlx4_flow).
+ *
+ * @return
+ *   Always 0.
+ */
+static int
+mlx4_flow_create_udp(const struct rte_flow_item *item,
+		     const void *default_mask,
+		     void *data)
+{
+	struct mlx4_flow *flow = data;
+	const struct rte_flow_item_udp *spec = item->spec;
+	const struct rte_flow_item_udp *mask =
+		item->mask ? item->mask : default_mask;
+	struct ibv_flow_spec_tcp_udp *udp =
+		(void *)((uintptr_t)flow->ibv_attr + flow->offset);
+
+	flow->ibv_attr->num_of_specs++;
+	flow->ibv_attr->priority = 0;
+	/* Start from a specification whose value and mask are all zeroes. */
+	*udp = (struct ibv_flow_spec_tcp_udp) {
+		.type = IBV_FLOW_SPEC_UDP,
+		.size = sizeof(*udp),
+	};
+	if (spec == NULL)
+		return 0;
+	udp->mask.src_port = mask->hdr.src_port;
+	udp->mask.dst_port = mask->hdr.dst_port;
+	/* Only keep the value bits selected by the mask. */
+	udp->val.src_port = spec->hdr.src_port & mask->hdr.src_port;
+	udp->val.dst_port = spec->hdr.dst_port & mask->hdr.dst_port;
+	return 0;
+}
+
+/**
+ * Convert TCP item to Verbs specification.
+ *
+ * Writes a TCP Verbs specification at the current offset of the attribute
+ * buffer, counts it in num_of_specs and sets the attribute priority to 0.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] default_mask
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param[in, out] data
+ *   User structure (struct mlx4_flow).
+ *
+ * @return
+ *   Always 0.
+ */
+static int
+mlx4_flow_create_tcp(const struct rte_flow_item *item,
+		     const void *default_mask,
+		     void *data)
+{
+	struct mlx4_flow *flow = data;
+	const struct rte_flow_item_tcp *spec = item->spec;
+	const struct rte_flow_item_tcp *mask =
+		item->mask ? item->mask : default_mask;
+	struct ibv_flow_spec_tcp_udp *tcp =
+		(void *)((uintptr_t)flow->ibv_attr + flow->offset);
+
+	flow->ibv_attr->num_of_specs++;
+	flow->ibv_attr->priority = 0;
+	/* Start from a specification whose value and mask are all zeroes. */
+	*tcp = (struct ibv_flow_spec_tcp_udp) {
+		.type = IBV_FLOW_SPEC_TCP,
+		.size = sizeof(*tcp),
+	};
+	if (spec == NULL)
+		return 0;
+	tcp->mask.src_port = mask->hdr.src_port;
+	tcp->mask.dst_port = mask->hdr.dst_port;
+	/* Only keep the value bits selected by the mask. */
+	tcp->val.src_port = spec->hdr.src_port & mask->hdr.src_port;
+	tcp->val.dst_port = spec->hdr.dst_port & mask->hdr.dst_port;
+	return 0;
+}
+
+/**
+ * Check support for a given item.
+ *
+ * The item is accepted when:
+ * - mask and last are only provided along with spec,
+ * - a user-provided mask only covers bits set in the supported mask,
+ * - without a user-provided mask, spec and last only use bits set in the
+ *   supported mask,
+ * - spec and last are identical once masked (ranges are not supported).
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] mask
+ *   Bit-masks covering supported fields to compare with spec, last and mask in
+ *   \item.
+ * @param size
+ *   Bit-Mask size in bytes.
+ *
+ * @return
+ *   0 on success, negative value otherwise.
+ */
+static int
+mlx4_flow_item_validate(const struct rte_flow_item *item,
+			const uint8_t *mask, unsigned int size)
+{
+	const uint8_t *spec = item->spec;
+	const uint8_t *last = item->last;
+	const uint8_t *user_mask = item->mask;
+	/* Mask applied to spec and last when comparing them. */
+	const uint8_t *apply = user_mask ? user_mask : mask;
+	unsigned int i;
+
+	if (!spec)
+		return (user_mask || last) ? -1 : 0;
+	for (i = 0; i < size; ++i) {
+		if (user_mask) {
+			/* Unsupported bits must not be requested by the mask. */
+			if ((user_mask[i] | mask[i]) != mask[i])
+				return -1;
+		} else {
+			if ((spec[i] | mask[i]) != mask[i])
+				return -1;
+			if (last && (last[i] | mask[i]) != mask[i])
+				return -1;
+		}
+		/* Any masked difference between spec and last is a range. */
+		if (last && ((spec[i] ^ last[i]) & apply[i]))
+			return -1;
+	}
+	return 0;
+}
+
+/**
+ * Check support for an Ethernet item.
+ *
+ * When the item provides its own mask, every byte of the destination MAC
+ * address mask must be 0xff. Remaining checks are left to
+ * mlx4_flow_item_validate().
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] mask
+ *   Bit-masks covering supported fields.
+ * @param size
+ *   Bit-mask size in bytes.
+ *
+ * @return
+ *   0 on success, non-zero value otherwise.
+ */
+static int
+mlx4_flow_validate_eth(const struct rte_flow_item *item,
+		       const uint8_t *mask, unsigned int size)
+{
+	const struct rte_flow_item_eth *eth_mask = item->mask;
+	unsigned int i;
+
+	if (eth_mask != NULL) {
+		for (i = 0; i < sizeof(eth_mask->dst.addr_bytes); ++i)
+			if (eth_mask->dst.addr_bytes[i] != 0xff)
+				return -1;
+	}
+	return mlx4_flow_item_validate(item, mask, size);
+}
+
+/**
+ * Check support for a VLAN item.
+ *
+ * When the item provides its own mask, its TCI mask must either be zero or
+ * equal 0x0fff once converted to host byte order. Remaining checks are left
+ * to mlx4_flow_item_validate().
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] mask
+ *   Bit-masks covering supported fields.
+ * @param size
+ *   Bit-mask size in bytes.
+ *
+ * @return
+ *   0 on success, non-zero value otherwise.
+ */
+static int
+mlx4_flow_validate_vlan(const struct rte_flow_item *item,
+			const uint8_t *mask, unsigned int size)
+{
+	const struct rte_flow_item_vlan *vlan_mask = item->mask;
+
+	if (vlan_mask != NULL &&
+	    vlan_mask->tci != 0 &&
+	    ntohs(vlan_mask->tci) != 0x0fff)
+		return -1;
+	return mlx4_flow_item_validate(item, mask, size);
+}
+
+/**
+ * Check support for an IPv4 item.
+ *
+ * When the item provides its own mask, the source and destination address
+ * masks must each be either empty or full (0xffffffff). Remaining checks are
+ * left to mlx4_flow_item_validate().
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] mask
+ *   Bit-masks covering supported fields.
+ * @param size
+ *   Bit-mask size in bytes.
+ *
+ * @return
+ *   0 on success, non-zero value otherwise.
+ */
+static int
+mlx4_flow_validate_ipv4(const struct rte_flow_item *item,
+			const uint8_t *mask, unsigned int size)
+{
+	const struct rte_flow_item_ipv4 *ipv4_mask = item->mask;
+
+	if (ipv4_mask != NULL) {
+		if (ipv4_mask->hdr.src_addr != 0 &&
+		    ipv4_mask->hdr.src_addr != 0xffffffff)
+			return -1;
+		if (ipv4_mask->hdr.dst_addr != 0 &&
+		    ipv4_mask->hdr.dst_addr != 0xffffffff)
+			return -1;
+	}
+	return mlx4_flow_item_validate(item, mask, size);
+}
+
+/**
+ * Check support for a UDP item.
+ *
+ * When the item provides its own mask, the source and destination port
+ * masks must each be either empty or full (0xffff). Remaining checks are
+ * left to mlx4_flow_item_validate().
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] mask
+ *   Bit-masks covering supported fields.
+ * @param size
+ *   Bit-mask size in bytes.
+ *
+ * @return
+ *   0 on success, non-zero value otherwise.
+ */
+static int
+mlx4_flow_validate_udp(const struct rte_flow_item *item,
+		       const uint8_t *mask, unsigned int size)
+{
+	const struct rte_flow_item_udp *udp_mask = item->mask;
+
+	if (udp_mask != NULL) {
+		if (udp_mask->hdr.src_port != 0 &&
+		    udp_mask->hdr.src_port != 0xffff)
+			return -1;
+		if (udp_mask->hdr.dst_port != 0 &&
+		    udp_mask->hdr.dst_port != 0xffff)
+			return -1;
+	}
+	return mlx4_flow_item_validate(item, mask, size);
+}
+
+/**
+ * Check support for a TCP item.
+ *
+ * When the item provides its own mask, the source and destination port
+ * masks must each be either empty or full (0xffff). Remaining checks are
+ * left to mlx4_flow_item_validate().
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] mask
+ *   Bit-masks covering supported fields.
+ * @param size
+ *   Bit-mask size in bytes.
+ *
+ * @return
+ *   0 on success, non-zero value otherwise.
+ */
+static int
+mlx4_flow_validate_tcp(const struct rte_flow_item *item,
+		       const uint8_t *mask, unsigned int size)
+{
+	const struct rte_flow_item_tcp *tcp_mask = item->mask;
+
+	if (tcp_mask != NULL) {
+		if (tcp_mask->hdr.src_port != 0 &&
+		    tcp_mask->hdr.src_port != 0xffff)
+			return -1;
+		if (tcp_mask->hdr.dst_port != 0 &&
+		    tcp_mask->hdr.dst_port != 0xffff)
+			return -1;
+	}
+	return mlx4_flow_item_validate(item, mask, size);
+}
+
+/**
+ * Graph of supported items and associated actions.
+ *
+ * Indexed by item type. For each entry, priv_flow_validate() uses:
+ * - .items: item types allowed to follow this one,
+ * - .mask/.mask_sz: supported bit-mask and its size, given to .validate,
+ * - .default_mask: mask given to .convert for items without a mask of their
+ *   own (.mask is used instead when .default_mask is unset),
+ * - .dst_sz: number of bytes added to the offset in the Verbs attribute
+ *   buffer once the item has been processed.
+ */
+static const struct mlx4_flow_items mlx4_flow_items[] = {
+	[RTE_FLOW_ITEM_TYPE_END] = {
+		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
+	},
+	[RTE_FLOW_ITEM_TYPE_ETH] = {
+		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
+			       RTE_FLOW_ITEM_TYPE_IPV4),
+		.actions = valid_actions,
+		.mask = &(const struct rte_flow_item_eth){
+			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+		},
+		.default_mask = &rte_flow_item_eth_mask,
+		.mask_sz = sizeof(struct rte_flow_item_eth),
+		.validate = mlx4_flow_validate_eth,
+		.convert = mlx4_flow_create_eth,
+		.dst_sz = sizeof(struct ibv_flow_spec_eth),
+	},
+	[RTE_FLOW_ITEM_TYPE_VLAN] = {
+		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4),
+		.actions = valid_actions,
+		.mask = &(const struct rte_flow_item_vlan){
+		/* rte_flow_item_vlan_mask is invalid for mlx4. */
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+			.tci = 0x0fff,
+#else
+			.tci = 0xff0f,
+#endif
+		},
+		.mask_sz = sizeof(struct rte_flow_item_vlan),
+		.validate = mlx4_flow_validate_vlan,
+		.convert = mlx4_flow_create_vlan,
+		.dst_sz = 0, /* Takes no room in the attribute buffer. */
+	},
+	[RTE_FLOW_ITEM_TYPE_IPV4] = {
+		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
+			       RTE_FLOW_ITEM_TYPE_TCP),
+		.actions = valid_actions,
+		.mask = &(const struct rte_flow_item_ipv4){
+			.hdr = {
+				.src_addr = -1,
+				.dst_addr = -1,
+			},
+		},
+		.default_mask = &rte_flow_item_ipv4_mask,
+		.mask_sz = sizeof(struct rte_flow_item_ipv4),
+		.validate = mlx4_flow_validate_ipv4,
+		.convert = mlx4_flow_create_ipv4,
+		.dst_sz = sizeof(struct ibv_flow_spec_ipv4),
+	},
+	[RTE_FLOW_ITEM_TYPE_UDP] = {
+		.actions = valid_actions,
+		.mask = &(const struct rte_flow_item_udp){
+			.hdr = {
+				.src_port = -1,
+				.dst_port = -1,
+			},
+		},
+		.default_mask = &rte_flow_item_udp_mask,
+		.mask_sz = sizeof(struct rte_flow_item_udp),
+		.validate = mlx4_flow_validate_udp,
+		.convert = mlx4_flow_create_udp,
+		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
+	},
+	[RTE_FLOW_ITEM_TYPE_TCP] = {
+		.actions = valid_actions,
+		.mask = &(const struct rte_flow_item_tcp){
+			.hdr = {
+				.src_port = -1,
+				.dst_port = -1,
+			},
+		},
+		.default_mask = &rte_flow_item_tcp_mask,
+		.mask_sz = sizeof(struct rte_flow_item_tcp),
+		.validate = mlx4_flow_validate_tcp,
+		.convert = mlx4_flow_create_tcp,
+		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
+	},
+};
+
+/**
+ * Validate a flow supported by the NIC.
+ *
+ * Walks the pattern through the mlx4_flow_items graph and validates each
+ * item. When flow->ibv_attr is set, items are also converted to their Verbs
+ * specification. flow->offset is advanced by the size of each specification
+ * whether or not a conversion takes place, so that a call made without an
+ * attribute buffer yields the size such a buffer requires.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[in] attr
+ *   Flow rule attributes.
+ * @param[in] items
+ *   Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ *   Associated actions (list terminated by the END action).
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ * @param[in, out] flow
+ *   Flow structure to update.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+priv_flow_validate(struct priv *priv,
+		   const struct rte_flow_attr *attr,
+		   const struct rte_flow_item items[],
+		   const struct rte_flow_action actions[],
+		   struct rte_flow_error *error,
+		   struct mlx4_flow *flow)
+{
+	const struct mlx4_flow_items *cur_item = mlx4_flow_items;
+	struct mlx4_flow_action action = {
+		.queue = 0,
+		.drop = 0,
+	};
+
+	if (attr->group) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
+				   NULL,
+				   "groups are not supported");
+		return -rte_errno;
+	}
+	if (attr->priority) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
+				   NULL,
+				   "priorities are not supported");
+		return -rte_errno;
+	}
+	if (attr->egress) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
+				   NULL,
+				   "egress is not supported");
+		return -rte_errno;
+	}
+	if (!attr->ingress) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
+				   NULL,
+				   "only ingress is supported");
+		return -rte_errno;
+	}
+	/* Go over items list. */
+	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
+		const struct mlx4_flow_items *token = NULL;
+		unsigned int i;
+		int err;
+
+		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
+			continue;
+		/*
+		 * The nic can support patterns with NULL eth spec only
+		 * if eth is a single item in a rule.
+		 */
+		if (!items->spec &&
+			items->type == RTE_FLOW_ITEM_TYPE_ETH) {
+			const struct rte_flow_item *next = items + 1;
+
+			/* VOID items are ignored and do not count as items. */
+			while (next->type == RTE_FLOW_ITEM_TYPE_VOID)
+				++next;
+			if (next->type != RTE_FLOW_ITEM_TYPE_END) {
+				rte_flow_error_set(error, ENOTSUP,
+						   RTE_FLOW_ERROR_TYPE_ITEM,
+						   items,
+						   "the rule requires"
+						   " an Ethernet spec");
+				return -rte_errno;
+			}
+		}
+		/* Look for this item among those allowed after cur_item. */
+		for (i = 0;
+		     cur_item->items &&
+		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
+		     ++i) {
+			if (cur_item->items[i] == items->type) {
+				token = &mlx4_flow_items[items->type];
+				break;
+			}
+		}
+		if (!token)
+			goto exit_item_not_supported;
+		cur_item = token;
+		err = cur_item->validate(items,
+					(const uint8_t *)cur_item->mask,
+					 cur_item->mask_sz);
+		if (err)
+			goto exit_item_not_supported;
+		if (flow->ibv_attr && cur_item->convert) {
+			err = cur_item->convert(items,
+						(cur_item->default_mask ?
+						 cur_item->default_mask :
+						 cur_item->mask),
+						 flow);
+			if (err)
+				goto exit_item_not_supported;
+		}
+		flow->offset += cur_item->dst_sz;
+	}
+	/* Go over actions list */
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
+		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
+			continue;
+		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
+			action.drop = 1;
+		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
+			const struct rte_flow_action_queue *queue =
+				(const struct rte_flow_action_queue *)
+				actions->conf;
+
+			/*
+			 * Compare against rxqs_n itself: (rxqs_n - 1) wraps
+			 * around when no Rx queue is configured.
+			 */
+			if (!queue || queue->index >= priv->rxqs_n)
+				goto exit_action_not_supported;
+			action.queue = 1;
+		} else {
+			goto exit_action_not_supported;
+		}
+	}
+	if (!action.queue && !action.drop) {
+		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "no valid action");
+		return -rte_errno;
+	}
+	return 0;
+exit_item_not_supported:
+	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
+			   items, "item not supported");
+	return -rte_errno;
+exit_action_not_supported:
+	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+			   actions, "action not supported");
+	return -rte_errno;
+}
+
+/**
+ * Validate a flow supported by the NIC.
+ *
+ * Runs priv_flow_validate() under the private structure lock, using a
+ * scratch mlx4_flow that carries no Verbs attribute buffer.
+ *
+ * @see rte_flow_validate()
+ * @see rte_flow_ops
+ */
+int
+mlx4_flow_validate(struct rte_eth_dev *dev,
+		   const struct rte_flow_attr *attr,
+		   const struct rte_flow_item items[],
+		   const struct rte_flow_action actions[],
+		   struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+	int ret;
+	struct mlx4_flow flow = { .offset = sizeof(struct ibv_flow_attr) };
+
+	priv_lock(priv);
+	ret = priv_flow_validate(priv, attr, items, actions, error, &flow);
+	priv_unlock(priv);
+	return ret;
+}
+
+/**
+ * Complete flow rule creation.
+ *
+ * This function takes ownership of ibv_attr: the buffer is either attached
+ * to the returned flow or released when the flow cannot be created.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param ibv_attr
+ *   Verbs flow attributes.
+ * @param action
+ *   Target action structure.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   A flow if the rule could be created, NULL otherwise.
+ */
+static struct rte_flow *
+priv_flow_create_action_queue(struct priv *priv,
+			      struct ibv_flow_attr *ibv_attr,
+			      struct mlx4_flow_action *action,
+			      struct rte_flow_error *error)
+{
+	struct rxq *rxq;
+	struct ibv_qp *qp;
+	struct rte_flow *rte_flow;
+
+	assert(priv->pd);
+	assert(priv->ctx);
+	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
+	if (!rte_flow) {
+		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "cannot allocate flow memory");
+		rte_free(ibv_attr);
+		return NULL;
+	}
+	/* Attach at once so that the error path always releases it. */
+	rte_flow->ibv_attr = ibv_attr;
+	/* Drop rules need a queue as well, they use its resource domain. */
+	rxq = action->queue_id < priv->rxqs_n ?
+		(*priv->rxqs)[action->queue_id] : NULL;
+	if (!rxq) {
+		rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "target Rx queue is not configured");
+		goto error;
+	}
+	if (action->drop) {
+		rte_flow->cq =
+			ibv_exp_create_cq(priv->ctx, 1, NULL, NULL, 0,
+					  &(struct ibv_exp_cq_init_attr){
+						  .comp_mask = 0,
+					  });
+		if (!rte_flow->cq) {
+			rte_flow_error_set(error, ENOMEM,
+					   RTE_FLOW_ERROR_TYPE_HANDLE,
+					   NULL, "cannot allocate CQ");
+			goto error;
+		}
+		rte_flow->qp = ibv_exp_create_qp(
+			priv->ctx,
+			&(struct ibv_exp_qp_init_attr){
+				.send_cq = rte_flow->cq,
+				.recv_cq = rte_flow->cq,
+				.cap = {
+					.max_recv_wr = 1,
+					.max_recv_sge = 1,
+				},
+				.qp_type = IBV_QPT_RAW_PACKET,
+				.comp_mask =
+					IBV_EXP_QP_INIT_ATTR_PD |
+					IBV_EXP_QP_INIT_ATTR_PORT |
+					IBV_EXP_QP_INIT_ATTR_RES_DOMAIN,
+				.pd = priv->pd,
+				.res_domain = rxq->rd,
+				.port_num = priv->port,
+			});
+		if (!rte_flow->qp) {
+			rte_flow_error_set(error, ENOMEM,
+					   RTE_FLOW_ERROR_TYPE_HANDLE,
+					   NULL, "cannot allocate QP");
+			goto error;
+		}
+		qp = rte_flow->qp;
+	} else {
+		rte_flow->rxq = rxq;
+		qp = rxq->qp;
+	}
+	rte_flow->ibv_flow = ibv_create_flow(qp, rte_flow->ibv_attr);
+	if (!rte_flow->ibv_flow) {
+		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "flow rule creation failure");
+		goto error;
+	}
+	return rte_flow;
+
+error:
+	assert(rte_flow);
+	/* The QP uses the CQ and must therefore be destroyed first. */
+	if (rte_flow->qp)
+		ibv_destroy_qp(rte_flow->qp);
+	if (rte_flow->cq)
+		ibv_destroy_cq(rte_flow->cq);
+	rte_free(rte_flow->ibv_attr);
+	rte_free(rte_flow);
+	return NULL;
+}
+
+/**
+ * Convert a flow.
+ *
+ * priv_flow_validate() is run twice: a first pass without Verbs attribute
+ * buffer checks the rule and leaves the required buffer size in flow.offset,
+ * then, once the buffer is allocated and flow.offset reset, a second pass
+ * fills it through the item conversion callbacks. The action list is then
+ * summarized in a mlx4_flow_action and everything is handed over to
+ * priv_flow_create_action_queue().
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[in] attr
+ *   Flow rule attributes.
+ * @param[in] items
+ *   Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ *   Associated actions (list terminated by the END action).
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   A flow on success, NULL otherwise.
+ */
+static struct rte_flow *
+priv_flow_create(struct priv *priv,
+		 const struct rte_flow_attr *attr,
+		 const struct rte_flow_item items[],
+		 const struct rte_flow_action actions[],
+		 struct rte_flow_error *error)
+{
+	struct rte_flow *rte_flow;
+	struct mlx4_flow_action action;
+	struct mlx4_flow flow = { .offset = sizeof(struct ibv_flow_attr), };
+	int err;
+
+	err = priv_flow_validate(priv, attr, items, actions, error, &flow);
+	if (err)
+		return NULL;
+	flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
+	if (!flow.ibv_attr) {
+		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "cannot allocate ibv_attr memory");
+		return NULL;
+	}
+	flow.offset = sizeof(struct ibv_flow_attr);
+	*flow.ibv_attr = (struct ibv_flow_attr){
+		.comp_mask = 0,
+		.type = IBV_FLOW_ATTR_NORMAL,
+		.size = sizeof(struct ibv_flow_attr),
+		.priority = attr->priority,
+		.num_of_specs = 0,
+		.port = priv->port,
+		.flags = 0,
+	};
+	claim_zero(priv_flow_validate(priv, attr, items, actions,
+				      error, &flow));
+	action = (struct mlx4_flow_action){
+		.queue = 0,
+		.drop = 0,
+	};
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
+		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
+			continue;
+		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
+			action.queue = 1;
+			action.queue_id =
+				((const struct rte_flow_action_queue *)
+				 actions->conf)->index;
+		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
+			action.drop = 1;
+		} else {
+			rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   actions, "unsupported action");
+			goto exit;
+		}
+	}
+	rte_flow = priv_flow_create_action_queue(priv, flow.ibv_attr,
+						 &action, error);
+	return rte_flow;
+exit:
+	rte_free(flow.ibv_attr);
+	return NULL;
+}
+
+/**
+ * Create a flow.
+ *
+ * Builds the flow with priv_flow_create() under the private structure lock
+ * and, on success, inserts it at the head of the priv->flows list.
+ *
+ * @see rte_flow_create()
+ * @see rte_flow_ops
+ */
+struct rte_flow *
+mlx4_flow_create(struct rte_eth_dev *dev,
+		 const struct rte_flow_attr *attr,
+		 const struct rte_flow_item items[],
+		 const struct rte_flow_action actions[],
+		 struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+	struct rte_flow *flow;
+
+	priv_lock(priv);
+	flow = priv_flow_create(priv, attr, items, actions, error);
+	if (flow) {
+		LIST_INSERT_HEAD(&priv->flows, flow, next);
+		DEBUG("Flow created %p", (void *)flow);
+	}
+	priv_unlock(priv);
+	return flow;
+}
+
+/**
+ * Destroy a flow.
+ *
+ * Unlinks the flow from its list, then releases in turn the Verbs flow,
+ * queue pair and completion queue it holds (each only when set), its Verbs
+ * attributes and finally the flow structure itself.
+ *
+ * @param priv
+ *   Pointer to private structure (unused).
+ * @param[in] flow
+ *   Flow to destroy.
+ */
+static void
+priv_flow_destroy(struct priv *priv, struct rte_flow *flow)
+{
+	(void)priv;
+	LIST_REMOVE(flow, next);
+	if (flow->ibv_flow)
+		claim_zero(ibv_destroy_flow(flow->ibv_flow));
+	if (flow->qp)
+		claim_zero(ibv_destroy_qp(flow->qp));
+	if (flow->cq)
+		claim_zero(ibv_destroy_cq(flow->cq));
+	rte_free(flow->ibv_attr);
+	DEBUG("Flow destroyed %p", (void *)flow);
+	rte_free(flow);
+}
+
+/**
+ * Destroy a flow.
+ *
+ * Runs priv_flow_destroy() under the private structure lock. The error
+ * structure is not used and 0 is always returned.
+ *
+ * @see rte_flow_destroy()
+ * @see rte_flow_ops
+ */
+int
+mlx4_flow_destroy(struct rte_eth_dev *dev,
+		  struct rte_flow *flow,
+		  struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+
+	(void)error;
+	priv_lock(priv);
+	priv_flow_destroy(priv, flow);
+	priv_unlock(priv);
+	return 0;
+}
+
+/**
+ * Destroy all flows.
+ *
+ * Repeatedly destroys the first flow of priv->flows until the list is empty.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+static void
+priv_flow_flush(struct priv *priv)
+{
+	struct rte_flow *head;
+
+	/* Each destroyed flow is unlinked, always restart from the head. */
+	while ((head = LIST_FIRST(&priv->flows)) != NULL)
+		priv_flow_destroy(priv, head);
+}
+
+/**
+ * Destroy all flows.
+ *
+ * Runs priv_flow_flush() under the private structure lock. The error
+ * structure is not used and 0 is always returned.
+ *
+ * @see rte_flow_flush()
+ * @see rte_flow_ops
+ */
+int
+mlx4_flow_flush(struct rte_eth_dev *dev,
+		struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+
+	(void)error;
+	priv_lock(priv);
+	priv_flow_flush(priv);
+	priv_unlock(priv);
+	return 0;
+}
+
+/**
+ * Remove all flows.
+ *
+ * Called by dev_stop() to remove all flows. Only the Verbs flows are
+ * destroyed; flow structures stay in the list so that
+ * mlx4_priv_flow_start() can apply them again.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+void
+mlx4_priv_flow_stop(struct priv *priv)
+{
+	struct rte_flow *flow;
+
+	for (flow = LIST_FIRST(&priv->flows);
+	     flow;
+	     flow = LIST_NEXT(flow, next)) {
+		/* Nothing to remove for flows that are not applied. */
+		if (!flow->ibv_flow)
+			continue;
+		claim_zero(ibv_destroy_flow(flow->ibv_flow));
+		flow->ibv_flow = NULL;
+		DEBUG("Flow %p removed", (void *)flow);
+	}
+}
+
+/**
+ * Add all flows.
+ *
+ * Applies every flow of priv->flows in list order, on the flow's own queue
+ * pair when it has one and on the queue pair of its Rx queue otherwise.
+ * Processing stops at the first flow that cannot be applied.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ *
+ * @return
+ *   0 on success, an errno value otherwise and rte_errno is set.
+ */
+int
+mlx4_priv_flow_start(struct priv *priv)
+{
+	struct rte_flow *flow = LIST_FIRST(&priv->flows);
+
+	while (flow != NULL) {
+		struct ibv_qp *qp;
+
+		if (flow->qp != NULL)
+			qp = flow->qp;
+		else
+			qp = flow->rxq->qp;
+		flow->ibv_flow = ibv_create_flow(qp, flow->ibv_attr);
+		if (flow->ibv_flow == NULL) {
+			DEBUG("Flow %p cannot be applied", (void *)flow);
+			rte_errno = EINVAL;
+			return rte_errno;
+		}
+		DEBUG("Flow %p applied", (void *)flow);
+		flow = LIST_NEXT(flow, next);
+	}
+	return 0;
+}
diff --git a/drivers/net/mlx4/mlx4_flow.h b/drivers/net/mlx4/mlx4_flow.h
new file mode 100644
index 0000000..537ffdf
--- /dev/null
+++ b/drivers/net/mlx4/mlx4_flow.h
@@ -0,0 +1,104 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2017 6WIND S.A.
+ *   Copyright 2017 Mellanox.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of 6WIND S.A. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RTE_PMD_MLX4_FLOW_H_
+#define RTE_PMD_MLX4_FLOW_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/queue.h>
+
+/* Verbs header. */
+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+#include <rte_byteorder.h>
+
+#include "mlx4.h"
+
+struct rte_flow {
+	LIST_ENTRY(rte_flow) next; /**< Linkage in the priv->flows list. */
+	struct rxq *rxq; /**< Pointer to the queue, NULL if drop queue. */
+	struct ibv_flow *ibv_flow; /**< Verbs flow, NULL while not applied. */
+	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
+	struct ibv_qp *qp; /**< Verbs queue pair of a drop flow, else NULL. */
+	struct ibv_cq *cq; /**< Verbs completion queue of a drop flow. */
+};
+
+int
+mlx4_flow_validate(struct rte_eth_dev *dev,
+		   const struct rte_flow_attr *attr,
+		   const struct rte_flow_item items[],
+		   const struct rte_flow_action actions[],
+		   struct rte_flow_error *error);
+
+struct rte_flow *
+mlx4_flow_create(struct rte_eth_dev *dev,
+		 const struct rte_flow_attr *attr,
+		 const struct rte_flow_item items[],
+		 const struct rte_flow_action actions[],
+		 struct rte_flow_error *error);
+
+int
+mlx4_flow_destroy(struct rte_eth_dev *dev,
+		  struct rte_flow *flow,
+		  struct rte_flow_error *error);
+
+int
+mlx4_flow_flush(struct rte_eth_dev *dev,
+		struct rte_flow_error *error);
+
+/** Structure to pass to the conversion function. */
+struct mlx4_flow {
+	struct ibv_flow_attr *ibv_attr; /**< Verbs attribute. */
+	unsigned int offset; /**< Byte offset of the next spec in ibv_attr. */
+};
+
+struct mlx4_flow_action {
+	uint32_t drop:1; /**< Target is a drop queue. */
+	uint32_t queue:1; /**< Target is a receive queue. */
+	uint32_t queue_id; /**< Index of the queue in priv->rxqs. */
+};
+
+int mlx4_priv_flow_start(struct priv *priv);
+void mlx4_priv_flow_stop(struct priv *priv);
+
+#endif /* RTE_PMD_MLX4_FLOW_H_ */
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [dpdk-dev] [PATCH 1/2] net/mlx4: split the definitions to the header file
  2017-02-21 14:07 [dpdk-dev] [PATCH 1/2] net/mlx4: split the definitions to the header file Vasily Philipov
  2017-02-21 14:07 ` [dpdk-dev] [PATCH 2/2] net/mlx4: support basic flow items and actions Vasily Philipov
@ 2017-02-22  8:37 ` Nélio Laranjeiro
  2017-02-22 13:42 ` [dpdk-dev] [PATCH v2 " Vasily Philipov
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 15+ messages in thread
From: Nélio Laranjeiro @ 2017-02-22  8:37 UTC (permalink / raw)
  To: Vasily Philipov; +Cc: dev, Adrien Mazarguil

On Tue, Feb 21, 2017 at 02:07:02PM +0000, Vasily Philipov wrote:
> Make some structs/defines visible from different source files by placing
> them into mlx4.h header.
> 
> Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
> ---
>  drivers/net/mlx4/mlx4.c | 183 ++--------------------------------------------
>  drivers/net/mlx4/mlx4.h | 187 +++++++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 189 insertions(+), 181 deletions(-)
> 
> diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
> index 79efaaa..82ccac8 100644
> --- a/drivers/net/mlx4/mlx4.c
> +++ b/drivers/net/mlx4/mlx4.c
> @@ -1,8 +1,8 @@
>  /*-
>   *   BSD LICENSE
>   *
> - *   Copyright 2012-2015 6WIND S.A.
> - *   Copyright 2012 Mellanox.
> + *   Copyright 2012-2017 6WIND S.A.
> + *   Copyright 2012-2017 Mellanox.
>   *
>   *   Redistribution and use in source and binary forms, with or without
>   *   modification, are permitted provided that the following conditions
> @@ -68,10 +68,6 @@
>  #pragma GCC diagnostic error "-Wpedantic"
>  #endif
>  
> -/* DPDK headers don't like -pedantic. */
> -#ifdef PEDANTIC
> -#pragma GCC diagnostic ignored "-Wpedantic"
> -#endif
>  #include <rte_ether.h>
>  #include <rte_ethdev.h>
>  #include <rte_dev.h>
> @@ -86,9 +82,6 @@
>  #include <rte_log.h>
>  #include <rte_alarm.h>
>  #include <rte_memory.h>
> -#ifdef PEDANTIC
> -#pragma GCC diagnostic error "-Wpedantic"
> -#endif
>  
>  /* Generated configuration header. */
>  #include "mlx4_autoconf.h"
> @@ -96,21 +89,6 @@
>  /* PMD header. */
>  #include "mlx4.h"
>  
> -/* Runtime logging through RTE_LOG() is enabled when not in debugging mode.
> - * Intermediate LOG_*() macros add the required end-of-line characters. */
> -#ifndef NDEBUG
> -#define INFO(...) DEBUG(__VA_ARGS__)
> -#define WARN(...) DEBUG(__VA_ARGS__)
> -#define ERROR(...) DEBUG(__VA_ARGS__)
> -#else
> -#define LOG__(level, m, ...) \
> -	RTE_LOG(level, PMD, MLX4_DRIVER_NAME ": " m "%c", __VA_ARGS__)
> -#define LOG_(level, ...) LOG__(level, __VA_ARGS__, '\n')
> -#define INFO(...) LOG_(INFO, __VA_ARGS__)
> -#define WARN(...) LOG_(WARNING, __VA_ARGS__)
> -#define ERROR(...) LOG_(ERR, __VA_ARGS__)
> -#endif
> -
>  /* Convenience macros for accessing mbuf fields. */
>  #define NEXT(m) ((m)->next)
>  #define DATA_LEN(m) ((m)->data_len)
> @@ -137,157 +115,6 @@
>  	 (((val) & (from)) / ((from) / (to))) : \
>  	 (((val) & (from)) * ((to) / (from))))
>  
> -struct mlx4_rxq_stats {
> -	unsigned int idx; /**< Mapping index. */
> -#ifdef MLX4_PMD_SOFT_COUNTERS
> -	uint64_t ipackets;  /**< Total of successfully received packets. */
> -	uint64_t ibytes;    /**< Total of successfully received bytes. */
> -#endif
> -	uint64_t idropped;  /**< Total of packets dropped when RX ring full. */
> -	uint64_t rx_nombuf; /**< Total of RX mbuf allocation failures. */
> -};
> -
> -struct mlx4_txq_stats {
> -	unsigned int idx; /**< Mapping index. */
> -#ifdef MLX4_PMD_SOFT_COUNTERS
> -	uint64_t opackets; /**< Total of successfully sent packets. */
> -	uint64_t obytes;   /**< Total of successfully sent bytes. */
> -#endif
> -	uint64_t odropped; /**< Total of packets not sent when TX ring full. */
> -};
> -
> -/* RX element (scattered packets). */
> -struct rxq_elt_sp {
> -	struct ibv_recv_wr wr; /* Work Request. */
> -	struct ibv_sge sges[MLX4_PMD_SGE_WR_N]; /* Scatter/Gather Elements. */
> -	struct rte_mbuf *bufs[MLX4_PMD_SGE_WR_N]; /* SGEs buffers. */
> -};
> -
> -/* RX element. */
> -struct rxq_elt {
> -	struct ibv_recv_wr wr; /* Work Request. */
> -	struct ibv_sge sge; /* Scatter/Gather Element. */
> -	/* mbuf pointer is derived from WR_ID(wr.wr_id).offset. */
> -};
> -
> -/* RX queue descriptor. */
> -struct rxq {
> -	struct priv *priv; /* Back pointer to private data. */
> -	struct rte_mempool *mp; /* Memory Pool for allocations. */
> -	struct ibv_mr *mr; /* Memory Region (for mp). */
> -	struct ibv_cq *cq; /* Completion Queue. */
> -	struct ibv_qp *qp; /* Queue Pair. */
> -	struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
> -	struct ibv_exp_cq_family *if_cq; /* CQ interface. */
> -	/*
> -	 * Each VLAN ID requires a separate flow steering rule.
> -	 */
> -	BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
> -	struct ibv_flow *mac_flow[MLX4_MAX_MAC_ADDRESSES][MLX4_MAX_VLAN_IDS];
> -	struct ibv_flow *promisc_flow; /* Promiscuous flow. */
> -	struct ibv_flow *allmulti_flow; /* Multicast flow. */
> -	unsigned int port_id; /* Port ID for incoming packets. */
> -	unsigned int elts_n; /* (*elts)[] length. */
> -	unsigned int elts_head; /* Current index in (*elts)[]. */
> -	union {
> -		struct rxq_elt_sp (*sp)[]; /* Scattered RX elements. */
> -		struct rxq_elt (*no_sp)[]; /* RX elements. */
> -	} elts;
> -	unsigned int sp:1; /* Use scattered RX elements. */
> -	unsigned int csum:1; /* Enable checksum offloading. */
> -	unsigned int csum_l2tun:1; /* Same for L2 tunnels. */
> -	struct mlx4_rxq_stats stats; /* RX queue counters. */
> -	unsigned int socket; /* CPU socket ID for allocations. */
> -	struct ibv_exp_res_domain *rd; /* Resource Domain. */
> -};
> -
> -/* TX element. */
> -struct txq_elt {
> -	struct rte_mbuf *buf;
> -};
> -
> -/* Linear buffer type. It is used when transmitting buffers with too many
> - * segments that do not fit the hardware queue (see max_send_sge).
> - * Extra segments are copied (linearized) in such buffers, replacing the
> - * last SGE during TX.
> - * The size is arbitrary but large enough to hold a jumbo frame with
> - * 8 segments considering mbuf.buf_len is about 2048 bytes. */
> -typedef uint8_t linear_t[16384];
> -
> -/* TX queue descriptor. */
> -struct txq {
> -	struct priv *priv; /* Back pointer to private data. */
> -	struct {
> -		const struct rte_mempool *mp; /* Cached Memory Pool. */
> -		struct ibv_mr *mr; /* Memory Region (for mp). */
> -		uint32_t lkey; /* mr->lkey */
> -	} mp2mr[MLX4_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
> -	struct ibv_cq *cq; /* Completion Queue. */
> -	struct ibv_qp *qp; /* Queue Pair. */
> -	struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
> -	struct ibv_exp_cq_family *if_cq; /* CQ interface. */
> -#if MLX4_PMD_MAX_INLINE > 0
> -	uint32_t max_inline; /* Max inline send size <= MLX4_PMD_MAX_INLINE. */
> -#endif
> -	unsigned int elts_n; /* (*elts)[] length. */
> -	struct txq_elt (*elts)[]; /* TX elements. */
> -	unsigned int elts_head; /* Current index in (*elts)[]. */
> -	unsigned int elts_tail; /* First element awaiting completion. */
> -	unsigned int elts_comp; /* Number of completion requests. */
> -	unsigned int elts_comp_cd; /* Countdown for next completion request. */
> -	unsigned int elts_comp_cd_init; /* Initial value for countdown. */
> -	struct mlx4_txq_stats stats; /* TX queue counters. */
> -	linear_t (*elts_linear)[]; /* Linearized buffers. */
> -	struct ibv_mr *mr_linear; /* Memory Region for linearized buffers. */
> -	unsigned int socket; /* CPU socket ID for allocations. */
> -	struct ibv_exp_res_domain *rd; /* Resource Domain. */
> -};
> -
> -struct priv {
> -	struct rte_eth_dev *dev; /* Ethernet device. */
> -	struct ibv_context *ctx; /* Verbs context. */
> -	struct ibv_device_attr device_attr; /* Device properties. */
> -	struct ibv_pd *pd; /* Protection Domain. */
> -	/*
> -	 * MAC addresses array and configuration bit-field.
> -	 * An extra entry that cannot be modified by the DPDK is reserved
> -	 * for broadcast frames (destination MAC address ff:ff:ff:ff:ff:ff).
> -	 */
> -	struct ether_addr mac[MLX4_MAX_MAC_ADDRESSES];
> -	BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
> -	/* VLAN filters. */
> -	struct {
> -		unsigned int enabled:1; /* If enabled. */
> -		unsigned int id:12; /* VLAN ID (0-4095). */
> -	} vlan_filter[MLX4_MAX_VLAN_IDS]; /* VLAN filters table. */
> -	/* Device properties. */
> -	uint16_t mtu; /* Configured MTU. */
> -	uint8_t port; /* Physical port number. */
> -	unsigned int started:1; /* Device started, flows enabled. */
> -	unsigned int promisc:1; /* Device in promiscuous mode. */
> -	unsigned int allmulti:1; /* Device receives all multicast packets. */
> -	unsigned int hw_qpg:1; /* QP groups are supported. */
> -	unsigned int hw_tss:1; /* TSS is supported. */
> -	unsigned int hw_rss:1; /* RSS is supported. */
> -	unsigned int hw_csum:1; /* Checksum offload is supported. */
> -	unsigned int hw_csum_l2tun:1; /* Same for L2 tunnels. */
> -	unsigned int rss:1; /* RSS is enabled. */
> -	unsigned int vf:1; /* This is a VF device. */
> -	unsigned int pending_alarm:1; /* An alarm is pending. */
> -#ifdef INLINE_RECV
> -	unsigned int inl_recv_size; /* Inline recv size */
> -#endif
> -	unsigned int max_rss_tbl_sz; /* Maximum number of RSS queues. */
> -	/* RX/TX queues. */
> -	struct rxq rxq_parent; /* Parent queue when RSS is enabled. */
> -	unsigned int rxqs_n; /* RX queues array size. */
> -	unsigned int txqs_n; /* TX queues array size. */
> -	struct rxq *(*rxqs)[]; /* RX queues. */
> -	struct txq *(*txqs)[]; /* TX queues. */
> -	struct rte_intr_handle intr_handle; /* Interrupt handler. */
> -	rte_spinlock_t lock; /* Lock for control functions. */
> -};
> -
>  /* Local storage for secondary process data. */
>  struct mlx4_secondary_data {
>  	struct rte_eth_dev_data data; /* Local device data. */
> @@ -335,8 +162,7 @@ struct mlx4_secondary_data {
>   * @param priv
>   *   Pointer to private structure.
>   */
> -static void
> -priv_lock(struct priv *priv)
> +void priv_lock(struct priv *priv)
>  {
>  	rte_spinlock_lock(&priv->lock);
>  }
> @@ -347,8 +173,7 @@ struct mlx4_secondary_data {
>   * @param priv
>   *   Pointer to private structure.
>   */
> -static void
> -priv_unlock(struct priv *priv)
> +void priv_unlock(struct priv *priv)
>  {
>  	rte_spinlock_unlock(&priv->lock);
>  }
> diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
> index 4c7505e..70c9ecd 100644
> --- a/drivers/net/mlx4/mlx4.h
> +++ b/drivers/net/mlx4/mlx4.h
> @@ -1,8 +1,8 @@
>  /*-
>   *   BSD LICENSE
>   *
> - *   Copyright 2012-2015 6WIND S.A.
> - *   Copyright 2012 Mellanox.
> + *   Copyright 2012-2017 6WIND S.A.
> + *   Copyright 2012-2017 Mellanox.
>   *
>   *   Redistribution and use in source and binary forms, with or without
>   *   modification, are permitted provided that the following conditions
> @@ -39,6 +39,33 @@
>  #include <limits.h>
>  
>  /*
> + * Runtime logging through RTE_LOG() is enabled when not in debugging mode.
> + * Intermediate LOG_*() macros add the required end-of-line characters.
> + */
> +#ifndef NDEBUG
> +#define INFO(...) DEBUG(__VA_ARGS__)
> +#define WARN(...) DEBUG(__VA_ARGS__)
> +#define ERROR(...) DEBUG(__VA_ARGS__)
> +#else
> +#define LOG__(level, m, ...) \
> +	RTE_LOG(level, PMD, MLX4_DRIVER_NAME ": " m "%c", __VA_ARGS__)
> +#define LOG_(level, ...) LOG__(level, __VA_ARGS__, '\n')
> +#define INFO(...) LOG_(INFO, __VA_ARGS__)
> +#define WARN(...) LOG_(WARNING, __VA_ARGS__)
> +#define ERROR(...) LOG_(ERR, __VA_ARGS__)
> +#endif
> +
> +/* Verbs header. */
> +/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
> +#ifdef PEDANTIC
> +#pragma GCC diagnostic ignored "-Wpedantic"
> +#endif
> +#include <infiniband/verbs.h>
> +#ifdef PEDANTIC
> +#pragma GCC diagnostic error "-Wpedantic"
> +#endif
> +
> +/*
>   * Maximum number of simultaneous MAC addresses supported.
>   *
>   * According to ConnectX's Programmer Reference Manual:
> @@ -160,4 +187,160 @@ enum {
>  #define claim_positive(...) (__VA_ARGS__)
>  #endif /* NDEBUG */
>  
> +struct mlx4_rxq_stats {
> +	unsigned int idx; /**< Mapping index. */
> +#ifdef MLX4_PMD_SOFT_COUNTERS
> +	uint64_t ipackets; /**< Total of successfully received packets. */
> +	uint64_t ibytes; /**< Total of successfully received bytes. */
> +#endif
> +	uint64_t idropped; /**< Total of packets dropped when RX ring full. */
> +	uint64_t rx_nombuf; /**< Total of RX mbuf allocation failures. */
> +};
> +
> +/* RX element (scattered packets). */
> +struct rxq_elt_sp {
> +	struct ibv_recv_wr wr; /* Work Request. */
> +	struct ibv_sge sges[MLX4_PMD_SGE_WR_N]; /* Scatter/Gather Elements. */
> +	struct rte_mbuf *bufs[MLX4_PMD_SGE_WR_N]; /* SGEs buffers. */
> +};
> +
> +/* RX element. */
> +struct rxq_elt {
> +	struct ibv_recv_wr wr; /* Work Request. */
> +	struct ibv_sge sge; /* Scatter/Gather Element. */
> +	/* mbuf pointer is derived from WR_ID(wr.wr_id).offset. */
> +};
> +
> +/* RX queue descriptor. */
> +struct rxq {
> +	struct priv *priv; /* Back pointer to private data. */
> +	struct rte_mempool *mp; /* Memory Pool for allocations. */
> +	struct ibv_mr *mr; /* Memory Region (for mp). */
> +	struct ibv_cq *cq; /* Completion Queue. */
> +	struct ibv_qp *qp; /* Queue Pair. */
> +	struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
> +	struct ibv_exp_cq_family *if_cq; /* CQ interface. */
> +	/*
> +	 * Each VLAN ID requires a separate flow steering rule.
> +	 */
> +	BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
> +	struct ibv_flow *mac_flow[MLX4_MAX_MAC_ADDRESSES][MLX4_MAX_VLAN_IDS];
> +	struct ibv_flow *promisc_flow; /* Promiscuous flow. */
> +	struct ibv_flow *allmulti_flow; /* Multicast flow. */
> +	unsigned int port_id; /* Port ID for incoming packets. */
> +	unsigned int elts_n; /* (*elts)[] length. */
> +	unsigned int elts_head; /* Current index in (*elts)[]. */
> +	union {
> +		struct rxq_elt_sp (*sp)[]; /* Scattered RX elements. */
> +		struct rxq_elt (*no_sp)[]; /* RX elements. */
> +	} elts;
> +	unsigned int sp:1; /* Use scattered RX elements. */
> +	unsigned int csum:1; /* Enable checksum offloading. */
> +	unsigned int csum_l2tun:1; /* Same for L2 tunnels. */
> +	struct mlx4_rxq_stats stats; /* RX queue counters. */
> +	unsigned int socket; /* CPU socket ID for allocations. */
> +	struct ibv_exp_res_domain *rd; /* Resource Domain. */
> +};
> +
> +/* TX element. */
> +struct txq_elt {
> +	struct rte_mbuf *buf;
> +};
> +
> +struct mlx4_txq_stats {
> +	unsigned int idx; /**< Mapping index. */
> +#ifdef MLX4_PMD_SOFT_COUNTERS
> +	uint64_t opackets; /**< Total of successfully sent packets. */
> +	uint64_t obytes;   /**< Total of successfully sent bytes. */
> +#endif
> +	uint64_t odropped; /**< Total of packets not sent when TX ring full. */
> +};
> +
> +/*
> + * Linear buffer type. It is used when transmitting buffers with too many
> + * segments that do not fit the hardware queue (see max_send_sge).
> + * Extra segments are copied (linearized) in such buffers, replacing the
> + * last SGE during TX.
> + * The size is arbitrary but large enough to hold a jumbo frame with
> + * 8 segments considering mbuf.buf_len is about 2048 bytes.
> + */
> +typedef uint8_t linear_t[16384];
> +
> +/* TX queue descriptor. */
> +struct txq {
> +	struct priv *priv; /* Back pointer to private data. */
> +	struct {
> +		const struct rte_mempool *mp; /* Cached Memory Pool. */
> +		struct ibv_mr *mr; /* Memory Region (for mp). */
> +		uint32_t lkey; /* mr->lkey */
> +	} mp2mr[MLX4_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
> +	struct ibv_cq *cq; /* Completion Queue. */
> +	struct ibv_qp *qp; /* Queue Pair. */
> +	struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
> +	struct ibv_exp_cq_family *if_cq; /* CQ interface. */
> +#if MLX4_PMD_MAX_INLINE > 0
> +	uint32_t max_inline; /* Max inline send size <= MLX4_PMD_MAX_INLINE. */
> +#endif
> +	unsigned int elts_n; /* (*elts)[] length. */
> +	struct txq_elt (*elts)[]; /* TX elements. */
> +	unsigned int elts_head; /* Current index in (*elts)[]. */
> +	unsigned int elts_tail; /* First element awaiting completion. */
> +	unsigned int elts_comp; /* Number of completion requests. */
> +	unsigned int elts_comp_cd; /* Countdown for next completion request. */
> +	unsigned int elts_comp_cd_init; /* Initial value for countdown. */
> +	struct mlx4_txq_stats stats; /* TX queue counters. */
> +	linear_t (*elts_linear)[]; /* Linearized buffers. */
> +	struct ibv_mr *mr_linear; /* Memory Region for linearized buffers. */
> +	unsigned int socket; /* CPU socket ID for allocations. */
> +	struct ibv_exp_res_domain *rd; /* Resource Domain. */
> +};
> +
> +struct priv {
> +	struct rte_eth_dev *dev; /* Ethernet device. */
> +	struct ibv_context *ctx; /* Verbs context. */
> +	struct ibv_device_attr device_attr; /* Device properties. */
> +	struct ibv_pd *pd; /* Protection Domain. */
> +	/*
> +	 * MAC addresses array and configuration bit-field.
> +	 * An extra entry that cannot be modified by the DPDK is reserved
> +	 * for broadcast frames (destination MAC address ff:ff:ff:ff:ff:ff).
> +	 */
> +	struct ether_addr mac[MLX4_MAX_MAC_ADDRESSES];
> +	BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
> +	/* VLAN filters. */
> +	struct {
> +		unsigned int enabled:1; /* If enabled. */
> +		unsigned int id:12; /* VLAN ID (0-4095). */
> +	} vlan_filter[MLX4_MAX_VLAN_IDS]; /* VLAN filters table. */
> +	/* Device properties. */
> +	uint16_t mtu; /* Configured MTU. */
> +	uint8_t port; /* Physical port number. */
> +	unsigned int started:1; /* Device started, flows enabled. */
> +	unsigned int promisc:1; /* Device in promiscuous mode. */
> +	unsigned int allmulti:1; /* Device receives all multicast packets. */
> +	unsigned int hw_qpg:1; /* QP groups are supported. */
> +	unsigned int hw_tss:1; /* TSS is supported. */
> +	unsigned int hw_rss:1; /* RSS is supported. */
> +	unsigned int hw_csum:1; /* Checksum offload is supported. */
> +	unsigned int hw_csum_l2tun:1; /* Same for L2 tunnels. */
> +	unsigned int rss:1; /* RSS is enabled. */
> +	unsigned int vf:1; /* This is a VF device. */
> +	unsigned int pending_alarm:1; /* An alarm is pending. */
> +#ifdef INLINE_RECV
> +	unsigned int inl_recv_size; /* Inline recv size */
> +#endif
> +	unsigned int max_rss_tbl_sz; /* Maximum number of RSS queues. */
> +	/* RX/TX queues. */
> +	struct rxq rxq_parent; /* Parent queue when RSS is enabled. */
> +	unsigned int rxqs_n; /* RX queues array size. */
> +	unsigned int txqs_n; /* TX queues array size. */
> +	struct rxq *(*rxqs)[]; /* RX queues. */
> +	struct txq *(*txqs)[]; /* TX queues. */
> +	struct rte_intr_handle intr_handle; /* Interrupt handler. */
> +	rte_spinlock_t lock; /* Lock for control functions. */
> +};
> +
> +void priv_lock(struct priv *priv);
> +void priv_unlock(struct priv *priv);
> +
>  #endif /* RTE_PMD_MLX4_H_ */
> -- 
> 1.8.3.1
> 

Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
-- 
Nélio Laranjeiro
6WIND

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] net/mlx4: support basic flow items and actions
  2017-02-21 14:07 ` [dpdk-dev] [PATCH 2/2] net/mlx4: support basic flow items and actions Vasily Philipov
@ 2017-02-22  8:37   ` Nélio Laranjeiro
  2017-02-22 10:10     ` Nélio Laranjeiro
  0 siblings, 1 reply; 15+ messages in thread
From: Nélio Laranjeiro @ 2017-02-22  8:37 UTC (permalink / raw)
  To: Vasily Philipov; +Cc: dev, Adrien Mazarguil

On Tue, Feb 21, 2017 at 02:07:03PM +0000, Vasily Philipov wrote:
> Add support for the following items: eth, vlan, ipv4, udp and tcp, and for
> the following actions: queue and drop.
> 
> Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
> ---
>  drivers/net/mlx4/Makefile    |    3 +-
>  drivers/net/mlx4/mlx4.c      |   60 ++-
>  drivers/net/mlx4/mlx4.h      |    3 +
>  drivers/net/mlx4/mlx4_flow.c | 1053 ++++++++++++++++++++++++++++++++++++++++++
>  drivers/net/mlx4/mlx4_flow.h |  104 +++++
>  5 files changed, 1220 insertions(+), 3 deletions(-)
>  create mode 100644 drivers/net/mlx4/mlx4_flow.c
>  create mode 100644 drivers/net/mlx4/mlx4_flow.h
> 
> diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile
> index 68c5902..1d463f7 100644
> --- a/drivers/net/mlx4/Makefile
> +++ b/drivers/net/mlx4/Makefile
> @@ -36,6 +36,7 @@ LIB = librte_pmd_mlx4.a
>  
>  # Sources.
>  SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4.c
> +SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_flow.c
>  
>  # Dependencies.
>  DEPDIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += lib/librte_ether
> @@ -129,7 +130,7 @@ mlx4_autoconf.h: mlx4_autoconf.h.new
>  		cmp '$<' '$@' $(AUTOCONF_OUTPUT) || \
>  		mv '$<' '$@'
>  
> -mlx4.o: mlx4_autoconf.h
> +$(SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD):.c=.o): mlx4_autoconf.h
>  
>  clean_mlx4: FORCE
>  	$Q rm -f -- mlx4_autoconf.h mlx4_autoconf.h.new
> diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
> index 82ccac8..cc2ebfa 100644
> --- a/drivers/net/mlx4/mlx4.c
> +++ b/drivers/net/mlx4/mlx4.c
> @@ -82,12 +82,14 @@
>  #include <rte_log.h>
>  #include <rte_alarm.h>
>  #include <rte_memory.h>
> +#include <rte_flow.h>
>  
>  /* Generated configuration header. */
>  #include "mlx4_autoconf.h"
>  
> -/* PMD header. */
> +/* PMD headers. */
>  #include "mlx4.h"
> +#include "mlx4_flow.h"
>  
>  /* Convenience macros for accessing mbuf fields. */
>  #define NEXT(m) ((m)->next)
> @@ -2351,6 +2353,7 @@ struct txq_mp2mr_mbuf_check_data {
>  	assert(((uint8_t *)attr + sizeof(*attr)) == (uint8_t *)spec);
>  	*attr = (struct ibv_flow_attr){
>  		.type = IBV_FLOW_ATTR_NORMAL,
> +		.priority = 3,
>  		.num_of_specs = 1,
>  		.port = priv->port,
>  		.flags = 0
> @@ -3936,6 +3939,7 @@ struct txq_mp2mr_mbuf_check_data {
>  {
>  	struct priv *priv = dev->data->dev_private;
>  	unsigned int i = 0;
> +	unsigned int err = 0;
>  	unsigned int r;
>  	struct rxq *rxq;
>  
> @@ -3985,8 +3989,9 @@ struct txq_mp2mr_mbuf_check_data {
>  		return -ret;
>  	} while ((--r) && ((rxq = (*priv->rxqs)[++i]), i));
>  	priv_dev_interrupt_handler_install(priv, dev);
> +	err = mlx4_priv_flow_start(priv);
>  	priv_unlock(priv);
> -	return 0;
> +	return -err;
>  }
>  
>  /**
> @@ -4021,6 +4026,7 @@ struct txq_mp2mr_mbuf_check_data {
>  		rxq = (*priv->rxqs)[0];
>  		r = priv->rxqs_n;
>  	}
> +	mlx4_priv_flow_stop(priv);
>  	/* Iterate only once when RSS is enabled. */
>  	do {
>  		/* Ignore nonexistent RX queues. */
> @@ -5022,6 +5028,55 @@ struct txq_mp2mr_mbuf_check_data {
>  	return -ret;
>  }
>  
> +const struct rte_flow_ops mlx4_flow_ops = {
> +	.validate = mlx4_flow_validate,
> +	.create = mlx4_flow_create,
> +	.destroy = mlx4_flow_destroy,
> +	.flush = mlx4_flow_flush,
> +	.query = NULL,
> +};
> +
> +/**
> + * Manage filter operations.
> + *
> + * @param dev
> + *   Pointer to Ethernet device structure.
> + * @param filter_type
> + *   Filter type.
> + * @param filter_op
> + *   Operation to perform.
> + * @param arg
> + *   Pointer to operation-specific structure.
> + *
> + * @return
> + *   0 on success, negative errno value on failure.
> + */
> +static int
> +mlx4_dev_filter_ctrl(struct rte_eth_dev *dev,
> +		     enum rte_filter_type filter_type,
> +		     enum rte_filter_op filter_op,
> +		     void *arg)
> +{
> +	int ret = EINVAL;
> +
> +	switch (filter_type) {
> +	case RTE_ETH_FILTER_GENERIC:
> +		if (filter_op != RTE_ETH_FILTER_GET)
> +			return -EINVAL;
> +		*(const void **)arg = &mlx4_flow_ops;
> +		return 0;
> +	case RTE_ETH_FILTER_FDIR:
> +		DEBUG("%p: filter type FDIR is not supported by this PMD",
> +		      (void *)dev);
> +		break;
> +	default:
> +		ERROR("%p: filter type (%d) not supported",
> +		      (void *)dev, filter_type);
> +		break;
> +	}
> +	return -ret;
> +}
> +
>  static const struct eth_dev_ops mlx4_dev_ops = {
>  	.dev_configure = mlx4_dev_configure,
>  	.dev_start = mlx4_dev_start,
> @@ -5056,6 +5111,7 @@ struct txq_mp2mr_mbuf_check_data {
>  	.mac_addr_add = mlx4_mac_addr_add,
>  	.mac_addr_set = mlx4_mac_addr_set,
>  	.mtu_set = mlx4_dev_set_mtu,
> +	.filter_ctrl = mlx4_dev_filter_ctrl,
>  };
>  
>  /**
> diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
> index 70c9ecd..fac408b 100644
> --- a/drivers/net/mlx4/mlx4.h
> +++ b/drivers/net/mlx4/mlx4.h
> @@ -295,6 +295,8 @@ struct txq {
>  	struct ibv_exp_res_domain *rd; /* Resource Domain. */
>  };
>  
> +struct rte_flow;
> +
>  struct priv {
>  	struct rte_eth_dev *dev; /* Ethernet device. */
>  	struct ibv_context *ctx; /* Verbs context. */
> @@ -337,6 +339,7 @@ struct priv {
>  	struct rxq *(*rxqs)[]; /* RX queues. */
>  	struct txq *(*txqs)[]; /* TX queues. */
>  	struct rte_intr_handle intr_handle; /* Interrupt handler. */
> +	LIST_HEAD(mlx4_flows, rte_flow) flows;
>  	rte_spinlock_t lock; /* Lock for control functions. */
>  };
>  
> diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
> new file mode 100644
> index 0000000..2328a18
> --- /dev/null
> +++ b/drivers/net/mlx4/mlx4_flow.c
> @@ -0,0 +1,1053 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright 2017 6WIND S.A.
> + *   Copyright 2017 Mellanox.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of 6WIND S.A. nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <assert.h>
> +
> +/* Verbs header. */
> +/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
> +#ifdef PEDANTIC
> +#pragma GCC diagnostic ignored "-Wpedantic"
> +#endif
> +#include <infiniband/verbs.h>
> +#ifdef PEDANTIC
> +#pragma GCC diagnostic error "-Wpedantic"
> +#endif
> +
> +#include <rte_flow.h>
> +#include <rte_flow_driver.h>
> +#include <rte_malloc.h>
> +
> +/* Generated configuration header. */
> +#include "mlx4_autoconf.h"
> +
> +/* PMD headers. */
> +#include "mlx4.h"
> +#include "mlx4_flow.h"
> +
> +/** Static initializer for items. */
> +#define ITEMS(...) \
> +	(const enum rte_flow_item_type []){ \
> +		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
> +	}
> +
> +/** Structure to generate a simple graph of layers supported by the NIC. */
> +struct mlx4_flow_items {
> +	/** List of possible actions for these items. */
> +	const enum rte_flow_action_type *const actions;
> +	/** Bit-masks corresponding to the possibilities for the item. */
> +	const void *mask;
> +	/**
> +	 * Default bit-masks to use when item->mask is not provided. When
> +	 * @p default_mask is also NULL, the full supported bit-mask (@p mask)
> +	 * is used instead.
> +	 */
> +	const void *default_mask;
> +	/** Bit-masks size in bytes. */
> +	const unsigned int mask_sz;
> +	/**
> +	 * Check support for a given item.
> +	 *
> +	 * @param[in] item
> +	 *   Item specification.
> +	 * @param[in] mask
> +	 *   Bit-masks covering supported fields to compare with spec,
> +	 *   last and mask in
> +	 *   @p item.
> +	 * @param[in] size
> +	 *   Bit-mask size in bytes.
> +	 *
> +	 * @return
> +	 *   0 on success, negative value otherwise.
> +	 */
> +	int (*validate)(const struct rte_flow_item *item,
> +			const uint8_t *mask, unsigned int size);
> +	/**
> +	 * Conversion function from rte_flow to NIC specific flow.
> +	 *
> +	 * @param[in] item
> +	 *   rte_flow item to convert.
> +	 * @param[in] default_mask
> +	 *   Default bit-masks to use when item->mask is not provided.
> +	 * @param[in, out] data
> +	 *   Internal structure to store the conversion.
> +	 *
> +	 * @return
> +	 *   0 on success, negative value otherwise.
> +	 */
> +	int (*convert)(const struct rte_flow_item *item,
> +		       const void *default_mask,
> +		       void *data);
> +	/** Size in bytes of the destination structure. */
> +	const unsigned int dst_sz;
> +	/** List of possible following items. */
> +	const enum rte_flow_item_type *const items;
> +};
> +
> +/** Valid actions for this PMD, terminated by RTE_FLOW_ACTION_TYPE_END. */
> +static const enum rte_flow_action_type valid_actions[] = {
> +	RTE_FLOW_ACTION_TYPE_DROP,
> +	RTE_FLOW_ACTION_TYPE_QUEUE,
> +	RTE_FLOW_ACTION_TYPE_END,
> +};
> +
> +/**
> + * Convert Ethernet item to Verbs specification (always returns 0).
> + *
> + * @param[in] item
> + *   Item specification.
> + * @param[in] default_mask
> + *   Default bit-masks to use when item->mask is not provided.
> + * @param[in, out] data
> + *   Pointer to the struct mlx4_flow being filled in.
> + */
> +static int
> +mlx4_flow_create_eth(const struct rte_flow_item *item,
> +		     const void *default_mask,
> +		     void *data)
> +{
> +	const struct rte_flow_item_eth *spec = item->spec;
> +	const struct rte_flow_item_eth *mask = item->mask;
> +	struct mlx4_flow *flow = (struct mlx4_flow *)data;
> +	struct ibv_flow_spec_eth *eth;
> +	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
> +	unsigned int i;
> +
> +	++flow->ibv_attr->num_of_specs;
> +	flow->ibv_attr->priority = 2;
> +	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
> +	*eth = (struct ibv_flow_spec_eth) {
> +		.type = IBV_FLOW_SPEC_ETH,
> +		.size = eth_size,
> +	};
> +	if (!spec) {
> +		flow->ibv_attr->type = IBV_FLOW_ATTR_ALL_DEFAULT;
> +		return 0;
> +	}
> +	if (!mask)
> +		mask = default_mask;
> +	memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
> +	memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
> +	memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
> +	memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
> +	/* Remove unwanted bits from values. */
> +	for (i = 0; i < ETHER_ADDR_LEN; ++i) {
> +		eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
> +		eth->val.src_mac[i] &= eth->mask.src_mac[i];
> +	}
> +	return 0;
> +}
> +
> +/**
> + * Convert VLAN item to Verbs specification (always returns 0).
> + *
> + * @param[in] item
> + *   Item specification.
> + * @param[in] default_mask
> + *   Default bit-masks to use when item->mask is not provided.
> + * @param[in, out] data
> + *   Pointer to the struct mlx4_flow being filled in.
> + */
> +static int
> +mlx4_flow_create_vlan(const struct rte_flow_item *item,
> +		      const void *default_mask,
> +		      void *data)
> +{
> +	const struct rte_flow_item_vlan *spec = item->spec;
> +	const struct rte_flow_item_vlan *mask = item->mask;
> +	struct mlx4_flow *flow = (struct mlx4_flow *)data;
> +	struct ibv_flow_spec_eth *eth;
> +	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
> +
> +	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
> +	if (!spec)
> +		return 0;
> +	if (!mask)
> +		mask = default_mask;
> +	eth->val.vlan_tag = spec->tci;
> +	eth->mask.vlan_tag = mask->tci;
> +	eth->val.vlan_tag &= eth->mask.vlan_tag;
> +	return 0;
> +}
> +
> +/**
> + * Convert IPv4 item to Verbs specification (always returns 0).
> + *
> + * @param[in] item
> + *   Item specification.
> + * @param[in] default_mask
> + *   Default bit-masks to use when item->mask is not provided.
> + * @param[in, out] data
> + *   Pointer to the struct mlx4_flow being filled in.
> + */
> +static int
> +mlx4_flow_create_ipv4(const struct rte_flow_item *item,
> +		      const void *default_mask,
> +		      void *data)
> +{
> +	const struct rte_flow_item_ipv4 *spec = item->spec;
> +	const struct rte_flow_item_ipv4 *mask = item->mask;
> +	struct mlx4_flow *flow = (struct mlx4_flow *)data;
> +	struct ibv_flow_spec_ipv4 *ipv4;
> +	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4);
> +
> +	++flow->ibv_attr->num_of_specs;
> +	flow->ibv_attr->priority = 1;
> +	ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
> +	*ipv4 = (struct ibv_flow_spec_ipv4) {
> +		.type = IBV_FLOW_SPEC_IPV4,
> +		.size = ipv4_size,
> +	};
> +	if (!spec)
> +		return 0;
> +	ipv4->val = (struct ibv_flow_ipv4_filter) {
> +		.src_ip = spec->hdr.src_addr,
> +		.dst_ip = spec->hdr.dst_addr,
> +	};
> +	if (!mask)
> +		mask = default_mask;
> +	ipv4->mask = (struct ibv_flow_ipv4_filter) {
> +		.src_ip = mask->hdr.src_addr,
> +		.dst_ip = mask->hdr.dst_addr,
> +	};
> +	/* Remove unwanted bits from values. */
> +	ipv4->val.src_ip &= ipv4->mask.src_ip;
> +	ipv4->val.dst_ip &= ipv4->mask.dst_ip;
> +	return 0;
> +}
> +
> +/**
> + * Convert UDP item to Verbs specification.
> + *
> + * @param[in] item
> + *   Item specification.
> + * @param[in] default_mask
> + *   Default bit-masks to use when item->mask is not provided.
> + * @param[in, out] data
> + *   User structure.
> + */
> +static int
> +mlx4_flow_create_udp(const struct rte_flow_item *item,
> +		     const void *default_mask,
> +		     void *data)
> +{
> +	const struct rte_flow_item_udp *spec = item->spec;
> +	const struct rte_flow_item_udp *mask = item->mask;
> +	struct mlx4_flow *flow = (struct mlx4_flow *)data;
> +	struct ibv_flow_spec_tcp_udp *udp;
> +	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
> +
> +	++flow->ibv_attr->num_of_specs;
> +	flow->ibv_attr->priority = 0;
> +	udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
> +	*udp = (struct ibv_flow_spec_tcp_udp) {
> +		.type = IBV_FLOW_SPEC_UDP,
> +		.size = udp_size,
> +	};
> +	if (!spec)
> +		return 0;
> +	udp->val.dst_port = spec->hdr.dst_port;
> +	udp->val.src_port = spec->hdr.src_port;
> +	if (!mask)
> +		mask = default_mask;
> +	udp->mask.dst_port = mask->hdr.dst_port;
> +	udp->mask.src_port = mask->hdr.src_port;
> +	/* Remove unwanted bits from values. */
> +	udp->val.src_port &= udp->mask.src_port;
> +	udp->val.dst_port &= udp->mask.dst_port;
> +	return 0;
> +}
> +
> +/**
> + * Convert TCP item to Verbs specification.
> + *
> + * @param[in] item
> + *   Item specification.
> + * @param[in] default_mask
> + *   Default bit-masks to use when item->mask is not provided.
> + * @param[in, out] data
> + *   User structure.
> + */
> +static int
> +mlx4_flow_create_tcp(const struct rte_flow_item *item,
> +		     const void *default_mask,
> +		     void *data)
> +{
> +	const struct rte_flow_item_tcp *spec = item->spec;
> +	const struct rte_flow_item_tcp *mask = item->mask;
> +	struct mlx4_flow *flow = (struct mlx4_flow *)data;
> +	struct ibv_flow_spec_tcp_udp *tcp;
> +	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
> +
> +	++flow->ibv_attr->num_of_specs;
> +	flow->ibv_attr->priority = 0;
> +	tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
> +	*tcp = (struct ibv_flow_spec_tcp_udp) {
> +		.type = IBV_FLOW_SPEC_TCP,
> +		.size = tcp_size,
> +	};
> +	if (!spec)
> +		return 0;
> +	tcp->val.dst_port = spec->hdr.dst_port;
> +	tcp->val.src_port = spec->hdr.src_port;
> +	if (!mask)
> +		mask = default_mask;
> +	tcp->mask.dst_port = mask->hdr.dst_port;
> +	tcp->mask.src_port = mask->hdr.src_port;
> +	/* Remove unwanted bits from values. */
> +	tcp->val.src_port &= tcp->mask.src_port;
> +	tcp->val.dst_port &= tcp->mask.dst_port;
> +	return 0;
> +}
> +
> +/**
> + * Check support for a given item.
> + *
> + * @param[in] item
> + *   Item specification.
> + * @param[in] mask
> + *   Bit-masks covering supported fields to compare with spec, last and mask in
> + *   @p item.
> + * @param[in] size
> + *   Bit-mask size in bytes.
> + *
> + * @return
> + *   0 on success, negative value otherwise.
> + */
> +static int
> +mlx4_flow_item_validate(const struct rte_flow_item *item,
> +			const uint8_t *mask, unsigned int size)
> +{
> +	int ret = 0;
> +
> +	if (!item->spec && (item->mask || item->last))
> +		return -1;
> +	if (item->spec && !item->mask) {
> +		unsigned int i;
> +		const uint8_t *spec = item->spec;
> +
> +		for (i = 0; i < size; ++i)
> +			if ((spec[i] | mask[i]) != mask[i])
> +				return -1;
> +	}
> +	if (item->last && !item->mask) {
> +		unsigned int i;
> +		const uint8_t *spec = item->last;
> +
> +		for (i = 0; i < size; ++i)
> +			if ((spec[i] | mask[i]) != mask[i])
> +				return -1;
> +	}
> +	if (item->spec && item->last) {
> +		uint8_t spec[size];
> +		uint8_t last[size];
> +		const uint8_t *apply = mask;
> +		unsigned int i;
> +
> +		if (item->mask)
> +			apply = item->mask;
> +		for (i = 0; i < size; ++i) {
> +			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
> +			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
> +		}
> +		ret = memcmp(spec, last, size);
> +	}
> +	return ret;
> +}
> +
> +static int
> +mlx4_flow_validate_eth(const struct rte_flow_item *item,
> +		       const uint8_t *mask, unsigned int size)
> +{
> +	if (item->mask) {
> +		const struct rte_flow_item_eth *mask = item->mask;
> +
> +		if (mask->dst.addr_bytes[0] != 0xff ||
> +				mask->dst.addr_bytes[1] != 0xff ||
> +				mask->dst.addr_bytes[2] != 0xff ||
> +				mask->dst.addr_bytes[3] != 0xff ||
> +				mask->dst.addr_bytes[4] != 0xff ||
> +				mask->dst.addr_bytes[5] != 0xff)
> +			return -1;
> +	}
> +	return mlx4_flow_item_validate(item, mask, size);
> +}
> +
> +static int
> +mlx4_flow_validate_vlan(const struct rte_flow_item *item,
> +			const uint8_t *mask, unsigned int size)
> +{
> +	if (item->mask) {
> +		const struct rte_flow_item_vlan *mask = item->mask;
> +
> +		if (mask->tci != 0 &&
> +		    ntohs(mask->tci) != 0x0fff)
> +			return -1;
> +	}
> +	return mlx4_flow_item_validate(item, mask, size);
> +}
> +
> +static int
> +mlx4_flow_validate_ipv4(const struct rte_flow_item *item,
> +			const uint8_t *mask, unsigned int size)
> +{
> +	if (item->mask) {
> +		const struct rte_flow_item_ipv4 *mask = item->mask;
> +
> +		if (mask->hdr.src_addr != 0 &&
> +		    mask->hdr.src_addr != 0xffffffff)
> +			return -1;
> +		if (mask->hdr.dst_addr != 0 &&
> +		    mask->hdr.dst_addr != 0xffffffff)
> +			return -1;
> +	}
> +	return mlx4_flow_item_validate(item, mask, size);
> +}
> +
> +static int
> +mlx4_flow_validate_udp(const struct rte_flow_item *item,
> +		       const uint8_t *mask, unsigned int size)
> +{
> +	if (item->mask) {
> +		const struct rte_flow_item_udp *mask = item->mask;
> +
> +		if (mask->hdr.src_port != 0 &&
> +		    mask->hdr.src_port != 0xffff)
> +			return -1;
> +		if (mask->hdr.dst_port != 0 &&
> +		    mask->hdr.dst_port != 0xffff)
> +			return -1;
> +	}
> +	return mlx4_flow_item_validate(item, mask, size);
> +}
> +
> +static int
> +mlx4_flow_validate_tcp(const struct rte_flow_item *item,
> +		       const uint8_t *mask, unsigned int size)
> +{
> +	if (item->mask) {
> +		const struct rte_flow_item_tcp *mask = item->mask;
> +
> +		if (mask->hdr.src_port != 0 &&
> +		    mask->hdr.src_port != 0xffff)
> +			return -1;
> +		if (mask->hdr.dst_port != 0 &&
> +		    mask->hdr.dst_port != 0xffff)
> +			return -1;
> +	}
> +	return mlx4_flow_item_validate(item, mask, size);
> +}
> +
> +/** Graph of supported items and associated actions. */
> +static const struct mlx4_flow_items mlx4_flow_items[] = {
> +	[RTE_FLOW_ITEM_TYPE_END] = {
> +		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
> +	},
> +	[RTE_FLOW_ITEM_TYPE_ETH] = {
> +		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
> +			       RTE_FLOW_ITEM_TYPE_IPV4),
> +		.actions = valid_actions,
> +		.mask = &(const struct rte_flow_item_eth){
> +			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
> +			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
> +		},
> +		.default_mask = &rte_flow_item_eth_mask,
> +		.mask_sz = sizeof(struct rte_flow_item_eth),
> +		.validate = mlx4_flow_validate_eth,
> +		.convert = mlx4_flow_create_eth,
> +		.dst_sz = sizeof(struct ibv_flow_spec_eth),
> +	},
> +	[RTE_FLOW_ITEM_TYPE_VLAN] = {
> +		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4),
> +		.actions = valid_actions,
> +		.mask = &(const struct rte_flow_item_vlan){
> +		/* rte_flow_item_vlan_mask is invalid for mlx4. */
> +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
> +			.tci = 0x0fff,
> +#else
> +			.tci = 0xff0f,
> +#endif
> +		},
> +		.mask_sz = sizeof(struct rte_flow_item_vlan),
> +		.validate = mlx4_flow_validate_vlan,
> +		.convert = mlx4_flow_create_vlan,
> +		.dst_sz = 0,
> +	},
> +	[RTE_FLOW_ITEM_TYPE_IPV4] = {
> +		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
> +			       RTE_FLOW_ITEM_TYPE_TCP),
> +		.actions = valid_actions,
> +		.mask = &(const struct rte_flow_item_ipv4){
> +			.hdr = {
> +				.src_addr = -1,
> +				.dst_addr = -1,
> +			},
> +		},
> +		.default_mask = &rte_flow_item_ipv4_mask,
> +		.mask_sz = sizeof(struct rte_flow_item_ipv4),
> +		.validate = mlx4_flow_validate_ipv4,
> +		.convert = mlx4_flow_create_ipv4,
> +		.dst_sz = sizeof(struct ibv_flow_spec_ipv4),
> +	},
> +	[RTE_FLOW_ITEM_TYPE_UDP] = {
> +		.actions = valid_actions,
> +		.mask = &(const struct rte_flow_item_udp){
> +			.hdr = {
> +				.src_port = -1,
> +				.dst_port = -1,
> +			},
> +		},
> +		.default_mask = &rte_flow_item_udp_mask,
> +		.mask_sz = sizeof(struct rte_flow_item_udp),
> +		.validate = mlx4_flow_validate_udp,
> +		.convert = mlx4_flow_create_udp,
> +		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
> +	},
> +	[RTE_FLOW_ITEM_TYPE_TCP] = {
> +		.actions = valid_actions,
> +		.mask = &(const struct rte_flow_item_tcp){
> +			.hdr = {
> +				.src_port = -1,
> +				.dst_port = -1,
> +			},
> +		},
> +		.default_mask = &rte_flow_item_tcp_mask,
> +		.mask_sz = sizeof(struct rte_flow_item_tcp),
> +		.validate = mlx4_flow_validate_tcp,
> +		.convert = mlx4_flow_create_tcp,
> +		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
> +	},
> +};
> +
> +/**
> + * Validate a flow supported by the NIC.
> + *
> + * @param priv
> + *   Pointer to private structure.
> + * @param[in] attr
> + *   Flow rule attributes.
> + * @param[in] items
> + *   Pattern specification (list terminated by the END pattern item).
> + * @param[in] actions
> + *   Associated actions (list terminated by the END action).
> + * @param[out] error
> + *   Perform verbose error reporting if not NULL.
> + * @param[in, out] flow
> + *   Flow structure to update.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +static int
> +priv_flow_validate(struct priv *priv,
> +		   const struct rte_flow_attr *attr,
> +		   const struct rte_flow_item items[],
> +		   const struct rte_flow_action actions[],
> +		   struct rte_flow_error *error,
> +		   struct mlx4_flow *flow)
> +{
> +	const struct mlx4_flow_items *cur_item = mlx4_flow_items;
> +	struct mlx4_flow_action action = {
> +		.queue = 0,
> +		.drop = 0,
> +	};
> +
> +	(void)priv;
> +	if (attr->group) {
> +		rte_flow_error_set(error, ENOTSUP,
> +				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
> +				   NULL,
> +				   "groups are not supported");
> +		return -rte_errno;
> +	}
> +	if (attr->priority) {
> +		rte_flow_error_set(error, ENOTSUP,
> +				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
> +				   NULL,
> +				   "priorities are not supported");
> +		return -rte_errno;
> +	}
> +	if (attr->egress) {
> +		rte_flow_error_set(error, ENOTSUP,
> +				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
> +				   NULL,
> +				   "egress is not supported");
> +		return -rte_errno;
> +	}
> +	if (!attr->ingress) {
> +		rte_flow_error_set(error, ENOTSUP,
> +				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
> +				   NULL,
> +				   "only ingress is supported");
> +		return -rte_errno;
> +	}
> +	/* Go over items list. */
> +	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
> +		const struct mlx4_flow_items *token = NULL;
> +		unsigned int i;
> +		int err;
> +
> +		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
> +			continue;
> +		/*
> +		 * The NIC supports patterns with a NULL eth spec only
> +		 * if eth is the only item in the rule.
> +		 */
> +		if (!items->spec &&
> +			items->type == RTE_FLOW_ITEM_TYPE_ETH) {
> +			const struct rte_flow_item *next = items + 1;
> +
> +			if (next->type != RTE_FLOW_ITEM_TYPE_END) {
> +				rte_flow_error_set(error, ENOTSUP,
> +						   RTE_FLOW_ERROR_TYPE_ITEM,
> +						   items,
> +						   "the rule requires"
> +						   " an Ethernet spec");
> +				return -rte_errno;
> +			}
> +		}
> +		for (i = 0;
> +		     cur_item->items &&
> +		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
> +		     ++i) {
> +			if (cur_item->items[i] == items->type) {
> +				token = &mlx4_flow_items[items->type];
> +				break;
> +			}
> +		}
> +		if (!token)
> +			goto exit_item_not_supported;
> +		cur_item = token;
> +		err = cur_item->validate(items,
> +					(const uint8_t *)cur_item->mask,
> +					 cur_item->mask_sz);
> +		if (err)
> +			goto exit_item_not_supported;
> +		if (flow->ibv_attr && cur_item->convert) {
> +			err = cur_item->convert(items,
> +						(cur_item->default_mask ?
> +						 cur_item->default_mask :
> +						 cur_item->mask),
> +						 flow);
> +			if (err)
> +				goto exit_item_not_supported;
> +		}
> +		flow->offset += cur_item->dst_sz;
> +	}
> +	/* Go over actions list */
> +	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
> +		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
> +			continue;
> +		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
> +			action.drop = 1;
> +		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
> +			const struct rte_flow_action_queue *queue =
> +				(const struct rte_flow_action_queue *)
> +				actions->conf;
> +
> +			if (!queue || (queue->index > (priv->rxqs_n - 1)))
> +				goto exit_action_not_supported;
> +			action.queue = 1;
> +		} else {
> +			goto exit_action_not_supported;
> +		}
> +	}
> +	if (!action.queue && !action.drop) {
> +		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
> +				   NULL, "no valid action");
> +		return -rte_errno;
> +	}
> +	return 0;
> +exit_item_not_supported:
> +	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
> +			   items, "item not supported");
> +	return -rte_errno;
> +exit_action_not_supported:
> +	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
> +			   actions, "action not supported");
> +	return -rte_errno;
> +}
> +
> +/**
> + * Validate a flow supported by the NIC.
> + *
> + * @see rte_flow_validate()
> + * @see rte_flow_ops
> + */
> +int
> +mlx4_flow_validate(struct rte_eth_dev *dev,
> +		   const struct rte_flow_attr *attr,
> +		   const struct rte_flow_item items[],
> +		   const struct rte_flow_action actions[],
> +		   struct rte_flow_error *error)
> +{
> +	struct priv *priv = dev->data->dev_private;
> +	int ret;
> +	struct mlx4_flow flow = { .offset = sizeof(struct ibv_flow_attr) };
> +
> +	priv_lock(priv);
> +	ret = priv_flow_validate(priv, attr, items, actions, error, &flow);
> +	priv_unlock(priv);
> +	return ret;
> +}
> +
> +/**
> + * Complete flow rule creation.
> + *
> + * @param priv
> + *   Pointer to private structure.
> + * @param ibv_attr
> + *   Verbs flow attributes.
> + * @param action
> + *   Target action structure.
> + * @param[out] error
> + *   Perform verbose error reporting if not NULL.
> + *
> + * @return
> + *   A flow if the rule could be created, NULL otherwise.
> + */
> +static struct rte_flow *
> +priv_flow_create_action_queue(struct priv *priv,
> +			      struct ibv_flow_attr *ibv_attr,
> +			      struct mlx4_flow_action *action,
> +			      struct rte_flow_error *error)
> +{
> +	struct rxq *rxq;
> +	struct ibv_qp *qp;
> +	struct rte_flow *rte_flow;
> +
> +	assert(priv->pd);
> +	assert(priv->ctx);
> +	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
> +	if (!rte_flow) {
> +		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
> +				   NULL, "cannot allocate flow memory");
> +		return NULL;
> +	}
> +	rxq = (*priv->rxqs)[action->queue_id];
> +	if (action->drop) {
> +		rte_flow->cq =
> +			ibv_exp_create_cq(priv->ctx, 1, NULL, NULL, 0,
> +					  &(struct ibv_exp_cq_init_attr){
> +						  .comp_mask = 0,
> +					  });
> +		if (!rte_flow->cq) {
> +			rte_flow_error_set(error, ENOMEM,
> +					   RTE_FLOW_ERROR_TYPE_HANDLE,
> +					   NULL, "cannot allocate CQ");
> +			goto error;
> +		}
> +		rte_flow->qp = ibv_exp_create_qp(
> +			priv->ctx,
> +			&(struct ibv_exp_qp_init_attr){
> +				.send_cq = rte_flow->cq,
> +				.recv_cq = rte_flow->cq,
> +				.cap = {
> +					.max_recv_wr = 1,
> +					.max_recv_sge = 1,
> +				},
> +				.qp_type = IBV_QPT_RAW_PACKET,
> +				.comp_mask =
> +					IBV_EXP_QP_INIT_ATTR_PD |
> +					IBV_EXP_QP_INIT_ATTR_PORT |
> +					IBV_EXP_QP_INIT_ATTR_RES_DOMAIN,
> +				.pd = priv->pd,
> +				.res_domain = rxq->rd,
> +				.port_num = priv->port,
> +			});
> +		if (!rte_flow->qp) {
> +			rte_flow_error_set(error, ENOMEM,
> +					   RTE_FLOW_ERROR_TYPE_HANDLE,
> +					   NULL, "cannot allocate QP");
> +			goto error;
> +		}
> +		qp = rte_flow->qp;
> +	} else {
> +		rte_flow->rxq = rxq;
> +		qp = rxq->qp;
> +	}
> +	rte_flow->ibv_attr = ibv_attr;
> +	rte_flow->ibv_flow = ibv_create_flow(qp, rte_flow->ibv_attr);
> +	if (!rte_flow->ibv_flow) {
> +		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
> +				   NULL, "flow rule creation failure");
> +		goto error;
> +	}
> +	return rte_flow;
> +
> +error:
> +	assert(rte_flow);
> +	if (rte_flow->cq)
> +		ibv_destroy_cq(rte_flow->cq);
> +	if (rte_flow->qp)
> +		ibv_destroy_qp(rte_flow->qp);
> +	rte_free(rte_flow->ibv_attr);
> +	rte_free(rte_flow);
> +	return NULL;
> +}
> +
> +/**
> + * Validate a flow rule, convert it to Verbs attributes and create it.
> + *
> + * @param priv
> + *   Pointer to private structure.
> + * @param[in] attr
> + *   Flow rule attributes.
> + * @param[in] items
> + *   Pattern specification (list terminated by the END pattern item).
> + * @param[in] actions
> + *   Associated actions (list terminated by the END action).
> + * @param[out] error
> + *   Perform verbose error reporting if not NULL.
> + *
> + * @return
> + *   A flow on success, NULL otherwise.
> + */
> +static struct rte_flow *
> +priv_flow_create(struct priv *priv,
> +		 const struct rte_flow_attr *attr,
> +		 const struct rte_flow_item items[],
> +		 const struct rte_flow_action actions[],
> +		 struct rte_flow_error *error)
> +{
> +	struct rte_flow *rte_flow;
> +	struct mlx4_flow_action action;
> +	struct mlx4_flow flow = { .offset = sizeof(struct ibv_flow_attr), };
> +	int err;
> +
> +	err = priv_flow_validate(priv, attr, items, actions, error, &flow);
> +	if (err)
> +		return NULL;
> +	flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
> +	if (!flow.ibv_attr) {
> +		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
> +				   NULL, "cannot allocate ibv_attr memory");
> +		return NULL;
> +	}
> +	flow.offset = sizeof(struct ibv_flow_attr);
> +	*flow.ibv_attr = (struct ibv_flow_attr){
> +		.comp_mask = 0,
> +		.type = IBV_FLOW_ATTR_NORMAL,
> +		.size = sizeof(struct ibv_flow_attr),
> +		.priority = attr->priority,
> +		.num_of_specs = 0,
> +		.port = priv->port,
> +		.flags = 0,
> +	};
> +	claim_zero(priv_flow_validate(priv, attr, items, actions,
> +				      error, &flow));
> +	action = (struct mlx4_flow_action){
> +		.queue = 0,
> +		.drop = 0,
> +	};
> +	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
> +		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
> +			continue;
> +		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
> +			action.queue = 1;
> +			action.queue_id =
> +				((const struct rte_flow_action_queue *)
> +				 actions->conf)->index;
> +		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
> +			action.drop = 1;
> +		} else {
> +			rte_flow_error_set(error, ENOTSUP,
> +					   RTE_FLOW_ERROR_TYPE_ACTION,
> +					   actions, "unsupported action");
> +			goto exit;
> +		}
> +	}
> +	rte_flow = priv_flow_create_action_queue(priv, flow.ibv_attr,
> +						 &action, error);
> +	return rte_flow;
> +exit:
> +	rte_free(flow.ibv_attr);
> +	return NULL;
> +}
> +
> +/**
> + * Create a flow.
> + *
> + * @see rte_flow_create()
> + * @see rte_flow_ops
> + */
> +struct rte_flow *
> +mlx4_flow_create(struct rte_eth_dev *dev,
> +		 const struct rte_flow_attr *attr,
> +		 const struct rte_flow_item items[],
> +		 const struct rte_flow_action actions[],
> +		 struct rte_flow_error *error)
> +{
> +	struct priv *priv = dev->data->dev_private;
> +	struct rte_flow *flow;
> +
> +	priv_lock(priv);
> +	flow = priv_flow_create(priv, attr, items, actions, error);
> +	if (flow) {
> +		LIST_INSERT_HEAD(&priv->flows, flow, next);
> +		DEBUG("Flow created %p", (void *)flow);
> +	}
> +	priv_unlock(priv);
> +	return flow;
> +}
> +
> +/**
> + * Destroy a flow.
> + *
> + * @param priv
> + *   Pointer to private structure.
> + * @param[in] flow
> + *   Flow to destroy.
> + */
> +static void
> +priv_flow_destroy(struct priv *priv, struct rte_flow *flow)
> +{
> +	(void)priv;
> +	LIST_REMOVE(flow, next);
> +	if (flow->ibv_flow)
> +		claim_zero(ibv_destroy_flow(flow->ibv_flow));
> +	if (flow->qp)
> +		claim_zero(ibv_destroy_qp(flow->qp));
> +	if (flow->cq)
> +		claim_zero(ibv_destroy_cq(flow->cq));
> +	rte_free(flow->ibv_attr);
> +	DEBUG("Flow destroyed %p", (void *)flow);
> +	rte_free(flow);
> +}
> +
> +/**
> + * Destroy a flow.
> + *
> + * @see rte_flow_destroy()
> + * @see rte_flow_ops
> + */
> +int
> +mlx4_flow_destroy(struct rte_eth_dev *dev,
> +		  struct rte_flow *flow,
> +		  struct rte_flow_error *error)
> +{
> +	struct priv *priv = dev->data->dev_private;
> +
> +	(void)error;
> +	priv_lock(priv);
> +	priv_flow_destroy(priv, flow);
> +	priv_unlock(priv);
> +	return 0;
> +}
> +
> +/**
> + * Destroy all flows.
> + *
> + * @param priv
> + *   Pointer to private structure.
> + */
> +static void
> +priv_flow_flush(struct priv *priv)
> +{
> +	while (!LIST_EMPTY(&priv->flows)) {
> +		struct rte_flow *flow;
> +
> +		flow = LIST_FIRST(&priv->flows);
> +		priv_flow_destroy(priv, flow);
> +	}
> +}
> +
> +/**
> + * Destroy all flows.
> + *
> + * @see rte_flow_flush()
> + * @see rte_flow_ops
> + */
> +int
> +mlx4_flow_flush(struct rte_eth_dev *dev,
> +		struct rte_flow_error *error)
> +{
> +	struct priv *priv = dev->data->dev_private;
> +
> +	(void)error;
> +	priv_lock(priv);
> +	priv_flow_flush(priv);
> +	priv_unlock(priv);
> +	return 0;
> +}
> +
> +/**
> + * Remove all flows.
> + *
> + * Called by dev_stop() to remove all flows.
> + *
> + * @param priv
> + *   Pointer to private structure.
> + */
> +void
> +mlx4_priv_flow_stop(struct priv *priv)
> +{
> +	struct rte_flow *flow;
> +
> +	for (flow = LIST_FIRST(&priv->flows);
> +	     flow;
> +	     flow = LIST_NEXT(flow, next)) {
> +		claim_zero(ibv_destroy_flow(flow->ibv_flow));
> +		flow->ibv_flow = NULL;
> +		DEBUG("Flow %p removed", (void *)flow);
> +	}
> +}
> +
> +/**
> + * Add all flows.
> + *
> + * @param priv
> + *   Pointer to private structure.
> + *
> + * @return
> + *   0 on success, an errno value otherwise and rte_errno is set.
> + */
> +int
> +mlx4_priv_flow_start(struct priv *priv)
> +{
> +	struct ibv_qp *qp;
> +	struct rte_flow *flow;
> +
> +	for (flow = LIST_FIRST(&priv->flows);
> +	     flow;
> +	     flow = LIST_NEXT(flow, next)) {
> +		qp = flow->qp ? flow->qp : flow->rxq->qp;
> +		flow->ibv_flow = ibv_create_flow(qp, flow->ibv_attr);
> +		if (!flow->ibv_flow) {
> +			DEBUG("Flow %p cannot be applied", (void *)flow);
> +			rte_errno = EINVAL;
> +			return rte_errno;
> +		}
> +		DEBUG("Flow %p applied", (void *)flow);
> +	}
> +	return 0;
> +}
> diff --git a/drivers/net/mlx4/mlx4_flow.h b/drivers/net/mlx4/mlx4_flow.h
> new file mode 100644
> index 0000000..537ffdf
> --- /dev/null
> +++ b/drivers/net/mlx4/mlx4_flow.h
> @@ -0,0 +1,104 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright 2017 6WIND S.A.
> + *   Copyright 2017 Mellanox.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of 6WIND S.A. nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#ifndef RTE_PMD_MLX4_FLOW_H_
> +#define RTE_PMD_MLX4_FLOW_H_
> +
> +#include <stddef.h>
> +#include <stdint.h>
> +#include <sys/queue.h>
> +
> +/* Verbs header. */
> +/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
> +#ifdef PEDANTIC
> +#pragma GCC diagnostic ignored "-Wpedantic"
> +#endif
> +#include <infiniband/verbs.h>
> +#ifdef PEDANTIC
> +#pragma GCC diagnostic error "-Wpedantic"
> +#endif
> +
> +#include <rte_flow.h>
> +#include <rte_flow_driver.h>
> +#include <rte_byteorder.h>
> +
> +#include "mlx4.h"
> +
> +struct rte_flow {
> +	LIST_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
> +	struct rxq *rxq; /**< Pointer to the queue, NULL if drop queue. */
> +	struct ibv_flow *ibv_flow; /**< Verbs flow. */
> +	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
> +	struct ibv_qp *qp; /**< Verbs queue pair. */
> +	struct ibv_cq *cq; /**< Verbs completion queue. */
> +};
> +
> +int
> +mlx4_flow_validate(struct rte_eth_dev *dev,
> +		   const struct rte_flow_attr *attr,
> +		   const struct rte_flow_item items[],
> +		   const struct rte_flow_action actions[],
> +		   struct rte_flow_error *error);
> +
> +struct rte_flow *
> +mlx4_flow_create(struct rte_eth_dev *dev,
> +		 const struct rte_flow_attr *attr,
> +		 const struct rte_flow_item items[],
> +		 const struct rte_flow_action actions[],
> +		 struct rte_flow_error *error);
> +
> +int
> +mlx4_flow_destroy(struct rte_eth_dev *,
> +		  struct rte_flow *,
> +		  struct rte_flow_error *);
> +
> +int
> +mlx4_flow_flush(struct rte_eth_dev *dev,
> +		struct rte_flow_error *error);
> +
> +/** Structure to pass to the conversion function. */
> +struct mlx4_flow {
> +	struct ibv_flow_attr *ibv_attr; /**< Verbs attribute. */
> +	unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
> +};
> +
> +struct mlx4_flow_action {
> +	uint32_t drop:1; /**< Target is a drop queue. */
> +	uint32_t queue:1; /**< Target is a receive queue. */
> +	uint32_t queue_id; /**< Identifier of the queue. */
> +};
> +
> +int mlx4_priv_flow_start(struct priv *);
> +void mlx4_priv_flow_stop(struct priv *);
> +
> +#endif /* RTE_PMD_MLX4_FLOW_H_ */
> -- 
> 1.8.3.1
> 
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>

-- 
Nélio Laranjeiro
6WIND

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] net/mlx4: support basic flow items and actions
  2017-02-22  8:37   ` Nélio Laranjeiro
@ 2017-02-22 10:10     ` Nélio Laranjeiro
  0 siblings, 0 replies; 15+ messages in thread
From: Nélio Laranjeiro @ 2017-02-22 10:10 UTC (permalink / raw)
  To: Vasily Philipov; +Cc: dev, Adrien Mazarguil

On Wed, Feb 22, 2017 at 09:37:42AM +0100, Nélio Laranjeiro wrote:
> On Tue, Feb 21, 2017 at 02:07:03PM +0000, Vasily Philipov wrote:
> > > Add support for the following items: eth, vlan, ipv4, udp, tcp and for the
> > > following actions: queue, drop.
> > 
> > Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
> > ---
> >  drivers/net/mlx4/Makefile    |    3 +-
> >  drivers/net/mlx4/mlx4.c      |   60 ++-
> >  drivers/net/mlx4/mlx4.h      |    3 +
> >  drivers/net/mlx4/mlx4_flow.c | 1053 ++++++++++++++++++++++++++++++++++++++++++
> >  drivers/net/mlx4/mlx4_flow.h |  104 +++++
> >  5 files changed, 1220 insertions(+), 3 deletions(-)
> >  create mode 100644 drivers/net/mlx4/mlx4_flow.c
> >  create mode 100644 drivers/net/mlx4/mlx4_flow.h
> > 
>[...]
> > diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
> > index 82ccac8..cc2ebfa 100644
> > --- a/drivers/net/mlx4/mlx4.c
> > +++ b/drivers/net/mlx4/mlx4.c
> > @@ -3985,8 +3989,9 @@ struct txq_mp2mr_mbuf_check_data {
> >  		return -ret;
> >  	} while ((--r) && ((rxq = (*priv->rxqs)[++i]), i));
> >  	priv_dev_interrupt_handler_install(priv, dev);
> > +	err = mlx4_priv_flow_start(priv);
> >  	priv_unlock(priv);
> > -	return 0;
> > +	return -err;
>[...]

Hi Vasily,

There is an issue in this mlx4_dev_start() when flows cannot be
re-applied (like in mlx5 [1][2]).  Can you fix it in a v2 please?

Thanks,

[1] http://dpdk.org/ml/archives/dev/2017-February/058111.html
[2] http://dpdk.org/dev/patchwork/patch/20664/

-- 
Nélio Laranjeiro
6WIND

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [dpdk-dev] [PATCH v2 1/2] net/mlx4: split the definitions to the header file
  2017-02-21 14:07 [dpdk-dev] [PATCH 1/2] net/mlx4: split the definitions to the header file Vasily Philipov
  2017-02-21 14:07 ` [dpdk-dev] [PATCH 2/2] net/mlx4: support basic flow items and actions Vasily Philipov
  2017-02-22  8:37 ` [dpdk-dev] [PATCH 1/2] net/mlx4: split the definitions to the header file Nélio Laranjeiro
@ 2017-02-22 13:42 ` Vasily Philipov
  2017-02-22 19:04   ` Ferruh Yigit
  2017-02-22 13:42 ` [dpdk-dev] [PATCH v2 2/2] net/mlx4: support basic flow items and actions Vasily Philipov
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 15+ messages in thread
From: Vasily Philipov @ 2017-02-22 13:42 UTC (permalink / raw)
  To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro

Make some structs/defines visible from different source files by placing
them into mlx4.h header.

Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
 drivers/net/mlx4/mlx4.c | 183 ++--------------------------------------------
 drivers/net/mlx4/mlx4.h | 187 +++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 189 insertions(+), 181 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 79efaaa..82ccac8 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -1,8 +1,8 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright 2012-2015 6WIND S.A.
- *   Copyright 2012 Mellanox.
+ *   Copyright 2012-2017 6WIND S.A.
+ *   Copyright 2012-2017 Mellanox.
  *
  *   Redistribution and use in source and binary forms, with or without
  *   modification, are permitted provided that the following conditions
@@ -68,10 +68,6 @@
 #pragma GCC diagnostic error "-Wpedantic"
 #endif
 
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
 #include <rte_ether.h>
 #include <rte_ethdev.h>
 #include <rte_dev.h>
@@ -86,9 +82,6 @@
 #include <rte_log.h>
 #include <rte_alarm.h>
 #include <rte_memory.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
 
 /* Generated configuration header. */
 #include "mlx4_autoconf.h"
@@ -96,21 +89,6 @@
 /* PMD header. */
 #include "mlx4.h"
 
-/* Runtime logging through RTE_LOG() is enabled when not in debugging mode.
- * Intermediate LOG_*() macros add the required end-of-line characters. */
-#ifndef NDEBUG
-#define INFO(...) DEBUG(__VA_ARGS__)
-#define WARN(...) DEBUG(__VA_ARGS__)
-#define ERROR(...) DEBUG(__VA_ARGS__)
-#else
-#define LOG__(level, m, ...) \
-	RTE_LOG(level, PMD, MLX4_DRIVER_NAME ": " m "%c", __VA_ARGS__)
-#define LOG_(level, ...) LOG__(level, __VA_ARGS__, '\n')
-#define INFO(...) LOG_(INFO, __VA_ARGS__)
-#define WARN(...) LOG_(WARNING, __VA_ARGS__)
-#define ERROR(...) LOG_(ERR, __VA_ARGS__)
-#endif
-
 /* Convenience macros for accessing mbuf fields. */
 #define NEXT(m) ((m)->next)
 #define DATA_LEN(m) ((m)->data_len)
@@ -137,157 +115,6 @@
 	 (((val) & (from)) / ((from) / (to))) : \
 	 (((val) & (from)) * ((to) / (from))))
 
-struct mlx4_rxq_stats {
-	unsigned int idx; /**< Mapping index. */
-#ifdef MLX4_PMD_SOFT_COUNTERS
-	uint64_t ipackets;  /**< Total of successfully received packets. */
-	uint64_t ibytes;    /**< Total of successfully received bytes. */
-#endif
-	uint64_t idropped;  /**< Total of packets dropped when RX ring full. */
-	uint64_t rx_nombuf; /**< Total of RX mbuf allocation failures. */
-};
-
-struct mlx4_txq_stats {
-	unsigned int idx; /**< Mapping index. */
-#ifdef MLX4_PMD_SOFT_COUNTERS
-	uint64_t opackets; /**< Total of successfully sent packets. */
-	uint64_t obytes;   /**< Total of successfully sent bytes. */
-#endif
-	uint64_t odropped; /**< Total of packets not sent when TX ring full. */
-};
-
-/* RX element (scattered packets). */
-struct rxq_elt_sp {
-	struct ibv_recv_wr wr; /* Work Request. */
-	struct ibv_sge sges[MLX4_PMD_SGE_WR_N]; /* Scatter/Gather Elements. */
-	struct rte_mbuf *bufs[MLX4_PMD_SGE_WR_N]; /* SGEs buffers. */
-};
-
-/* RX element. */
-struct rxq_elt {
-	struct ibv_recv_wr wr; /* Work Request. */
-	struct ibv_sge sge; /* Scatter/Gather Element. */
-	/* mbuf pointer is derived from WR_ID(wr.wr_id).offset. */
-};
-
-/* RX queue descriptor. */
-struct rxq {
-	struct priv *priv; /* Back pointer to private data. */
-	struct rte_mempool *mp; /* Memory Pool for allocations. */
-	struct ibv_mr *mr; /* Memory Region (for mp). */
-	struct ibv_cq *cq; /* Completion Queue. */
-	struct ibv_qp *qp; /* Queue Pair. */
-	struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
-	struct ibv_exp_cq_family *if_cq; /* CQ interface. */
-	/*
-	 * Each VLAN ID requires a separate flow steering rule.
-	 */
-	BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
-	struct ibv_flow *mac_flow[MLX4_MAX_MAC_ADDRESSES][MLX4_MAX_VLAN_IDS];
-	struct ibv_flow *promisc_flow; /* Promiscuous flow. */
-	struct ibv_flow *allmulti_flow; /* Multicast flow. */
-	unsigned int port_id; /* Port ID for incoming packets. */
-	unsigned int elts_n; /* (*elts)[] length. */
-	unsigned int elts_head; /* Current index in (*elts)[]. */
-	union {
-		struct rxq_elt_sp (*sp)[]; /* Scattered RX elements. */
-		struct rxq_elt (*no_sp)[]; /* RX elements. */
-	} elts;
-	unsigned int sp:1; /* Use scattered RX elements. */
-	unsigned int csum:1; /* Enable checksum offloading. */
-	unsigned int csum_l2tun:1; /* Same for L2 tunnels. */
-	struct mlx4_rxq_stats stats; /* RX queue counters. */
-	unsigned int socket; /* CPU socket ID for allocations. */
-	struct ibv_exp_res_domain *rd; /* Resource Domain. */
-};
-
-/* TX element. */
-struct txq_elt {
-	struct rte_mbuf *buf;
-};
-
-/* Linear buffer type. It is used when transmitting buffers with too many
- * segments that do not fit the hardware queue (see max_send_sge).
- * Extra segments are copied (linearized) in such buffers, replacing the
- * last SGE during TX.
- * The size is arbitrary but large enough to hold a jumbo frame with
- * 8 segments considering mbuf.buf_len is about 2048 bytes. */
-typedef uint8_t linear_t[16384];
-
-/* TX queue descriptor. */
-struct txq {
-	struct priv *priv; /* Back pointer to private data. */
-	struct {
-		const struct rte_mempool *mp; /* Cached Memory Pool. */
-		struct ibv_mr *mr; /* Memory Region (for mp). */
-		uint32_t lkey; /* mr->lkey */
-	} mp2mr[MLX4_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
-	struct ibv_cq *cq; /* Completion Queue. */
-	struct ibv_qp *qp; /* Queue Pair. */
-	struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
-	struct ibv_exp_cq_family *if_cq; /* CQ interface. */
-#if MLX4_PMD_MAX_INLINE > 0
-	uint32_t max_inline; /* Max inline send size <= MLX4_PMD_MAX_INLINE. */
-#endif
-	unsigned int elts_n; /* (*elts)[] length. */
-	struct txq_elt (*elts)[]; /* TX elements. */
-	unsigned int elts_head; /* Current index in (*elts)[]. */
-	unsigned int elts_tail; /* First element awaiting completion. */
-	unsigned int elts_comp; /* Number of completion requests. */
-	unsigned int elts_comp_cd; /* Countdown for next completion request. */
-	unsigned int elts_comp_cd_init; /* Initial value for countdown. */
-	struct mlx4_txq_stats stats; /* TX queue counters. */
-	linear_t (*elts_linear)[]; /* Linearized buffers. */
-	struct ibv_mr *mr_linear; /* Memory Region for linearized buffers. */
-	unsigned int socket; /* CPU socket ID for allocations. */
-	struct ibv_exp_res_domain *rd; /* Resource Domain. */
-};
-
-struct priv {
-	struct rte_eth_dev *dev; /* Ethernet device. */
-	struct ibv_context *ctx; /* Verbs context. */
-	struct ibv_device_attr device_attr; /* Device properties. */
-	struct ibv_pd *pd; /* Protection Domain. */
-	/*
-	 * MAC addresses array and configuration bit-field.
-	 * An extra entry that cannot be modified by the DPDK is reserved
-	 * for broadcast frames (destination MAC address ff:ff:ff:ff:ff:ff).
-	 */
-	struct ether_addr mac[MLX4_MAX_MAC_ADDRESSES];
-	BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
-	/* VLAN filters. */
-	struct {
-		unsigned int enabled:1; /* If enabled. */
-		unsigned int id:12; /* VLAN ID (0-4095). */
-	} vlan_filter[MLX4_MAX_VLAN_IDS]; /* VLAN filters table. */
-	/* Device properties. */
-	uint16_t mtu; /* Configured MTU. */
-	uint8_t port; /* Physical port number. */
-	unsigned int started:1; /* Device started, flows enabled. */
-	unsigned int promisc:1; /* Device in promiscuous mode. */
-	unsigned int allmulti:1; /* Device receives all multicast packets. */
-	unsigned int hw_qpg:1; /* QP groups are supported. */
-	unsigned int hw_tss:1; /* TSS is supported. */
-	unsigned int hw_rss:1; /* RSS is supported. */
-	unsigned int hw_csum:1; /* Checksum offload is supported. */
-	unsigned int hw_csum_l2tun:1; /* Same for L2 tunnels. */
-	unsigned int rss:1; /* RSS is enabled. */
-	unsigned int vf:1; /* This is a VF device. */
-	unsigned int pending_alarm:1; /* An alarm is pending. */
-#ifdef INLINE_RECV
-	unsigned int inl_recv_size; /* Inline recv size */
-#endif
-	unsigned int max_rss_tbl_sz; /* Maximum number of RSS queues. */
-	/* RX/TX queues. */
-	struct rxq rxq_parent; /* Parent queue when RSS is enabled. */
-	unsigned int rxqs_n; /* RX queues array size. */
-	unsigned int txqs_n; /* TX queues array size. */
-	struct rxq *(*rxqs)[]; /* RX queues. */
-	struct txq *(*txqs)[]; /* TX queues. */
-	struct rte_intr_handle intr_handle; /* Interrupt handler. */
-	rte_spinlock_t lock; /* Lock for control functions. */
-};
-
 /* Local storage for secondary process data. */
 struct mlx4_secondary_data {
 	struct rte_eth_dev_data data; /* Local device data. */
@@ -335,8 +162,7 @@ struct mlx4_secondary_data {
  * @param priv
  *   Pointer to private structure.
  */
-static void
-priv_lock(struct priv *priv)
+void priv_lock(struct priv *priv)
 {
 	rte_spinlock_lock(&priv->lock);
 }
@@ -347,8 +173,7 @@ struct mlx4_secondary_data {
  * @param priv
  *   Pointer to private structure.
  */
-static void
-priv_unlock(struct priv *priv)
+void priv_unlock(struct priv *priv)
 {
 	rte_spinlock_unlock(&priv->lock);
 }
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 4c7505e..70c9ecd 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -1,8 +1,8 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright 2012-2015 6WIND S.A.
- *   Copyright 2012 Mellanox.
+ *   Copyright 2012-2017 6WIND S.A.
+ *   Copyright 2012-2017 Mellanox.
  *
  *   Redistribution and use in source and binary forms, with or without
  *   modification, are permitted provided that the following conditions
@@ -39,6 +39,33 @@
 #include <limits.h>
 
 /*
+ * Runtime logging through RTE_LOG() is enabled when not in debugging mode.
+ * Intermediate LOG_*() macros add the required end-of-line characters.
+ */
+#ifndef NDEBUG
+#define INFO(...) DEBUG(__VA_ARGS__)
+#define WARN(...) DEBUG(__VA_ARGS__)
+#define ERROR(...) DEBUG(__VA_ARGS__)
+#else
+#define LOG__(level, m, ...) \
+	RTE_LOG(level, PMD, MLX4_DRIVER_NAME ": " m "%c", __VA_ARGS__)
+#define LOG_(level, ...) LOG__(level, __VA_ARGS__, '\n')
+#define INFO(...) LOG_(INFO, __VA_ARGS__)
+#define WARN(...) LOG_(WARNING, __VA_ARGS__)
+#define ERROR(...) LOG_(ERR, __VA_ARGS__)
+#endif
+
+/* Verbs header. */
+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+/*
  * Maximum number of simultaneous MAC addresses supported.
  *
  * According to ConnectX's Programmer Reference Manual:
@@ -160,4 +187,160 @@ enum {
 #define claim_positive(...) (__VA_ARGS__)
 #endif /* NDEBUG */
 
+struct mlx4_rxq_stats {
+	unsigned int idx; /**< Mapping index. */
+#ifdef MLX4_PMD_SOFT_COUNTERS
+	uint64_t ipackets; /**< Total of successfully received packets. */
+	uint64_t ibytes; /**< Total of successfully received bytes. */
+#endif
+	uint64_t idropped; /**< Total of packets dropped when RX ring full. */
+	uint64_t rx_nombuf; /**< Total of RX mbuf allocation failures. */
+};
+
+/* RX element (scattered packets). */
+struct rxq_elt_sp {
+	struct ibv_recv_wr wr; /* Work Request. */
+	struct ibv_sge sges[MLX4_PMD_SGE_WR_N]; /* Scatter/Gather Elements. */
+	struct rte_mbuf *bufs[MLX4_PMD_SGE_WR_N]; /* SGEs buffers. */
+};
+
+/* RX element. */
+struct rxq_elt {
+	struct ibv_recv_wr wr; /* Work Request. */
+	struct ibv_sge sge; /* Scatter/Gather Element. */
+	/* mbuf pointer is derived from WR_ID(wr.wr_id).offset. */
+};
+
+/* RX queue descriptor. */
+struct rxq {
+	struct priv *priv; /* Back pointer to private data. */
+	struct rte_mempool *mp; /* Memory Pool for allocations. */
+	struct ibv_mr *mr; /* Memory Region (for mp). */
+	struct ibv_cq *cq; /* Completion Queue. */
+	struct ibv_qp *qp; /* Queue Pair. */
+	struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
+	struct ibv_exp_cq_family *if_cq; /* CQ interface. */
+	/*
+	 * Each VLAN ID requires a separate flow steering rule.
+	 */
+	BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
+	struct ibv_flow *mac_flow[MLX4_MAX_MAC_ADDRESSES][MLX4_MAX_VLAN_IDS];
+	struct ibv_flow *promisc_flow; /* Promiscuous flow. */
+	struct ibv_flow *allmulti_flow; /* Multicast flow. */
+	unsigned int port_id; /* Port ID for incoming packets. */
+	unsigned int elts_n; /* (*elts)[] length. */
+	unsigned int elts_head; /* Current index in (*elts)[]. */
+	union {
+		struct rxq_elt_sp (*sp)[]; /* Scattered RX elements. */
+		struct rxq_elt (*no_sp)[]; /* RX elements. */
+	} elts;
+	unsigned int sp:1; /* Use scattered RX elements. */
+	unsigned int csum:1; /* Enable checksum offloading. */
+	unsigned int csum_l2tun:1; /* Same for L2 tunnels. */
+	struct mlx4_rxq_stats stats; /* RX queue counters. */
+	unsigned int socket; /* CPU socket ID for allocations. */
+	struct ibv_exp_res_domain *rd; /* Resource Domain. */
+};
+
+/* TX element. */
+struct txq_elt {
+	struct rte_mbuf *buf;
+};
+
+struct mlx4_txq_stats {
+	unsigned int idx; /**< Mapping index. */
+#ifdef MLX4_PMD_SOFT_COUNTERS
+	uint64_t opackets; /**< Total of successfully sent packets. */
+	uint64_t obytes;   /**< Total of successfully sent bytes. */
+#endif
+	uint64_t odropped; /**< Total of packets not sent when TX ring full. */
+};
+
+/*
+ * Linear buffer type. It is used when transmitting buffers with too many
+ * segments that do not fit the hardware queue (see max_send_sge).
+ * Extra segments are copied (linearized) in such buffers, replacing the
+ * last SGE during TX.
+ * The size is arbitrary but large enough to hold a jumbo frame with
+ * 8 segments considering mbuf.buf_len is about 2048 bytes.
+ */
+typedef uint8_t linear_t[16384];
+
+/* TX queue descriptor. */
+struct txq {
+	struct priv *priv; /* Back pointer to private data. */
+	struct {
+		const struct rte_mempool *mp; /* Cached Memory Pool. */
+		struct ibv_mr *mr; /* Memory Region (for mp). */
+		uint32_t lkey; /* mr->lkey */
+	} mp2mr[MLX4_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
+	struct ibv_cq *cq; /* Completion Queue. */
+	struct ibv_qp *qp; /* Queue Pair. */
+	struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
+	struct ibv_exp_cq_family *if_cq; /* CQ interface. */
+#if MLX4_PMD_MAX_INLINE > 0
+	uint32_t max_inline; /* Max inline send size <= MLX4_PMD_MAX_INLINE. */
+#endif
+	unsigned int elts_n; /* (*elts)[] length. */
+	struct txq_elt (*elts)[]; /* TX elements. */
+	unsigned int elts_head; /* Current index in (*elts)[]. */
+	unsigned int elts_tail; /* First element awaiting completion. */
+	unsigned int elts_comp; /* Number of completion requests. */
+	unsigned int elts_comp_cd; /* Countdown for next completion request. */
+	unsigned int elts_comp_cd_init; /* Initial value for countdown. */
+	struct mlx4_txq_stats stats; /* TX queue counters. */
+	linear_t (*elts_linear)[]; /* Linearized buffers. */
+	struct ibv_mr *mr_linear; /* Memory Region for linearized buffers. */
+	unsigned int socket; /* CPU socket ID for allocations. */
+	struct ibv_exp_res_domain *rd; /* Resource Domain. */
+};
+
+struct priv {
+	struct rte_eth_dev *dev; /* Ethernet device. */
+	struct ibv_context *ctx; /* Verbs context. */
+	struct ibv_device_attr device_attr; /* Device properties. */
+	struct ibv_pd *pd; /* Protection Domain. */
+	/*
+	 * MAC addresses array and configuration bit-field.
+	 * An extra entry that cannot be modified by the DPDK is reserved
+	 * for broadcast frames (destination MAC address ff:ff:ff:ff:ff:ff).
+	 */
+	struct ether_addr mac[MLX4_MAX_MAC_ADDRESSES];
+	BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
+	/* VLAN filters. */
+	struct {
+		unsigned int enabled:1; /* If enabled. */
+		unsigned int id:12; /* VLAN ID (0-4095). */
+	} vlan_filter[MLX4_MAX_VLAN_IDS]; /* VLAN filters table. */
+	/* Device properties. */
+	uint16_t mtu; /* Configured MTU. */
+	uint8_t port; /* Physical port number. */
+	unsigned int started:1; /* Device started, flows enabled. */
+	unsigned int promisc:1; /* Device in promiscuous mode. */
+	unsigned int allmulti:1; /* Device receives all multicast packets. */
+	unsigned int hw_qpg:1; /* QP groups are supported. */
+	unsigned int hw_tss:1; /* TSS is supported. */
+	unsigned int hw_rss:1; /* RSS is supported. */
+	unsigned int hw_csum:1; /* Checksum offload is supported. */
+	unsigned int hw_csum_l2tun:1; /* Same for L2 tunnels. */
+	unsigned int rss:1; /* RSS is enabled. */
+	unsigned int vf:1; /* This is a VF device. */
+	unsigned int pending_alarm:1; /* An alarm is pending. */
+#ifdef INLINE_RECV
+	unsigned int inl_recv_size; /* Inline recv size */
+#endif
+	unsigned int max_rss_tbl_sz; /* Maximum number of RSS queues. */
+	/* RX/TX queues. */
+	struct rxq rxq_parent; /* Parent queue when RSS is enabled. */
+	unsigned int rxqs_n; /* RX queues array size. */
+	unsigned int txqs_n; /* TX queues array size. */
+	struct rxq *(*rxqs)[]; /* RX queues. */
+	struct txq *(*txqs)[]; /* TX queues. */
+	struct rte_intr_handle intr_handle; /* Interrupt handler. */
+	rte_spinlock_t lock; /* Lock for control functions. */
+};
+
+void priv_lock(struct priv *priv);
+void priv_unlock(struct priv *priv);
+
 #endif /* RTE_PMD_MLX4_H_ */
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [dpdk-dev] [PATCH v2 2/2] net/mlx4: support basic flow items and actions
  2017-02-21 14:07 [dpdk-dev] [PATCH 1/2] net/mlx4: split the definitions to the header file Vasily Philipov
                   ` (2 preceding siblings ...)
  2017-02-22 13:42 ` [dpdk-dev] [PATCH v2 " Vasily Philipov
@ 2017-02-22 13:42 ` Vasily Philipov
  2017-03-05  7:51 ` [dpdk-dev] [PATCH v3 1/2] net/mlx4: split the definitions to the header file Vasily Philipov
  2017-03-05  7:51 ` [dpdk-dev] [PATCH v3 2/2] net/mlx4: support basic flow items and actions Vasily Philipov
  5 siblings, 0 replies; 15+ messages in thread
From: Vasily Philipov @ 2017-02-22 13:42 UTC (permalink / raw)
  To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro

Add support for the following items: eth, vlan, ipv4, udp and tcp, and for
the following actions: queue and drop.
---
 drivers/net/mlx4/Makefile    |    3 +-
 drivers/net/mlx4/mlx4.c      |  103 ++++-
 drivers/net/mlx4/mlx4.h      |    3 +
 drivers/net/mlx4/mlx4_flow.c | 1053 ++++++++++++++++++++++++++++++++++++++++++
 drivers/net/mlx4/mlx4_flow.h |  104 +++++
 5 files changed, 1248 insertions(+), 18 deletions(-)
 create mode 100644 drivers/net/mlx4/mlx4_flow.c
 create mode 100644 drivers/net/mlx4/mlx4_flow.h

diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile
index 68c5902..1d463f7 100644
--- a/drivers/net/mlx4/Makefile
+++ b/drivers/net/mlx4/Makefile
@@ -36,6 +36,7 @@ LIB = librte_pmd_mlx4.a
 
 # Sources.
 SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_flow.c
 
 # Dependencies.
 DEPDIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += lib/librte_ether
@@ -129,7 +130,7 @@ mlx4_autoconf.h: mlx4_autoconf.h.new
 		cmp '$<' '$@' $(AUTOCONF_OUTPUT) || \
 		mv '$<' '$@'
 
-mlx4.o: mlx4_autoconf.h
+$(SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD):.c=.o): mlx4_autoconf.h
 
 clean_mlx4: FORCE
 	$Q rm -f -- mlx4_autoconf.h mlx4_autoconf.h.new
diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 82ccac8..e892f9c 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -82,12 +82,14 @@
 #include <rte_log.h>
 #include <rte_alarm.h>
 #include <rte_memory.h>
+#include <rte_flow.h>
 
 /* Generated configuration header. */
 #include "mlx4_autoconf.h"
 
-/* PMD header. */
+/* PMD headers. */
 #include "mlx4.h"
+#include "mlx4_flow.h"
 
 /* Convenience macros for accessing mbuf fields. */
 #define NEXT(m) ((m)->next)
@@ -2351,6 +2353,7 @@ struct txq_mp2mr_mbuf_check_data {
 	assert(((uint8_t *)attr + sizeof(*attr)) == (uint8_t *)spec);
 	*attr = (struct ibv_flow_attr){
 		.type = IBV_FLOW_ATTR_NORMAL,
+		.priority = 3,
 		.num_of_specs = 1,
 		.port = priv->port,
 		.flags = 0
@@ -3938,6 +3941,7 @@ struct txq_mp2mr_mbuf_check_data {
 	unsigned int i = 0;
 	unsigned int r;
 	struct rxq *rxq;
+	int ret;
 
 	if (mlx4_is_secondary())
 		return -E_RTE_SECONDARY;
@@ -3957,36 +3961,50 @@ struct txq_mp2mr_mbuf_check_data {
 	}
 	/* Iterate only once when RSS is enabled. */
 	do {
-		int ret;
-
 		/* Ignore nonexistent RX queues. */
 		if (rxq == NULL)
 			continue;
 		ret = rxq_mac_addrs_add(rxq);
-		if (!ret && priv->promisc)
+		if (ret)
+			goto err;
+		if (priv->promisc)
 			ret = rxq_promiscuous_enable(rxq);
-		if (!ret && priv->allmulti)
+		if (ret) {
+			rxq_mac_addrs_del(rxq);
+			goto err;
+		}
+		if (priv->allmulti)
 			ret = rxq_allmulticast_enable(rxq);
 		if (!ret)
 			continue;
 		WARN("%p: QP flow attachment failed: %s",
 		     (void *)dev, strerror(ret));
-		/* Rollback. */
-		while (i != 0) {
-			rxq = (*priv->rxqs)[--i];
-			if (rxq != NULL) {
-				rxq_allmulticast_disable(rxq);
-				rxq_promiscuous_disable(rxq);
-				rxq_mac_addrs_del(rxq);
-			}
-		}
-		priv->started = 0;
-		priv_unlock(priv);
-		return -ret;
+		rxq_promiscuous_disable(rxq);
+		rxq_mac_addrs_del(rxq);
+		goto err;
 	} while ((--r) && ((rxq = (*priv->rxqs)[++i]), i));
 	priv_dev_interrupt_handler_install(priv, dev);
+	ret = mlx4_priv_flow_start(priv);
+	if (ret) {
+		ERROR("%p: flow start failed: %s",
+		      (void *)dev, strerror(ret));
+		goto err;
+	}
 	priv_unlock(priv);
 	return 0;
+err:
+	/* Rollback. */
+	while (i != 0) {
+		rxq = (*priv->rxqs)[--i];
+		if (rxq != NULL) {
+			rxq_allmulticast_disable(rxq);
+			rxq_promiscuous_disable(rxq);
+			rxq_mac_addrs_del(rxq);
+		}
+	}
+	priv->started = 0;
+	priv_unlock(priv);
+	return -ret;
 }
 
 /**
@@ -4021,6 +4039,7 @@ struct txq_mp2mr_mbuf_check_data {
 		rxq = (*priv->rxqs)[0];
 		r = priv->rxqs_n;
 	}
+	mlx4_priv_flow_stop(priv);
 	/* Iterate only once when RSS is enabled. */
 	do {
 		/* Ignore nonexistent RX queues. */
@@ -5022,6 +5041,55 @@ struct txq_mp2mr_mbuf_check_data {
 	return -ret;
 }
 
+const struct rte_flow_ops mlx4_flow_ops = {
+	.validate = mlx4_flow_validate,
+	.create = mlx4_flow_create,
+	.destroy = mlx4_flow_destroy,
+	.flush = mlx4_flow_flush,
+	.query = NULL,
+};
+
+/**
+ * Manage filter operations.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param filter_type
+ *   Filter type.
+ * @param filter_op
+ *   Operation to perform.
+ * @param arg
+ *   Pointer to operation-specific structure.
+ *
+ * @return
+ *   0 on success, negative errno value on failure.
+ */
+static int
+mlx4_dev_filter_ctrl(struct rte_eth_dev *dev,
+		     enum rte_filter_type filter_type,
+		     enum rte_filter_op filter_op,
+		     void *arg)
+{
+	int ret = EINVAL;
+
+	switch (filter_type) {
+	case RTE_ETH_FILTER_GENERIC:
+		if (filter_op != RTE_ETH_FILTER_GET)
+			return -EINVAL;
+		*(const void **)arg = &mlx4_flow_ops;
+		return 0;
+	case RTE_ETH_FILTER_FDIR:
+		DEBUG("%p: filter type FDIR is not supported by this PMD",
+		      (void *)dev);
+		break;
+	default:
+		ERROR("%p: filter type (%d) not supported",
+		      (void *)dev, filter_type);
+		break;
+	}
+	return -ret;
+}
+
 static const struct eth_dev_ops mlx4_dev_ops = {
 	.dev_configure = mlx4_dev_configure,
 	.dev_start = mlx4_dev_start,
@@ -5056,6 +5124,7 @@ struct txq_mp2mr_mbuf_check_data {
 	.mac_addr_add = mlx4_mac_addr_add,
 	.mac_addr_set = mlx4_mac_addr_set,
 	.mtu_set = mlx4_dev_set_mtu,
+	.filter_ctrl = mlx4_dev_filter_ctrl,
 };
 
 /**
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 70c9ecd..fac408b 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -295,6 +295,8 @@ struct txq {
 	struct ibv_exp_res_domain *rd; /* Resource Domain. */
 };
 
+struct rte_flow;
+
 struct priv {
 	struct rte_eth_dev *dev; /* Ethernet device. */
 	struct ibv_context *ctx; /* Verbs context. */
@@ -337,6 +339,7 @@ struct priv {
 	struct rxq *(*rxqs)[]; /* RX queues. */
 	struct txq *(*txqs)[]; /* TX queues. */
 	struct rte_intr_handle intr_handle; /* Interrupt handler. */
+	LIST_HEAD(mlx4_flows, rte_flow) flows;
 	rte_spinlock_t lock; /* Lock for control functions. */
 };
 
diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
new file mode 100644
index 0000000..2328a18
--- /dev/null
+++ b/drivers/net/mlx4/mlx4_flow.c
@@ -0,0 +1,1053 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2017 6WIND S.A.
+ *   Copyright 2017 Mellanox.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of 6WIND S.A. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <assert.h>
+
+/* Verbs header. */
+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+#include <rte_malloc.h>
+
+/* Generated configuration header. */
+#include "mlx4_autoconf.h"
+
+/* PMD headers. */
+#include "mlx4.h"
+#include "mlx4_flow.h"
+
+/** Static initializer for items. */
+#define ITEMS(...) \
+	(const enum rte_flow_item_type []){ \
+		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
+	}
+
+/** Structure to generate a simple graph of layers supported by the NIC. */
+struct mlx4_flow_items {
+	/** List of possible actions for these items. */
+	const enum rte_flow_action_type *const actions;
+	/** Bit-masks corresponding to the possibilities for the item. */
+	const void *mask;
+	/**
+	 * Default bit-masks to use when item->mask is not provided. When
+	 * @p default_mask is also NULL, the full supported bit-mask (@p mask) is
+	 * used instead.
+	 */
+	const void *default_mask;
+	/** Bit-masks size in bytes. */
+	const unsigned int mask_sz;
+	/**
+	 * Check support for a given item.
+	 *
+	 * @param[in] item
+	 *   Item specification.
+	 * @param[in] mask
+	 *   Bit-masks covering supported fields to compare with spec,
+	 *   last and mask in
+	 *   @p item.
+	 * @param size
+	 *   Bit-mask size in bytes.
+	 *
+	 * @return
+	 *   0 on success, negative value otherwise.
+	 */
+	int (*validate)(const struct rte_flow_item *item,
+			const uint8_t *mask, unsigned int size);
+	/**
+	 * Conversion function from rte_flow to NIC specific flow.
+	 *
+	 * @param item
+	 *   rte_flow item to convert.
+	 * @param default_mask
+	 *   Default bit-masks to use when item->mask is not provided.
+	 * @param data
+	 *   Internal structure to store the conversion.
+	 *
+	 * @return
+	 *   0 on success, negative value otherwise.
+	 */
+	int (*convert)(const struct rte_flow_item *item,
+		       const void *default_mask,
+		       void *data);
+	/** Size in bytes of the destination structure. */
+	const unsigned int dst_sz;
+	/** List of possible following items.  */
+	const enum rte_flow_item_type *const items;
+};
+
+/** Valid action for this PMD. */
+static const enum rte_flow_action_type valid_actions[] = {
+	RTE_FLOW_ACTION_TYPE_DROP,
+	RTE_FLOW_ACTION_TYPE_QUEUE,
+	RTE_FLOW_ACTION_TYPE_END,
+};
+
+/**
+ * Convert Ethernet item to Verbs specification.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] default_mask
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param[in,out] data
+ *   User structure.
+ */
+static int
+mlx4_flow_create_eth(const struct rte_flow_item *item,
+		     const void *default_mask,
+		     void *data)
+{
+	const struct rte_flow_item_eth *spec = item->spec;
+	const struct rte_flow_item_eth *mask = item->mask;
+	struct mlx4_flow *flow = (struct mlx4_flow *)data;
+	struct ibv_flow_spec_eth *eth;
+	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
+	unsigned int i;
+
+	++flow->ibv_attr->num_of_specs;
+	flow->ibv_attr->priority = 2;
+	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+	*eth = (struct ibv_flow_spec_eth) {
+		.type = IBV_FLOW_SPEC_ETH,
+		.size = eth_size,
+	};
+	if (!spec) {
+		flow->ibv_attr->type = IBV_FLOW_ATTR_ALL_DEFAULT;
+		return 0;
+	}
+	if (!mask)
+		mask = default_mask;
+	memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
+	memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
+	memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
+	memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
+	/* Remove unwanted bits from values. */
+	for (i = 0; i < ETHER_ADDR_LEN; ++i) {
+		eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
+		eth->val.src_mac[i] &= eth->mask.src_mac[i];
+	}
+	return 0;
+}
+
+/**
+ * Convert VLAN item to Verbs specification.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] default_mask
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param[in,out] data
+ *   User structure.
+ */
+static int
+mlx4_flow_create_vlan(const struct rte_flow_item *item,
+		      const void *default_mask,
+		      void *data)
+{
+	const struct rte_flow_item_vlan *spec = item->spec;
+	const struct rte_flow_item_vlan *mask = item->mask;
+	struct mlx4_flow *flow = (struct mlx4_flow *)data;
+	struct ibv_flow_spec_eth *eth;
+	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
+
+	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
+	if (!spec)
+		return 0;
+	if (!mask)
+		mask = default_mask;
+	eth->val.vlan_tag = spec->tci;
+	eth->mask.vlan_tag = mask->tci;
+	eth->val.vlan_tag &= eth->mask.vlan_tag;
+	return 0;
+}
+
+/**
+ * Convert IPv4 item to Verbs specification.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] default_mask
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param[in,out] data
+ *   User structure.
+ */
+static int
+mlx4_flow_create_ipv4(const struct rte_flow_item *item,
+		      const void *default_mask,
+		      void *data)
+{
+	const struct rte_flow_item_ipv4 *spec = item->spec;
+	const struct rte_flow_item_ipv4 *mask = item->mask;
+	struct mlx4_flow *flow = (struct mlx4_flow *)data;
+	struct ibv_flow_spec_ipv4 *ipv4;
+	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4);
+
+	++flow->ibv_attr->num_of_specs;
+	flow->ibv_attr->priority = 1;
+	ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+	*ipv4 = (struct ibv_flow_spec_ipv4) {
+		.type = IBV_FLOW_SPEC_IPV4,
+		.size = ipv4_size,
+	};
+	if (!spec)
+		return 0;
+	ipv4->val = (struct ibv_flow_ipv4_filter) {
+		.src_ip = spec->hdr.src_addr,
+		.dst_ip = spec->hdr.dst_addr,
+	};
+	if (!mask)
+		mask = default_mask;
+	ipv4->mask = (struct ibv_flow_ipv4_filter) {
+		.src_ip = mask->hdr.src_addr,
+		.dst_ip = mask->hdr.dst_addr,
+	};
+	/* Remove unwanted bits from values. */
+	ipv4->val.src_ip &= ipv4->mask.src_ip;
+	ipv4->val.dst_ip &= ipv4->mask.dst_ip;
+	return 0;
+}
+
+/**
+ * Convert UDP item to Verbs specification.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] default_mask
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param[in,out] data
+ *   User structure.
+ */
+static int
+mlx4_flow_create_udp(const struct rte_flow_item *item,
+		     const void *default_mask,
+		     void *data)
+{
+	const struct rte_flow_item_udp *spec = item->spec;
+	const struct rte_flow_item_udp *mask = item->mask;
+	struct mlx4_flow *flow = (struct mlx4_flow *)data;
+	struct ibv_flow_spec_tcp_udp *udp;
+	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
+
+	++flow->ibv_attr->num_of_specs;
+	flow->ibv_attr->priority = 0;
+	udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+	*udp = (struct ibv_flow_spec_tcp_udp) {
+		.type = IBV_FLOW_SPEC_UDP,
+		.size = udp_size,
+	};
+	if (!spec)
+		return 0;
+	udp->val.dst_port = spec->hdr.dst_port;
+	udp->val.src_port = spec->hdr.src_port;
+	if (!mask)
+		mask = default_mask;
+	udp->mask.dst_port = mask->hdr.dst_port;
+	udp->mask.src_port = mask->hdr.src_port;
+	/* Remove unwanted bits from values. */
+	udp->val.src_port &= udp->mask.src_port;
+	udp->val.dst_port &= udp->mask.dst_port;
+	return 0;
+}
+
+/**
+ * Translate a TCP pattern item into a Verbs TCP/UDP flow specification.
+ *
+ * The specification is written into the Verbs attribute buffer at the
+ * current offset and the number of specifications is incremented.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] default_mask
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param[in, out] data
+ *   User structure (struct mlx4_flow).
+ *
+ * @return
+ *   Always 0.
+ */
+static int
+mlx4_flow_create_tcp(const struct rte_flow_item *item,
+		     const void *default_mask,
+		     void *data)
+{
+	struct mlx4_flow *flow = data;
+	const struct rte_flow_item_tcp *tcp_spec = item->spec;
+	const struct rte_flow_item_tcp *tcp_mask =
+		item->mask ? item->mask : default_mask;
+	struct ibv_flow_spec_tcp_udp *verbs_tcp =
+		(void *)((uintptr_t)flow->ibv_attr + flow->offset);
+
+	flow->ibv_attr->num_of_specs += 1;
+	flow->ibv_attr->priority = 0;
+	*verbs_tcp = (struct ibv_flow_spec_tcp_udp) {
+		.type = IBV_FLOW_SPEC_TCP,
+		.size = sizeof(*verbs_tcp),
+	};
+	/* Without a spec, val and mask stay zeroed. */
+	if (tcp_spec == NULL)
+		return 0;
+	verbs_tcp->mask.src_port = tcp_mask->hdr.src_port;
+	verbs_tcp->mask.dst_port = tcp_mask->hdr.dst_port;
+	/* Keep only the bits selected by the mask in the matched values. */
+	verbs_tcp->val.src_port =
+		tcp_spec->hdr.src_port & verbs_tcp->mask.src_port;
+	verbs_tcp->val.dst_port =
+		tcp_spec->hdr.dst_port & verbs_tcp->mask.dst_port;
+	return 0;
+}
+
+/**
+ * Check support for a given item.
+ *
+ * An item is accepted when:
+ * - mask and last are only provided together with spec,
+ * - the user-provided mask, or spec and last when no mask is provided, do
+ *   not set any bit outside of the supported bit-mask,
+ * - spec and last are identical once masked, i.e. the item does not
+ *   describe an actual range.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] mask
+ *   Bit-masks covering supported fields to compare with spec, last and mask in
+ *   \item.
+ * @param size
+ *   Bit-Mask size in bytes.
+ *
+ * @return
+ *   0 on success, negative value otherwise.
+ */
+static int
+mlx4_flow_item_validate(const struct rte_flow_item *item,
+			const uint8_t *mask, unsigned int size)
+{
+	const uint8_t *spec = item->spec;
+	const uint8_t *last = item->last;
+	const uint8_t *user_mask = item->mask;
+	/* Mask applied to spec/last before comparing them. */
+	const uint8_t *apply = user_mask ? user_mask : mask;
+	unsigned int i;
+
+	if (!spec)
+		return (user_mask || last) ? -1 : 0;
+	for (i = 0; i < size; ++i) {
+		if (user_mask) {
+			/*
+			 * A user mask selecting bits outside of the
+			 * supported ones cannot be honored.
+			 */
+			if ((user_mask[i] | mask[i]) != mask[i])
+				return -1;
+		} else {
+			/* Unmasked values must fit the supported mask. */
+			if ((spec[i] | mask[i]) != mask[i])
+				return -1;
+			if (last && (last[i] | mask[i]) != mask[i])
+				return -1;
+		}
+		/* Ranges are not supported: masked spec must equal last. */
+		if (last && ((spec[i] ^ last[i]) & apply[i]))
+			return -1;
+	}
+	return 0;
+}
+
+/**
+ * Check support for an Ethernet item.
+ *
+ * A user-provided mask must select the whole destination MAC address;
+ * remaining checks are performed by mlx4_flow_item_validate().
+ */
+static int
+mlx4_flow_validate_eth(const struct rte_flow_item *item,
+		       const uint8_t *mask, unsigned int size)
+{
+	const struct rte_flow_item_eth *eth_mask = item->mask;
+	unsigned int i;
+
+	if (eth_mask != NULL) {
+		for (i = 0; i != sizeof(eth_mask->dst.addr_bytes); ++i)
+			if (eth_mask->dst.addr_bytes[i] != 0xff)
+				return -1;
+	}
+	return mlx4_flow_item_validate(item, mask, size);
+}
+
+/**
+ * Check support for a VLAN item.
+ *
+ * A user-provided TCI mask must either be empty or equal to 0x0fff once
+ * converted to host byte order; remaining checks are performed by
+ * mlx4_flow_item_validate().
+ */
+static int
+mlx4_flow_validate_vlan(const struct rte_flow_item *item,
+			const uint8_t *mask, unsigned int size)
+{
+	const struct rte_flow_item_vlan *vlan_mask = item->mask;
+
+	if (vlan_mask != NULL &&
+	    vlan_mask->tci != 0 &&
+	    ntohs(vlan_mask->tci) != 0x0fff)
+		return -1;
+	return mlx4_flow_item_validate(item, mask, size);
+}
+
+/**
+ * Check support for an IPv4 item.
+ *
+ * In a user-provided mask, source and destination addresses must each be
+ * either entirely ignored or entirely matched; remaining checks are
+ * performed by mlx4_flow_item_validate().
+ */
+static int
+mlx4_flow_validate_ipv4(const struct rte_flow_item *item,
+			const uint8_t *mask, unsigned int size)
+{
+	const struct rte_flow_item_ipv4 *ipv4_mask = item->mask;
+
+	if (ipv4_mask != NULL) {
+		const uint32_t src = ipv4_mask->hdr.src_addr;
+		const uint32_t dst = ipv4_mask->hdr.dst_addr;
+
+		if ((src != 0 && src != 0xffffffff) ||
+		    (dst != 0 && dst != 0xffffffff))
+			return -1;
+	}
+	return mlx4_flow_item_validate(item, mask, size);
+}
+
+/**
+ * Check support for a UDP item.
+ *
+ * In a user-provided mask, source and destination ports must each be
+ * either entirely ignored or entirely matched; remaining checks are
+ * performed by mlx4_flow_item_validate().
+ */
+static int
+mlx4_flow_validate_udp(const struct rte_flow_item *item,
+		       const uint8_t *mask, unsigned int size)
+{
+	const struct rte_flow_item_udp *udp_mask = item->mask;
+
+	if (udp_mask != NULL) {
+		const uint16_t src = udp_mask->hdr.src_port;
+		const uint16_t dst = udp_mask->hdr.dst_port;
+
+		if ((src != 0 && src != 0xffff) ||
+		    (dst != 0 && dst != 0xffff))
+			return -1;
+	}
+	return mlx4_flow_item_validate(item, mask, size);
+}
+
+/**
+ * Check support for a TCP item.
+ *
+ * In a user-provided mask, source and destination ports must each be
+ * either entirely ignored or entirely matched; remaining checks are
+ * performed by mlx4_flow_item_validate().
+ */
+static int
+mlx4_flow_validate_tcp(const struct rte_flow_item *item,
+		       const uint8_t *mask, unsigned int size)
+{
+	const struct rte_flow_item_tcp *tcp_mask = item->mask;
+
+	if (tcp_mask != NULL) {
+		const uint16_t src = tcp_mask->hdr.src_port;
+		const uint16_t dst = tcp_mask->hdr.dst_port;
+
+		if ((src != 0 && src != 0xffff) ||
+		    (dst != 0 && dst != 0xffff))
+			return -1;
+	}
+	return mlx4_flow_item_validate(item, mask, size);
+}
+
+/**
+ * Graph of supported items and associated actions.
+ *
+ * The array is indexed by item type. priv_flow_validate() walks it starting
+ * from its first element and, for each pattern item, only accepts a type
+ * listed in the .items field of the entry reached so far (the END entry
+ * therefore lists what a pattern may start with).
+ *
+ * For each entry, as used by priv_flow_validate():
+ * - .mask is the supported bit-mask handed to .validate along with .mask_sz,
+ * - .default_mask, or .mask when unset (e.g. VLAN), is handed to .convert,
+ * - .dst_sz is added to the offset in the Verbs attribute buffer once the
+ *   item has been processed (0 for VLAN).
+ */
+static const struct mlx4_flow_items mlx4_flow_items[] = {
+	[RTE_FLOW_ITEM_TYPE_END] = {
+		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
+	},
+	[RTE_FLOW_ITEM_TYPE_ETH] = {
+		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
+			       RTE_FLOW_ITEM_TYPE_IPV4),
+		.actions = valid_actions,
+		.mask = &(const struct rte_flow_item_eth){
+			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+		},
+		.default_mask = &rte_flow_item_eth_mask,
+		.mask_sz = sizeof(struct rte_flow_item_eth),
+		.validate = mlx4_flow_validate_eth,
+		.convert = mlx4_flow_create_eth,
+		.dst_sz = sizeof(struct ibv_flow_spec_eth),
+	},
+	[RTE_FLOW_ITEM_TYPE_VLAN] = {
+		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4),
+		.actions = valid_actions,
+		.mask = &(const struct rte_flow_item_vlan){
+		/* rte_flow_item_vlan_mask is invalid for mlx4. */
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+			.tci = 0x0fff,
+#else
+			.tci = 0xff0f,
+#endif
+		},
+		.mask_sz = sizeof(struct rte_flow_item_vlan),
+		.validate = mlx4_flow_validate_vlan,
+		.convert = mlx4_flow_create_vlan,
+		.dst_sz = 0,
+	},
+	[RTE_FLOW_ITEM_TYPE_IPV4] = {
+		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
+			       RTE_FLOW_ITEM_TYPE_TCP),
+		.actions = valid_actions,
+		.mask = &(const struct rte_flow_item_ipv4){
+			.hdr = {
+				.src_addr = -1,
+				.dst_addr = -1,
+			},
+		},
+		.default_mask = &rte_flow_item_ipv4_mask,
+		.mask_sz = sizeof(struct rte_flow_item_ipv4),
+		.validate = mlx4_flow_validate_ipv4,
+		.convert = mlx4_flow_create_ipv4,
+		.dst_sz = sizeof(struct ibv_flow_spec_ipv4),
+	},
+	[RTE_FLOW_ITEM_TYPE_UDP] = {
+		.actions = valid_actions,
+		.mask = &(const struct rte_flow_item_udp){
+			.hdr = {
+				.src_port = -1,
+				.dst_port = -1,
+			},
+		},
+		.default_mask = &rte_flow_item_udp_mask,
+		.mask_sz = sizeof(struct rte_flow_item_udp),
+		.validate = mlx4_flow_validate_udp,
+		.convert = mlx4_flow_create_udp,
+		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
+	},
+	[RTE_FLOW_ITEM_TYPE_TCP] = {
+		.actions = valid_actions,
+		.mask = &(const struct rte_flow_item_tcp){
+			.hdr = {
+				.src_port = -1,
+				.dst_port = -1,
+			},
+		},
+		.default_mask = &rte_flow_item_tcp_mask,
+		.mask_sz = sizeof(struct rte_flow_item_tcp),
+		.validate = mlx4_flow_validate_tcp,
+		.convert = mlx4_flow_create_tcp,
+		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
+	},
+};
+
+/**
+ * Validate a flow supported by the NIC.
+ *
+ * Pattern items are checked against the mlx4_flow_items graph, each one
+ * having to be an allowed successor of the previous one. When
+ * flow->ibv_attr is set, items are also converted to Verbs specifications
+ * on the fly. In both cases, flow->offset is increased by the room each
+ * item requires in the Verbs attribute buffer.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[in] attr
+ *   Flow rule attributes.
+ * @param[in] items
+ *   Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ *   Associated actions (list terminated by the END action).
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ * @param[in, out] flow
+ *   Flow structure to update.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+priv_flow_validate(struct priv *priv,
+		   const struct rte_flow_attr *attr,
+		   const struct rte_flow_item items[],
+		   const struct rte_flow_action actions[],
+		   struct rte_flow_error *error,
+		   struct mlx4_flow *flow)
+{
+	const struct mlx4_flow_items *cur_item = mlx4_flow_items;
+	struct mlx4_flow_action action = {
+		.queue = 0,
+		.drop = 0,
+	};
+
+	if (attr->group) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
+				   NULL,
+				   "groups are not supported");
+		return -rte_errno;
+	}
+	if (attr->priority) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
+				   NULL,
+				   "priorities are not supported");
+		return -rte_errno;
+	}
+	if (attr->egress) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
+				   NULL,
+				   "egress is not supported");
+		return -rte_errno;
+	}
+	if (!attr->ingress) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
+				   NULL,
+				   "only ingress is supported");
+		return -rte_errno;
+	}
+	/* Go over items list. */
+	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
+		const struct mlx4_flow_items *token = NULL;
+		unsigned int i;
+		int err;
+
+		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
+			continue;
+		/*
+		 * The nic can support patterns with NULL eth spec only
+		 * if eth is a single item in a rule.
+		 */
+		if (!items->spec &&
+			items->type == RTE_FLOW_ITEM_TYPE_ETH) {
+			const struct rte_flow_item *next = items + 1;
+
+			if (next->type != RTE_FLOW_ITEM_TYPE_END) {
+				rte_flow_error_set(error, ENOTSUP,
+						   RTE_FLOW_ERROR_TYPE_ITEM,
+						   items,
+						   "the rule requires"
+						   " an Ethernet spec");
+				return -rte_errno;
+			}
+		}
+		/* The item must be an allowed successor of the previous one. */
+		for (i = 0;
+		     cur_item->items &&
+		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
+		     ++i) {
+			if (cur_item->items[i] == items->type) {
+				token = &mlx4_flow_items[items->type];
+				break;
+			}
+		}
+		if (!token)
+			goto exit_item_not_supported;
+		cur_item = token;
+		err = cur_item->validate(items,
+					(const uint8_t *)cur_item->mask,
+					 cur_item->mask_sz);
+		if (err)
+			goto exit_item_not_supported;
+		/* Conversion only takes place once a buffer is available. */
+		if (flow->ibv_attr && cur_item->convert) {
+			err = cur_item->convert(items,
+						(cur_item->default_mask ?
+						 cur_item->default_mask :
+						 cur_item->mask),
+						 flow);
+			if (err)
+				goto exit_item_not_supported;
+		}
+		flow->offset += cur_item->dst_sz;
+	}
+	/* Go over actions list */
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
+		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
+			continue;
+		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
+			action.drop = 1;
+		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
+			const struct rte_flow_action_queue *queue =
+				(const struct rte_flow_action_queue *)
+				actions->conf;
+
+			/*
+			 * Compare with >= instead of subtracting from
+			 * rxqs_n, which would wrap around and accept any
+			 * index when no Rx queue is configured.
+			 */
+			if (!queue || queue->index >= priv->rxqs_n)
+				goto exit_action_not_supported;
+			action.queue = 1;
+		} else {
+			goto exit_action_not_supported;
+		}
+	}
+	if (!action.queue && !action.drop) {
+		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "no valid action");
+		return -rte_errno;
+	}
+	return 0;
+exit_item_not_supported:
+	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
+			   items, "item not supported");
+	return -rte_errno;
+exit_action_not_supported:
+	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+			   actions, "action not supported");
+	return -rte_errno;
+}
+
+/**
+ * Validate a flow supported by the NIC.
+ *
+ * Runs priv_flow_validate() with the private structure locked and without
+ * a Verbs attribute buffer.
+ *
+ * @see rte_flow_validate()
+ * @see rte_flow_ops
+ */
+int
+mlx4_flow_validate(struct rte_eth_dev *dev,
+		   const struct rte_flow_attr *attr,
+		   const struct rte_flow_item items[],
+		   const struct rte_flow_action actions[],
+		   struct rte_flow_error *error)
+{
+	struct mlx4_flow flow = { .offset = sizeof(struct ibv_flow_attr) };
+	struct priv *priv = dev->data->dev_private;
+	int status;
+
+	priv_lock(priv);
+	status = priv_flow_validate(priv, attr, items, actions, error, &flow);
+	priv_unlock(priv);
+	return status;
+}
+
+/**
+ * Complete flow rule creation.
+ *
+ * For drop rules, a dedicated CQ/QP pair is created and the flow is
+ * attached to it; otherwise the flow is attached to the QP of the target
+ * receive queue.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param ibv_attr
+ *   Verbs flow attributes. This function takes ownership of them: they are
+ *   either stored in the returned flow or released on failure.
+ * @param action
+ *   Target action structure.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   A flow if the rule could be created, NULL otherwise.
+ */
+static struct rte_flow *
+priv_flow_create_action_queue(struct priv *priv,
+			      struct ibv_flow_attr *ibv_attr,
+			      struct mlx4_flow_action *action,
+			      struct rte_flow_error *error)
+{
+	struct rxq *rxq;
+	struct ibv_qp *qp;
+	struct rte_flow *rte_flow;
+
+	assert(priv->pd);
+	assert(priv->ctx);
+	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
+	if (!rte_flow) {
+		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "cannot allocate flow memory");
+		/* The caller does not release ibv_attr on failure. */
+		rte_free(ibv_attr);
+		return NULL;
+	}
+	/* Attach attributes right away so that all error paths free them. */
+	rte_flow->ibv_attr = ibv_attr;
+	rxq = (*priv->rxqs)[action->queue_id];
+	if (action->drop) {
+		rte_flow->cq =
+			ibv_exp_create_cq(priv->ctx, 1, NULL, NULL, 0,
+					  &(struct ibv_exp_cq_init_attr){
+						  .comp_mask = 0,
+					  });
+		if (!rte_flow->cq) {
+			rte_flow_error_set(error, ENOMEM,
+					   RTE_FLOW_ERROR_TYPE_HANDLE,
+					   NULL, "cannot allocate CQ");
+			goto error;
+		}
+		rte_flow->qp = ibv_exp_create_qp(
+			priv->ctx,
+			&(struct ibv_exp_qp_init_attr){
+				.send_cq = rte_flow->cq,
+				.recv_cq = rte_flow->cq,
+				.cap = {
+					.max_recv_wr = 1,
+					.max_recv_sge = 1,
+				},
+				.qp_type = IBV_QPT_RAW_PACKET,
+				.comp_mask =
+					IBV_EXP_QP_INIT_ATTR_PD |
+					IBV_EXP_QP_INIT_ATTR_PORT |
+					IBV_EXP_QP_INIT_ATTR_RES_DOMAIN,
+				.pd = priv->pd,
+				.res_domain = rxq->rd,
+				.port_num = priv->port,
+			});
+		if (!rte_flow->qp) {
+			rte_flow_error_set(error, ENOMEM,
+					   RTE_FLOW_ERROR_TYPE_HANDLE,
+					   NULL, "cannot allocate QP");
+			goto error;
+		}
+		qp = rte_flow->qp;
+	} else {
+		rte_flow->rxq = rxq;
+		qp = rxq->qp;
+	}
+	rte_flow->ibv_flow = ibv_create_flow(qp, rte_flow->ibv_attr);
+	if (!rte_flow->ibv_flow) {
+		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "flow rule creation failure");
+		goto error;
+	}
+	return rte_flow;
+
+error:
+	assert(rte_flow);
+	/* The QP references the CQ and must therefore be destroyed first. */
+	if (rte_flow->qp)
+		ibv_destroy_qp(rte_flow->qp);
+	if (rte_flow->cq)
+		ibv_destroy_cq(rte_flow->cq);
+	rte_free(rte_flow->ibv_attr);
+	rte_free(rte_flow);
+	return NULL;
+}
+
+/**
+ * Convert a flow.
+ *
+ * The rule is validated a first time to compute the size of the Verbs
+ * attribute buffer, which is then allocated and filled by a second
+ * validation pass before the target action is applied.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[in] attr
+ *   Flow rule attributes.
+ * @param[in] items
+ *   Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ *   Associated actions (list terminated by the END action).
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   A flow on success, NULL otherwise.
+ */
+static struct rte_flow *
+priv_flow_create(struct priv *priv,
+		 const struct rte_flow_attr *attr,
+		 const struct rte_flow_item items[],
+		 const struct rte_flow_action actions[],
+		 struct rte_flow_error *error)
+{
+	struct mlx4_flow flow = { .offset = sizeof(struct ibv_flow_attr), };
+	struct mlx4_flow_action action = {
+		.queue = 0,
+		.drop = 0,
+	};
+	const struct rte_flow_action_queue *queue_conf;
+
+	/* First pass: check the rule and compute the buffer size. */
+	if (priv_flow_validate(priv, attr, items, actions, error, &flow))
+		return NULL;
+	flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
+	if (flow.ibv_attr == NULL) {
+		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "cannot allocate ibv_attr memory");
+		return NULL;
+	}
+	flow.offset = sizeof(struct ibv_flow_attr);
+	*flow.ibv_attr = (struct ibv_flow_attr){
+		.comp_mask = 0,
+		.type = IBV_FLOW_ATTR_NORMAL,
+		.size = sizeof(struct ibv_flow_attr),
+		.priority = attr->priority,
+		.num_of_specs = 0,
+		.port = priv->port,
+		.flags = 0,
+	};
+	/* Second pass: fill the buffer with Verbs specifications. */
+	claim_zero(priv_flow_validate(priv, attr, items, actions,
+				      error, &flow));
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
+		switch (actions->type) {
+		case RTE_FLOW_ACTION_TYPE_VOID:
+			break;
+		case RTE_FLOW_ACTION_TYPE_QUEUE:
+			queue_conf = actions->conf;
+			action.queue = 1;
+			action.queue_id = queue_conf->index;
+			break;
+		case RTE_FLOW_ACTION_TYPE_DROP:
+			action.drop = 1;
+			break;
+		default:
+			rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   actions, "unsupported action");
+			rte_free(flow.ibv_attr);
+			return NULL;
+		}
+	}
+	return priv_flow_create_action_queue(priv, flow.ibv_attr,
+					     &action, error);
+}
+
+/**
+ * Create a flow.
+ *
+ * The flow is created with the private structure locked and, on success,
+ * inserted at the head of the list of flows.
+ *
+ * @see rte_flow_create()
+ * @see rte_flow_ops
+ */
+struct rte_flow *
+mlx4_flow_create(struct rte_eth_dev *dev,
+		 const struct rte_flow_attr *attr,
+		 const struct rte_flow_item items[],
+		 const struct rte_flow_action actions[],
+		 struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+	struct rte_flow *created;
+
+	priv_lock(priv);
+	created = priv_flow_create(priv, attr, items, actions, error);
+	if (created != NULL) {
+		LIST_INSERT_HEAD(&priv->flows, created, next);
+		DEBUG("Flow created %p", (void *)created);
+	}
+	priv_unlock(priv);
+	return created;
+}
+
+/**
+ * Destroy a flow.
+ *
+ * The flow is unlinked from its list, then its Verbs flow, QP and CQ are
+ * destroyed (in that order, when present) before memory is released.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[in] flow
+ *   Flow to destroy.
+ */
+static void
+priv_flow_destroy(struct priv *priv, struct rte_flow *flow)
+{
+	(void)priv;
+	LIST_REMOVE(flow, next);
+	if (flow->ibv_flow != NULL)
+		claim_zero(ibv_destroy_flow(flow->ibv_flow));
+	if (flow->qp != NULL)
+		claim_zero(ibv_destroy_qp(flow->qp));
+	if (flow->cq != NULL)
+		claim_zero(ibv_destroy_cq(flow->cq));
+	rte_free(flow->ibv_attr);
+	DEBUG("Flow destroyed %p", (void *)flow);
+	rte_free(flow);
+}
+
+/**
+ * Destroy a flow.
+ *
+ * Runs priv_flow_destroy() with the private structure locked.
+ *
+ * @see rte_flow_destroy()
+ * @see rte_flow_ops
+ */
+int
+mlx4_flow_destroy(struct rte_eth_dev *dev,
+		  struct rte_flow *flow,
+		  struct rte_flow_error *error)
+{
+	struct priv *const priv = dev->data->dev_private;
+
+	/* Nothing can be reported through error, always successful. */
+	(void)error;
+	priv_lock(priv);
+	priv_flow_destroy(priv, flow);
+	priv_unlock(priv);
+	return 0;
+}
+
+/**
+ * Destroy all flows.
+ *
+ * Flows are destroyed one by one from the head of the list until the list
+ * is empty.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+static void
+priv_flow_flush(struct priv *priv)
+{
+	struct rte_flow *head;
+
+	while ((head = LIST_FIRST(&priv->flows)) != NULL)
+		priv_flow_destroy(priv, head);
+}
+
+/**
+ * Destroy all flows.
+ *
+ * Runs priv_flow_flush() with the private structure locked.
+ *
+ * @see rte_flow_flush()
+ * @see rte_flow_ops
+ */
+int
+mlx4_flow_flush(struct rte_eth_dev *dev,
+		struct rte_flow_error *error)
+{
+	struct priv *const priv = dev->data->dev_private;
+
+	/* Nothing can be reported through error, always successful. */
+	(void)error;
+	priv_lock(priv);
+	priv_flow_flush(priv);
+	priv_unlock(priv);
+	return 0;
+}
+
+/**
+ * Remove all flows.
+ *
+ * Called by dev_stop() to remove all flows.
+ *
+ * Only Verbs flows are destroyed; flow structures remain in the list.
+ * Flows without a Verbs flow (e.g. already removed, or never applied
+ * because a previous start failed) are skipped.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+void
+mlx4_priv_flow_stop(struct priv *priv)
+{
+	struct rte_flow *flow;
+
+	for (flow = LIST_FIRST(&priv->flows);
+	     flow;
+	     flow = LIST_NEXT(flow, next)) {
+		/* Same guard as priv_flow_destroy(). */
+		if (!flow->ibv_flow)
+			continue;
+		claim_zero(ibv_destroy_flow(flow->ibv_flow));
+		flow->ibv_flow = NULL;
+		DEBUG("Flow %p removed", (void *)flow);
+	}
+}
+
+/**
+ * Add all flows.
+ *
+ * A Verbs flow is created for each flow of the list, on the QP of the flow
+ * itself when it has one, on the QP of its receive queue otherwise.
+ * Processing stops at the first failure.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ *
+ * @return
+ *   0 on success, an errno value otherwise and rte_errno is set.
+ */
+int
+mlx4_priv_flow_start(struct priv *priv)
+{
+	struct rte_flow *flow = LIST_FIRST(&priv->flows);
+
+	while (flow != NULL) {
+		struct ibv_qp *target = flow->qp;
+
+		if (target == NULL)
+			target = flow->rxq->qp;
+		flow->ibv_flow = ibv_create_flow(target, flow->ibv_attr);
+		if (flow->ibv_flow == NULL) {
+			DEBUG("Flow %p cannot be applied", (void *)flow);
+			rte_errno = EINVAL;
+			return rte_errno;
+		}
+		DEBUG("Flow %p applied", (void *)flow);
+		flow = LIST_NEXT(flow, next);
+	}
+	return 0;
+}
diff --git a/drivers/net/mlx4/mlx4_flow.h b/drivers/net/mlx4/mlx4_flow.h
new file mode 100644
index 0000000..537ffdf
--- /dev/null
+++ b/drivers/net/mlx4/mlx4_flow.h
@@ -0,0 +1,104 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2017 6WIND S.A.
+ *   Copyright 2017 Mellanox.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of 6WIND S.A. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RTE_PMD_MLX4_FLOW_H_
+#define RTE_PMD_MLX4_FLOW_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/queue.h>
+
+/* Verbs header. */
+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+#include <rte_byteorder.h>
+
+#include "mlx4.h"
+
+struct rte_flow {
+	LIST_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
+	struct rxq *rxq; /**< Pointer to the queue, NULL if drop queue. */
+	struct ibv_flow *ibv_flow; /**< Verbs flow, NULL while not applied. */
+	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
+	struct ibv_qp *qp; /**< Verbs QP, drop flows only. */
+	struct ibv_cq *cq; /**< Verbs CQ, drop flows only. */
+};
+
+int
+mlx4_flow_validate(struct rte_eth_dev *dev,
+		   const struct rte_flow_attr *attr,
+		   const struct rte_flow_item items[],
+		   const struct rte_flow_action actions[],
+		   struct rte_flow_error *error);
+
+struct rte_flow *
+mlx4_flow_create(struct rte_eth_dev *dev,
+		 const struct rte_flow_attr *attr,
+		 const struct rte_flow_item items[],
+		 const struct rte_flow_action actions[],
+		 struct rte_flow_error *error);
+
+int
+mlx4_flow_destroy(struct rte_eth_dev *dev,
+		  struct rte_flow *flow,
+		  struct rte_flow_error *error);
+
+int
+mlx4_flow_flush(struct rte_eth_dev *dev,
+		struct rte_flow_error *error);
+
+/** Conversion state; items are only converted when ibv_attr is set. */
+struct mlx4_flow {
+	struct ibv_flow_attr *ibv_attr; /**< Verbs attribute. */
+	unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
+};
+
+struct mlx4_flow_action {
+	uint32_t drop:1; /**< Target is a drop queue. */
+	uint32_t queue:1; /**< Target is a receive queue. */
+	uint32_t queue_id; /**< Receive queue index (see queue). */
+};
+
+int mlx4_priv_flow_start(struct priv *priv);
+void mlx4_priv_flow_stop(struct priv *priv);
+
+#endif /* RTE_PMD_MLX4_FLOW_H_ */
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/2] net/mlx4: split the definitions to the header file
  2017-02-22 13:42 ` [dpdk-dev] [PATCH v2 " Vasily Philipov
@ 2017-02-22 19:04   ` Ferruh Yigit
  2017-02-23 10:44     ` Vasily Philipov
  0 siblings, 1 reply; 15+ messages in thread
From: Ferruh Yigit @ 2017-02-22 19:04 UTC (permalink / raw)
  To: Vasily Philipov, dev; +Cc: Adrien Mazarguil, Nelio Laranjeiro

On 2/22/2017 1:42 PM, Vasily Philipov wrote:
> Make some structs/defines visible from different source files by placing
> them into mlx4.h header.
> 
> Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
> ---
>  drivers/net/mlx4/mlx4.c | 183 ++--------------------------------------------
>  drivers/net/mlx4/mlx4.h | 187 +++++++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 189 insertions(+), 181 deletions(-)
> 
> diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
> index 79efaaa..82ccac8 100644
> --- a/drivers/net/mlx4/mlx4.c
> +++ b/drivers/net/mlx4/mlx4.c
> @@ -1,8 +1,8 @@
>  /*-
>   *   BSD LICENSE
>   *
> - *   Copyright 2012-2015 6WIND S.A.
> - *   Copyright 2012 Mellanox.
> + *   Copyright 2012-2017 6WIND S.A.
> + *   Copyright 2012-2017 Mellanox.

Can someone knowledgeable about Copyright help please?

What is the year field in Copyright line for?
And above change updates Copyright from 2012 to 2012-2017, is this correct?

>   *
>   *   Redistribution and use in source and binary forms, with or without
>   *   modification, are permitted provided that the following conditions
> @@ -68,10 +68,6 @@
>  #pragma GCC diagnostic error "-Wpedantic"
>  #endif

Above invisible lines are  "#include <infiniband/verbs.h>" wrapped with
#pragma for pedantic.

That piece moved to "mlx4.h" [1], which is included a few lines later, so
can these lines be removed from here?

>  
> -/* DPDK headers don't like -pedantic. */
> -#ifdef PEDANTIC
> -#pragma GCC diagnostic ignored "-Wpedantic"
> -#endif

Comment says "DPDK headers don't like -pedantic", won't removing #pragma
cause compile error with pedantic option?

>  #include <rte_ether.h>
>  #include <rte_ethdev.h>
>  #include <rte_dev.h>
> @@ -86,9 +82,6 @@
>  #include <rte_log.h>
>  #include <rte_alarm.h>
>  #include <rte_memory.h>
> -#ifdef PEDANTIC
> -#pragma GCC diagnostic error "-Wpedantic"
> -#endif
>  
>  /* Generated configuration header. */
>  #include "mlx4_autoconf.h"
> @@ -96,21 +89,6 @@
>  /* PMD header. */
>  #include "mlx4.h"
>  
<...>

> diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
> index 4c7505e..70c9ecd 100644
> --- a/drivers/net/mlx4/mlx4.h
> +++ b/drivers/net/mlx4/mlx4.h
<...>
> +
> +/* Verbs header. */
> +/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
> +#ifdef PEDANTIC
> +#pragma GCC diagnostic ignored "-Wpedantic"
> +#endif
> +#include <infiniband/verbs.h>
> +#ifdef PEDANTIC
> +#pragma GCC diagnostic error "-Wpedantic"
> +#endif

--> [1]

<...>

> +
> +void priv_lock(struct priv *priv);
> +void priv_unlock(struct priv *priv);

It can be good to mention in commit log that these functions are now
exported.

> +
>  #endif /* RTE_PMD_MLX4_H_ */
> 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/2] net/mlx4: split the definitions to the header file
  2017-02-22 19:04   ` Ferruh Yigit
@ 2017-02-23 10:44     ` Vasily Philipov
  2017-03-06  9:24       ` Ferruh Yigit
  0 siblings, 1 reply; 15+ messages in thread
From: Vasily Philipov @ 2017-02-23 10:44 UTC (permalink / raw)
  To: Ferruh Yigit, dev; +Cc: Adrien Mazarguil, Nélio Laranjeiro

Hi Ferruh,

> -----Original Message-----
> From: Ferruh Yigit [mailto:ferruh.yigit@intel.com]
> Sent: Wednesday, February 22, 2017 21:05
> To: Vasily Philipov <vasilyf@mellanox.com>; dev@dpdk.org
> Cc: Adrien Mazarguil <adrien.mazarguil@6wind.com>; Nélio Laranjeiro
> <nelio.laranjeiro@6wind.com>
> Subject: Re: [dpdk-dev] [PATCH v2 1/2] net/mlx4: split the definitions to the
> header file
> 
> On 2/22/2017 1:42 PM, Vasily Philipov wrote:
> > Make some structs/defines visible from different source files by
> > placing them into mlx4.h header.
> >
> > Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
> > ---
> >  drivers/net/mlx4/mlx4.c | 183
> > ++--------------------------------------------
> >  drivers/net/mlx4/mlx4.h | 187
> > +++++++++++++++++++++++++++++++++++++++++++++++-
> >  2 files changed, 189 insertions(+), 181 deletions(-)
> >
> > diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c index
> > 79efaaa..82ccac8 100644
> > --- a/drivers/net/mlx4/mlx4.c
> > +++ b/drivers/net/mlx4/mlx4.c
> > @@ -1,8 +1,8 @@
> >  /*-
> >   *   BSD LICENSE
> >   *
> > - *   Copyright 2012-2015 6WIND S.A.
> > - *   Copyright 2012 Mellanox.
> > + *   Copyright 2012-2017 6WIND S.A.
> > + *   Copyright 2012-2017 Mellanox.
> 
> Can someone knowledgeable about Copyright help please?
> 
> What is the year field in Copyright line for?
> And above change updates Copyright from 2012 to 2012-2017, is this correct?
> 

The year line was changed in order to show when the file was last modified...

> >   *
> >   *   Redistribution and use in source and binary forms, with or without
> >   *   modification, are permitted provided that the following conditions
> > @@ -68,10 +68,6 @@
> >  #pragma GCC diagnostic error "-Wpedantic"
> >  #endif
> 
> Above invisible lines are  "#include <infiniband/verbs.h>" wrapped with
> #pragma for pedantic.
> 
> That piece moved to "mlx4.h" [1], which included a few lines later, so can
> these line be removed from this line?
> 
> >
> > -/* DPDK headers don't like -pedantic. */ -#ifdef PEDANTIC -#pragma
> > GCC diagnostic ignored "-Wpedantic"
> > -#endif
> 
> Comment says "DPDK headers don't like -pedantic", won't removing
> #pragma cause compile error with pedantic option?
> 

It is not necessary anymore, was fixed with the next commit:

commit c0362128c57a0ad22ea311a9657bb15a44b70793
Author: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Date:   Mon Jun 29 11:34:52 2015 +0200

    eal: fix pedantic build of mlx4 debug mode

> >  #include <rte_ether.h>
> >  #include <rte_ethdev.h>
> >  #include <rte_dev.h>
> > @@ -86,9 +82,6 @@
> >  #include <rte_log.h>
> >  #include <rte_alarm.h>
> >  #include <rte_memory.h>
> > -#ifdef PEDANTIC
> > -#pragma GCC diagnostic error "-Wpedantic"
> > -#endif
> >
> >  /* Generated configuration header. */  #include "mlx4_autoconf.h"
> > @@ -96,21 +89,6 @@
> >  /* PMD header. */
> >  #include "mlx4.h"
> >
> <...>
> 
> > diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h index
> > 4c7505e..70c9ecd 100644
> > --- a/drivers/net/mlx4/mlx4.h
> > +++ b/drivers/net/mlx4/mlx4.h
> <...>
> > +
> > +/* Verbs header. */
> > +/* ISO C doesn't support unnamed structs/unions, disabling -pedantic.
> > +*/ #ifdef PEDANTIC #pragma GCC diagnostic ignored "-Wpedantic"
> > +#endif
> > +#include <infiniband/verbs.h>
> > +#ifdef PEDANTIC
> > +#pragma GCC diagnostic error "-Wpedantic"
> > +#endif
> 
> --> [1]
> 
> <...>
> 
> > +
> > +void priv_lock(struct priv *priv);
> > +void priv_unlock(struct priv *priv);
> 
> It can be good to mention in commit log that these functions are now
> exported.
> 
> > +
> >  #endif /* RTE_PMD_MLX4_H_ */
> >

I will fix the rest of the issues and will send the v3 patches.

Thank you,
Vasily

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [dpdk-dev] [PATCH v3 1/2] net/mlx4: split the definitions to the header file
  2017-02-21 14:07 [dpdk-dev] [PATCH 1/2] net/mlx4: split the definitions to the header file Vasily Philipov
                   ` (3 preceding siblings ...)
  2017-02-22 13:42 ` [dpdk-dev] [PATCH v2 2/2] net/mlx4: support basic flow items and actions Vasily Philipov
@ 2017-03-05  7:51 ` Vasily Philipov
  2017-03-20  9:19   ` Nélio Laranjeiro
  2017-03-05  7:51 ` [dpdk-dev] [PATCH v3 2/2] net/mlx4: support basic flow items and actions Vasily Philipov
  5 siblings, 1 reply; 15+ messages in thread
From: Vasily Philipov @ 2017-03-05  7:51 UTC (permalink / raw)
  To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro

Make the priv_lock()/priv_unlock() functions and some other structs/defines
visible to other source files by declaring them in the mlx4.h header.

Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
 drivers/net/mlx4/mlx4.c | 193 +-----------------------------------------------
 drivers/net/mlx4/mlx4.h | 187 +++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 189 insertions(+), 191 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 79efaaa..8f6c57f 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -1,8 +1,8 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright 2012-2015 6WIND S.A.
- *   Copyright 2012 Mellanox.
+ *   Copyright 2012-2017 6WIND S.A.
+ *   Copyright 2012-2017 Mellanox.
  *
  *   Redistribution and use in source and binary forms, with or without
  *   modification, are permitted provided that the following conditions
@@ -58,20 +58,6 @@
 #include <linux/sockios.h>
 #include <fcntl.h>
 
-/* Verbs header. */
-/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
-#include <infiniband/verbs.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
-
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
 #include <rte_ether.h>
 #include <rte_ethdev.h>
 #include <rte_dev.h>
@@ -86,9 +72,6 @@
 #include <rte_log.h>
 #include <rte_alarm.h>
 #include <rte_memory.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
 
 /* Generated configuration header. */
 #include "mlx4_autoconf.h"
@@ -96,21 +79,6 @@
 /* PMD header. */
 #include "mlx4.h"
 
-/* Runtime logging through RTE_LOG() is enabled when not in debugging mode.
- * Intermediate LOG_*() macros add the required end-of-line characters. */
-#ifndef NDEBUG
-#define INFO(...) DEBUG(__VA_ARGS__)
-#define WARN(...) DEBUG(__VA_ARGS__)
-#define ERROR(...) DEBUG(__VA_ARGS__)
-#else
-#define LOG__(level, m, ...) \
-	RTE_LOG(level, PMD, MLX4_DRIVER_NAME ": " m "%c", __VA_ARGS__)
-#define LOG_(level, ...) LOG__(level, __VA_ARGS__, '\n')
-#define INFO(...) LOG_(INFO, __VA_ARGS__)
-#define WARN(...) LOG_(WARNING, __VA_ARGS__)
-#define ERROR(...) LOG_(ERR, __VA_ARGS__)
-#endif
-
 /* Convenience macros for accessing mbuf fields. */
 #define NEXT(m) ((m)->next)
 #define DATA_LEN(m) ((m)->data_len)
@@ -137,157 +105,6 @@
 	 (((val) & (from)) / ((from) / (to))) : \
 	 (((val) & (from)) * ((to) / (from))))
 
-struct mlx4_rxq_stats {
-	unsigned int idx; /**< Mapping index. */
-#ifdef MLX4_PMD_SOFT_COUNTERS
-	uint64_t ipackets;  /**< Total of successfully received packets. */
-	uint64_t ibytes;    /**< Total of successfully received bytes. */
-#endif
-	uint64_t idropped;  /**< Total of packets dropped when RX ring full. */
-	uint64_t rx_nombuf; /**< Total of RX mbuf allocation failures. */
-};
-
-struct mlx4_txq_stats {
-	unsigned int idx; /**< Mapping index. */
-#ifdef MLX4_PMD_SOFT_COUNTERS
-	uint64_t opackets; /**< Total of successfully sent packets. */
-	uint64_t obytes;   /**< Total of successfully sent bytes. */
-#endif
-	uint64_t odropped; /**< Total of packets not sent when TX ring full. */
-};
-
-/* RX element (scattered packets). */
-struct rxq_elt_sp {
-	struct ibv_recv_wr wr; /* Work Request. */
-	struct ibv_sge sges[MLX4_PMD_SGE_WR_N]; /* Scatter/Gather Elements. */
-	struct rte_mbuf *bufs[MLX4_PMD_SGE_WR_N]; /* SGEs buffers. */
-};
-
-/* RX element. */
-struct rxq_elt {
-	struct ibv_recv_wr wr; /* Work Request. */
-	struct ibv_sge sge; /* Scatter/Gather Element. */
-	/* mbuf pointer is derived from WR_ID(wr.wr_id).offset. */
-};
-
-/* RX queue descriptor. */
-struct rxq {
-	struct priv *priv; /* Back pointer to private data. */
-	struct rte_mempool *mp; /* Memory Pool for allocations. */
-	struct ibv_mr *mr; /* Memory Region (for mp). */
-	struct ibv_cq *cq; /* Completion Queue. */
-	struct ibv_qp *qp; /* Queue Pair. */
-	struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
-	struct ibv_exp_cq_family *if_cq; /* CQ interface. */
-	/*
-	 * Each VLAN ID requires a separate flow steering rule.
-	 */
-	BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
-	struct ibv_flow *mac_flow[MLX4_MAX_MAC_ADDRESSES][MLX4_MAX_VLAN_IDS];
-	struct ibv_flow *promisc_flow; /* Promiscuous flow. */
-	struct ibv_flow *allmulti_flow; /* Multicast flow. */
-	unsigned int port_id; /* Port ID for incoming packets. */
-	unsigned int elts_n; /* (*elts)[] length. */
-	unsigned int elts_head; /* Current index in (*elts)[]. */
-	union {
-		struct rxq_elt_sp (*sp)[]; /* Scattered RX elements. */
-		struct rxq_elt (*no_sp)[]; /* RX elements. */
-	} elts;
-	unsigned int sp:1; /* Use scattered RX elements. */
-	unsigned int csum:1; /* Enable checksum offloading. */
-	unsigned int csum_l2tun:1; /* Same for L2 tunnels. */
-	struct mlx4_rxq_stats stats; /* RX queue counters. */
-	unsigned int socket; /* CPU socket ID for allocations. */
-	struct ibv_exp_res_domain *rd; /* Resource Domain. */
-};
-
-/* TX element. */
-struct txq_elt {
-	struct rte_mbuf *buf;
-};
-
-/* Linear buffer type. It is used when transmitting buffers with too many
- * segments that do not fit the hardware queue (see max_send_sge).
- * Extra segments are copied (linearized) in such buffers, replacing the
- * last SGE during TX.
- * The size is arbitrary but large enough to hold a jumbo frame with
- * 8 segments considering mbuf.buf_len is about 2048 bytes. */
-typedef uint8_t linear_t[16384];
-
-/* TX queue descriptor. */
-struct txq {
-	struct priv *priv; /* Back pointer to private data. */
-	struct {
-		const struct rte_mempool *mp; /* Cached Memory Pool. */
-		struct ibv_mr *mr; /* Memory Region (for mp). */
-		uint32_t lkey; /* mr->lkey */
-	} mp2mr[MLX4_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
-	struct ibv_cq *cq; /* Completion Queue. */
-	struct ibv_qp *qp; /* Queue Pair. */
-	struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
-	struct ibv_exp_cq_family *if_cq; /* CQ interface. */
-#if MLX4_PMD_MAX_INLINE > 0
-	uint32_t max_inline; /* Max inline send size <= MLX4_PMD_MAX_INLINE. */
-#endif
-	unsigned int elts_n; /* (*elts)[] length. */
-	struct txq_elt (*elts)[]; /* TX elements. */
-	unsigned int elts_head; /* Current index in (*elts)[]. */
-	unsigned int elts_tail; /* First element awaiting completion. */
-	unsigned int elts_comp; /* Number of completion requests. */
-	unsigned int elts_comp_cd; /* Countdown for next completion request. */
-	unsigned int elts_comp_cd_init; /* Initial value for countdown. */
-	struct mlx4_txq_stats stats; /* TX queue counters. */
-	linear_t (*elts_linear)[]; /* Linearized buffers. */
-	struct ibv_mr *mr_linear; /* Memory Region for linearized buffers. */
-	unsigned int socket; /* CPU socket ID for allocations. */
-	struct ibv_exp_res_domain *rd; /* Resource Domain. */
-};
-
-struct priv {
-	struct rte_eth_dev *dev; /* Ethernet device. */
-	struct ibv_context *ctx; /* Verbs context. */
-	struct ibv_device_attr device_attr; /* Device properties. */
-	struct ibv_pd *pd; /* Protection Domain. */
-	/*
-	 * MAC addresses array and configuration bit-field.
-	 * An extra entry that cannot be modified by the DPDK is reserved
-	 * for broadcast frames (destination MAC address ff:ff:ff:ff:ff:ff).
-	 */
-	struct ether_addr mac[MLX4_MAX_MAC_ADDRESSES];
-	BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
-	/* VLAN filters. */
-	struct {
-		unsigned int enabled:1; /* If enabled. */
-		unsigned int id:12; /* VLAN ID (0-4095). */
-	} vlan_filter[MLX4_MAX_VLAN_IDS]; /* VLAN filters table. */
-	/* Device properties. */
-	uint16_t mtu; /* Configured MTU. */
-	uint8_t port; /* Physical port number. */
-	unsigned int started:1; /* Device started, flows enabled. */
-	unsigned int promisc:1; /* Device in promiscuous mode. */
-	unsigned int allmulti:1; /* Device receives all multicast packets. */
-	unsigned int hw_qpg:1; /* QP groups are supported. */
-	unsigned int hw_tss:1; /* TSS is supported. */
-	unsigned int hw_rss:1; /* RSS is supported. */
-	unsigned int hw_csum:1; /* Checksum offload is supported. */
-	unsigned int hw_csum_l2tun:1; /* Same for L2 tunnels. */
-	unsigned int rss:1; /* RSS is enabled. */
-	unsigned int vf:1; /* This is a VF device. */
-	unsigned int pending_alarm:1; /* An alarm is pending. */
-#ifdef INLINE_RECV
-	unsigned int inl_recv_size; /* Inline recv size */
-#endif
-	unsigned int max_rss_tbl_sz; /* Maximum number of RSS queues. */
-	/* RX/TX queues. */
-	struct rxq rxq_parent; /* Parent queue when RSS is enabled. */
-	unsigned int rxqs_n; /* RX queues array size. */
-	unsigned int txqs_n; /* TX queues array size. */
-	struct rxq *(*rxqs)[]; /* RX queues. */
-	struct txq *(*txqs)[]; /* TX queues. */
-	struct rte_intr_handle intr_handle; /* Interrupt handler. */
-	rte_spinlock_t lock; /* Lock for control functions. */
-};
-
 /* Local storage for secondary process data. */
 struct mlx4_secondary_data {
 	struct rte_eth_dev_data data; /* Local device data. */
@@ -335,8 +152,7 @@ struct mlx4_secondary_data {
  * @param priv
  *   Pointer to private structure.
  */
-static void
-priv_lock(struct priv *priv)
+void priv_lock(struct priv *priv)
 {
 	rte_spinlock_lock(&priv->lock);
 }
@@ -347,8 +163,7 @@ struct mlx4_secondary_data {
  * @param priv
  *   Pointer to private structure.
  */
-static void
-priv_unlock(struct priv *priv)
+void priv_unlock(struct priv *priv)
 {
 	rte_spinlock_unlock(&priv->lock);
 }
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 4c7505e..70c9ecd 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -1,8 +1,8 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright 2012-2015 6WIND S.A.
- *   Copyright 2012 Mellanox.
+ *   Copyright 2012-2017 6WIND S.A.
+ *   Copyright 2012-2017 Mellanox.
  *
  *   Redistribution and use in source and binary forms, with or without
  *   modification, are permitted provided that the following conditions
@@ -39,6 +39,33 @@
 #include <limits.h>
 
 /*
+ * Runtime logging through RTE_LOG() is enabled when not in debugging mode.
+ * Intermediate LOG_*() macros add the required end-of-line characters.
+ */
+#ifndef NDEBUG
+#define INFO(...) DEBUG(__VA_ARGS__)
+#define WARN(...) DEBUG(__VA_ARGS__)
+#define ERROR(...) DEBUG(__VA_ARGS__)
+#else
+#define LOG__(level, m, ...) \
+	RTE_LOG(level, PMD, MLX4_DRIVER_NAME ": " m "%c", __VA_ARGS__)
+#define LOG_(level, ...) LOG__(level, __VA_ARGS__, '\n')
+#define INFO(...) LOG_(INFO, __VA_ARGS__)
+#define WARN(...) LOG_(WARNING, __VA_ARGS__)
+#define ERROR(...) LOG_(ERR, __VA_ARGS__)
+#endif
+
+/* Verbs header. */
+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+/*
  * Maximum number of simultaneous MAC addresses supported.
  *
  * According to ConnectX's Programmer Reference Manual:
@@ -160,4 +187,160 @@ enum {
 #define claim_positive(...) (__VA_ARGS__)
 #endif /* NDEBUG */
 
+struct mlx4_rxq_stats {
+	unsigned int idx; /**< Mapping index. */
+#ifdef MLX4_PMD_SOFT_COUNTERS
+	uint64_t ipackets; /**< Total of successfully received packets. */
+	uint64_t ibytes; /**< Total of successfully received bytes. */
+#endif
+	uint64_t idropped; /**< Total of packets dropped when RX ring full. */
+	uint64_t rx_nombuf; /**< Total of RX mbuf allocation failures. */
+};
+
+/* RX element (scattered packets). */
+struct rxq_elt_sp {
+	struct ibv_recv_wr wr; /* Work Request. */
+	struct ibv_sge sges[MLX4_PMD_SGE_WR_N]; /* Scatter/Gather Elements. */
+	struct rte_mbuf *bufs[MLX4_PMD_SGE_WR_N]; /* SGEs buffers. */
+};
+
+/* RX element. */
+struct rxq_elt {
+	struct ibv_recv_wr wr; /* Work Request. */
+	struct ibv_sge sge; /* Scatter/Gather Element. */
+	/* mbuf pointer is derived from WR_ID(wr.wr_id).offset. */
+};
+
+/* RX queue descriptor. */
+struct rxq {
+	struct priv *priv; /* Back pointer to private data. */
+	struct rte_mempool *mp; /* Memory Pool for allocations. */
+	struct ibv_mr *mr; /* Memory Region (for mp). */
+	struct ibv_cq *cq; /* Completion Queue. */
+	struct ibv_qp *qp; /* Queue Pair. */
+	struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
+	struct ibv_exp_cq_family *if_cq; /* CQ interface. */
+	/*
+	 * Each VLAN ID requires a separate flow steering rule.
+	 */
+	BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
+	struct ibv_flow *mac_flow[MLX4_MAX_MAC_ADDRESSES][MLX4_MAX_VLAN_IDS];
+	struct ibv_flow *promisc_flow; /* Promiscuous flow. */
+	struct ibv_flow *allmulti_flow; /* Multicast flow. */
+	unsigned int port_id; /* Port ID for incoming packets. */
+	unsigned int elts_n; /* (*elts)[] length. */
+	unsigned int elts_head; /* Current index in (*elts)[]. */
+	union {
+		struct rxq_elt_sp (*sp)[]; /* Scattered RX elements. */
+		struct rxq_elt (*no_sp)[]; /* RX elements. */
+	} elts;
+	unsigned int sp:1; /* Use scattered RX elements. */
+	unsigned int csum:1; /* Enable checksum offloading. */
+	unsigned int csum_l2tun:1; /* Same for L2 tunnels. */
+	struct mlx4_rxq_stats stats; /* RX queue counters. */
+	unsigned int socket; /* CPU socket ID for allocations. */
+	struct ibv_exp_res_domain *rd; /* Resource Domain. */
+};
+
+/* TX element. */
+struct txq_elt {
+	struct rte_mbuf *buf;
+};
+
+struct mlx4_txq_stats {
+	unsigned int idx; /**< Mapping index. */
+#ifdef MLX4_PMD_SOFT_COUNTERS
+	uint64_t opackets; /**< Total of successfully sent packets. */
+	uint64_t obytes;   /**< Total of successfully sent bytes. */
+#endif
+	uint64_t odropped; /**< Total of packets not sent when TX ring full. */
+};
+
+/*
+ * Linear buffer type. It is used when transmitting buffers with too many
+ * segments that do not fit the hardware queue (see max_send_sge).
+ * Extra segments are copied (linearized) in such buffers, replacing the
+ * last SGE during TX.
+ * The size is arbitrary but large enough to hold a jumbo frame with
+ * 8 segments considering mbuf.buf_len is about 2048 bytes.
+ */
+typedef uint8_t linear_t[16384];
+
+/* TX queue descriptor. */
+struct txq {
+	struct priv *priv; /* Back pointer to private data. */
+	struct {
+		const struct rte_mempool *mp; /* Cached Memory Pool. */
+		struct ibv_mr *mr; /* Memory Region (for mp). */
+		uint32_t lkey; /* mr->lkey */
+	} mp2mr[MLX4_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
+	struct ibv_cq *cq; /* Completion Queue. */
+	struct ibv_qp *qp; /* Queue Pair. */
+	struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
+	struct ibv_exp_cq_family *if_cq; /* CQ interface. */
+#if MLX4_PMD_MAX_INLINE > 0
+	uint32_t max_inline; /* Max inline send size <= MLX4_PMD_MAX_INLINE. */
+#endif
+	unsigned int elts_n; /* (*elts)[] length. */
+	struct txq_elt (*elts)[]; /* TX elements. */
+	unsigned int elts_head; /* Current index in (*elts)[]. */
+	unsigned int elts_tail; /* First element awaiting completion. */
+	unsigned int elts_comp; /* Number of completion requests. */
+	unsigned int elts_comp_cd; /* Countdown for next completion request. */
+	unsigned int elts_comp_cd_init; /* Initial value for countdown. */
+	struct mlx4_txq_stats stats; /* TX queue counters. */
+	linear_t (*elts_linear)[]; /* Linearized buffers. */
+	struct ibv_mr *mr_linear; /* Memory Region for linearized buffers. */
+	unsigned int socket; /* CPU socket ID for allocations. */
+	struct ibv_exp_res_domain *rd; /* Resource Domain. */
+};
+
+struct priv {
+	struct rte_eth_dev *dev; /* Ethernet device. */
+	struct ibv_context *ctx; /* Verbs context. */
+	struct ibv_device_attr device_attr; /* Device properties. */
+	struct ibv_pd *pd; /* Protection Domain. */
+	/*
+	 * MAC addresses array and configuration bit-field.
+	 * An extra entry that cannot be modified by the DPDK is reserved
+	 * for broadcast frames (destination MAC address ff:ff:ff:ff:ff:ff).
+	 */
+	struct ether_addr mac[MLX4_MAX_MAC_ADDRESSES];
+	BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
+	/* VLAN filters. */
+	struct {
+		unsigned int enabled:1; /* If enabled. */
+		unsigned int id:12; /* VLAN ID (0-4095). */
+	} vlan_filter[MLX4_MAX_VLAN_IDS]; /* VLAN filters table. */
+	/* Device properties. */
+	uint16_t mtu; /* Configured MTU. */
+	uint8_t port; /* Physical port number. */
+	unsigned int started:1; /* Device started, flows enabled. */
+	unsigned int promisc:1; /* Device in promiscuous mode. */
+	unsigned int allmulti:1; /* Device receives all multicast packets. */
+	unsigned int hw_qpg:1; /* QP groups are supported. */
+	unsigned int hw_tss:1; /* TSS is supported. */
+	unsigned int hw_rss:1; /* RSS is supported. */
+	unsigned int hw_csum:1; /* Checksum offload is supported. */
+	unsigned int hw_csum_l2tun:1; /* Same for L2 tunnels. */
+	unsigned int rss:1; /* RSS is enabled. */
+	unsigned int vf:1; /* This is a VF device. */
+	unsigned int pending_alarm:1; /* An alarm is pending. */
+#ifdef INLINE_RECV
+	unsigned int inl_recv_size; /* Inline recv size */
+#endif
+	unsigned int max_rss_tbl_sz; /* Maximum number of RSS queues. */
+	/* RX/TX queues. */
+	struct rxq rxq_parent; /* Parent queue when RSS is enabled. */
+	unsigned int rxqs_n; /* RX queues array size. */
+	unsigned int txqs_n; /* TX queues array size. */
+	struct rxq *(*rxqs)[]; /* RX queues. */
+	struct txq *(*txqs)[]; /* TX queues. */
+	struct rte_intr_handle intr_handle; /* Interrupt handler. */
+	rte_spinlock_t lock; /* Lock for control functions. */
+};
+
+void priv_lock(struct priv *priv);
+void priv_unlock(struct priv *priv);
+
 #endif /* RTE_PMD_MLX4_H_ */
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [dpdk-dev] [PATCH v3 2/2] net/mlx4: support basic flow items and actions
  2017-02-21 14:07 [dpdk-dev] [PATCH 1/2] net/mlx4: split the definitions to the header file Vasily Philipov
                   ` (4 preceding siblings ...)
  2017-03-05  7:51 ` [dpdk-dev] [PATCH v3 1/2] net/mlx4: split the definitions to the header file Vasily Philipov
@ 2017-03-05  7:51 ` Vasily Philipov
  2017-03-20  9:19   ` Nélio Laranjeiro
  5 siblings, 1 reply; 15+ messages in thread
From: Vasily Philipov @ 2017-03-05  7:51 UTC (permalink / raw)
  To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro

Add support for the following items: eth, vlan, ipv4, udp, tcp, and for the
following actions: queue, drop.

Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
 drivers/net/mlx4/Makefile    |    3 +-
 drivers/net/mlx4/mlx4.c      |   91 +++-
 drivers/net/mlx4/mlx4.h      |    3 +
 drivers/net/mlx4/mlx4_flow.c | 1043 ++++++++++++++++++++++++++++++++++++++++++
 drivers/net/mlx4/mlx4_flow.h |  104 +++++
 5 files changed, 1228 insertions(+), 16 deletions(-)
 create mode 100644 drivers/net/mlx4/mlx4_flow.c
 create mode 100644 drivers/net/mlx4/mlx4_flow.h

diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile
index 68c5902..1d463f7 100644
--- a/drivers/net/mlx4/Makefile
+++ b/drivers/net/mlx4/Makefile
@@ -36,6 +36,7 @@ LIB = librte_pmd_mlx4.a
 
 # Sources.
 SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_flow.c
 
 # Dependencies.
 DEPDIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += lib/librte_ether
@@ -129,7 +130,7 @@ mlx4_autoconf.h: mlx4_autoconf.h.new
 		cmp '$<' '$@' $(AUTOCONF_OUTPUT) || \
 		mv '$<' '$@'
 
-mlx4.o: mlx4_autoconf.h
+$(SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD):.c=.o): mlx4_autoconf.h
 
 clean_mlx4: FORCE
 	$Q rm -f -- mlx4_autoconf.h mlx4_autoconf.h.new
diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 8f6c57f..bb0c647 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -72,12 +72,14 @@
 #include <rte_log.h>
 #include <rte_alarm.h>
 #include <rte_memory.h>
+#include <rte_flow.h>
 
 /* Generated configuration header. */
 #include "mlx4_autoconf.h"
 
-/* PMD header. */
+/* PMD headers. */
 #include "mlx4.h"
+#include "mlx4_flow.h"
 
 /* Convenience macros for accessing mbuf fields. */
 #define NEXT(m) ((m)->next)
@@ -2341,6 +2343,7 @@ struct txq_mp2mr_mbuf_check_data {
 	assert(((uint8_t *)attr + sizeof(*attr)) == (uint8_t *)spec);
 	*attr = (struct ibv_flow_attr){
 		.type = IBV_FLOW_ATTR_NORMAL,
+		.priority = 3,
 		.num_of_specs = 1,
 		.port = priv->port,
 		.flags = 0
@@ -3928,6 +3931,7 @@ struct txq_mp2mr_mbuf_check_data {
 	unsigned int i = 0;
 	unsigned int r;
 	struct rxq *rxq;
+	int ret;
 
 	if (mlx4_is_secondary())
 		return -E_RTE_SECONDARY;
@@ -3947,8 +3951,6 @@ struct txq_mp2mr_mbuf_check_data {
 	}
 	/* Iterate only once when RSS is enabled. */
 	do {
-		int ret;
-
 		/* Ignore nonexistent RX queues. */
 		if (rxq == NULL)
 			continue;
@@ -3961,22 +3963,30 @@ struct txq_mp2mr_mbuf_check_data {
 			continue;
 		WARN("%p: QP flow attachment failed: %s",
 		     (void *)dev, strerror(ret));
-		/* Rollback. */
-		while (i != 0) {
-			rxq = (*priv->rxqs)[--i];
-			if (rxq != NULL) {
-				rxq_allmulticast_disable(rxq);
-				rxq_promiscuous_disable(rxq);
-				rxq_mac_addrs_del(rxq);
-			}
-		}
-		priv->started = 0;
-		priv_unlock(priv);
-		return -ret;
+		goto err;
 	} while ((--r) && ((rxq = (*priv->rxqs)[++i]), i));
 	priv_dev_interrupt_handler_install(priv, dev);
+	ret = mlx4_priv_flow_start(priv);
+	if (ret) {
+		ERROR("%p: flow start failed: %s",
+		      (void *)dev, strerror(ret));
+		goto err;
+	}
 	priv_unlock(priv);
 	return 0;
+err:
+	/* Rollback. */
+	while (i != 0) {
+		rxq = (*priv->rxqs)[i--];
+		if (rxq != NULL) {
+			rxq_allmulticast_disable(rxq);
+			rxq_promiscuous_disable(rxq);
+			rxq_mac_addrs_del(rxq);
+		}
+	}
+	priv->started = 0;
+	priv_unlock(priv);
+	return -ret;
 }
 
 /**
@@ -4011,6 +4021,7 @@ struct txq_mp2mr_mbuf_check_data {
 		rxq = (*priv->rxqs)[0];
 		r = priv->rxqs_n;
 	}
+	mlx4_priv_flow_stop(priv);
 	/* Iterate only once when RSS is enabled. */
 	do {
 		/* Ignore nonexistent RX queues. */
@@ -5012,6 +5023,55 @@ struct txq_mp2mr_mbuf_check_data {
 	return -ret;
 }
 
+const struct rte_flow_ops mlx4_flow_ops = {
+	.validate = mlx4_flow_validate,
+	.create = mlx4_flow_create,
+	.destroy = mlx4_flow_destroy,
+	.flush = mlx4_flow_flush,
+	.query = NULL,
+};
+
+/**
+ * Manage filter operations.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param filter_type
+ *   Filter type.
+ * @param filter_op
+ *   Operation to perform.
+ * @param arg
+ *   Pointer to operation-specific structure.
+ *
+ * @return
+ *   0 on success, negative errno value on failure.
+ */
+static int
+mlx4_dev_filter_ctrl(struct rte_eth_dev *dev,
+		     enum rte_filter_type filter_type,
+		     enum rte_filter_op filter_op,
+		     void *arg)
+{
+	int ret = EINVAL;
+
+	switch (filter_type) {
+	case RTE_ETH_FILTER_GENERIC:
+		if (filter_op != RTE_ETH_FILTER_GET)
+			return -EINVAL;
+		*(const void **)arg = &mlx4_flow_ops;
+		return 0;
+	case RTE_ETH_FILTER_FDIR:
+		DEBUG("%p: filter type FDIR is not supported by this PMD",
+		      (void *)dev);
+		break;
+	default:
+		ERROR("%p: filter type (%d) not supported",
+		      (void *)dev, filter_type);
+		break;
+	}
+	return -ret;
+}
+
 static const struct eth_dev_ops mlx4_dev_ops = {
 	.dev_configure = mlx4_dev_configure,
 	.dev_start = mlx4_dev_start,
@@ -5046,6 +5106,7 @@ struct txq_mp2mr_mbuf_check_data {
 	.mac_addr_add = mlx4_mac_addr_add,
 	.mac_addr_set = mlx4_mac_addr_set,
 	.mtu_set = mlx4_dev_set_mtu,
+	.filter_ctrl = mlx4_dev_filter_ctrl,
 };
 
 /**
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 70c9ecd..fac408b 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -295,6 +295,8 @@ struct txq {
 	struct ibv_exp_res_domain *rd; /* Resource Domain. */
 };
 
+struct rte_flow;
+
 struct priv {
 	struct rte_eth_dev *dev; /* Ethernet device. */
 	struct ibv_context *ctx; /* Verbs context. */
@@ -337,6 +339,7 @@ struct priv {
 	struct rxq *(*rxqs)[]; /* RX queues. */
 	struct txq *(*txqs)[]; /* TX queues. */
 	struct rte_intr_handle intr_handle; /* Interrupt handler. */
+	LIST_HEAD(mlx4_flows, rte_flow) flows;
 	rte_spinlock_t lock; /* Lock for control functions. */
 };
 
diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
new file mode 100644
index 0000000..65537c7
--- /dev/null
+++ b/drivers/net/mlx4/mlx4_flow.c
@@ -0,0 +1,1043 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2017 6WIND S.A.
+ *   Copyright 2017 Mellanox.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of 6WIND S.A. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <assert.h>
+
+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+#include <rte_malloc.h>
+
+/* Generated configuration header. */
+#include "mlx4_autoconf.h"
+
+/* PMD headers. */
+#include "mlx4.h"
+#include "mlx4_flow.h"
+
+/** Static initializer for items. */
+#define ITEMS(...) \
+	(const enum rte_flow_item_type []){ \
+		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
+	}
+
+/** Structure to generate a simple graph of layers supported by the NIC. */
+struct mlx4_flow_items {
+	/** List of possible actions for these items. */
+	const enum rte_flow_action_type *const actions;
+	/** Bit-masks corresponding to the possibilities for the item. */
+	const void *mask;
+	/**
+	 * Default bit-masks to use when item->mask is not provided. When
+	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
+	 * used instead.
+	 */
+	const void *default_mask;
+	/** Bit-masks size in bytes. */
+	const unsigned int mask_sz;
+	/**
+	 * Check support for a given item.
+	 *
+	 * @param item[in]
+	 *   Item specification.
+	 * @param mask[in]
+	 *   Bit-masks covering supported fields to compare with spec,
+	 *   last and mask in
+	 *   \item.
+	 * @param size
+	 *   Bit-Mask size in bytes.
+	 *
+	 * @return
+	 *   0 on success, negative value otherwise.
+	 */
+	int (*validate)(const struct rte_flow_item *item,
+			const uint8_t *mask, unsigned int size);
+	/**
+	 * Conversion function from rte_flow to NIC specific flow.
+	 *
+	 * @param item
+	 *   rte_flow item to convert.
+	 * @param default_mask
+	 *   Default bit-masks to use when item->mask is not provided.
+	 * @param data
+	 *   Internal structure to store the conversion.
+	 *
+	 * @return
+	 *   0 on success, negative value otherwise.
+	 */
+	int (*convert)(const struct rte_flow_item *item,
+		       const void *default_mask,
+		       void *data);
+	/** Size in bytes of the destination structure. */
+	const unsigned int dst_sz;
+	/** List of possible following items.  */
+	const enum rte_flow_item_type *const items;
+};
+
+/** Valid action for this PMD. */
+static const enum rte_flow_action_type valid_actions[] = {
+	RTE_FLOW_ACTION_TYPE_DROP,
+	RTE_FLOW_ACTION_TYPE_QUEUE,
+	RTE_FLOW_ACTION_TYPE_END,
+};
+
+/**
+ * Convert Ethernet item to Verbs specification.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param default_mask[in]
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param data[in, out]
+ *   User structure.
+ */
+static int
+mlx4_flow_create_eth(const struct rte_flow_item *item,
+		     const void *default_mask,
+		     void *data)
+{
+	const struct rte_flow_item_eth *spec = item->spec;
+	const struct rte_flow_item_eth *mask = item->mask;
+	struct mlx4_flow *flow = (struct mlx4_flow *)data;
+	struct ibv_flow_spec_eth *eth;
+	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
+	unsigned int i;
+
+	++flow->ibv_attr->num_of_specs;
+	flow->ibv_attr->priority = 2;
+	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+	*eth = (struct ibv_flow_spec_eth) {
+		.type = IBV_FLOW_SPEC_ETH,
+		.size = eth_size,
+	};
+	if (!spec) {
+		flow->ibv_attr->type = IBV_FLOW_ATTR_ALL_DEFAULT;
+		return 0;
+	}
+	if (!mask)
+		mask = default_mask;
+	memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
+	memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
+	memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
+	memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
+	/* Remove unwanted bits from values. */
+	for (i = 0; i < ETHER_ADDR_LEN; ++i) {
+		eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
+		eth->val.src_mac[i] &= eth->mask.src_mac[i];
+	}
+	return 0;
+}
+
+/**
+ * Convert VLAN item to Verbs specification.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param default_mask[in]
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param data[in, out]
+ *   User structure.
+ */
+static int
+mlx4_flow_create_vlan(const struct rte_flow_item *item,
+		      const void *default_mask,
+		      void *data)
+{
+	const struct rte_flow_item_vlan *spec = item->spec;
+	const struct rte_flow_item_vlan *mask = item->mask;
+	struct mlx4_flow *flow = (struct mlx4_flow *)data;
+	struct ibv_flow_spec_eth *eth;
+	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
+
+	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
+	if (!spec)
+		return 0;
+	if (!mask)
+		mask = default_mask;
+	eth->val.vlan_tag = spec->tci;
+	eth->mask.vlan_tag = mask->tci;
+	eth->val.vlan_tag &= eth->mask.vlan_tag;
+	return 0;
+}
+
+/**
+ * Convert IPv4 item to Verbs specification.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param default_mask[in]
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param data[in, out]
+ *   User structure.
+ */
+static int
+mlx4_flow_create_ipv4(const struct rte_flow_item *item,
+		      const void *default_mask,
+		      void *data)
+{
+	const struct rte_flow_item_ipv4 *spec = item->spec;
+	const struct rte_flow_item_ipv4 *mask = item->mask;
+	struct mlx4_flow *flow = (struct mlx4_flow *)data;
+	struct ibv_flow_spec_ipv4 *ipv4;
+	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4);
+
+	++flow->ibv_attr->num_of_specs;
+	flow->ibv_attr->priority = 1;
+	ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+	*ipv4 = (struct ibv_flow_spec_ipv4) {
+		.type = IBV_FLOW_SPEC_IPV4,
+		.size = ipv4_size,
+	};
+	if (!spec)
+		return 0;
+	ipv4->val = (struct ibv_flow_ipv4_filter) {
+		.src_ip = spec->hdr.src_addr,
+		.dst_ip = spec->hdr.dst_addr,
+	};
+	if (!mask)
+		mask = default_mask;
+	ipv4->mask = (struct ibv_flow_ipv4_filter) {
+		.src_ip = mask->hdr.src_addr,
+		.dst_ip = mask->hdr.dst_addr,
+	};
+	/* Remove unwanted bits from values. */
+	ipv4->val.src_ip &= ipv4->mask.src_ip;
+	ipv4->val.dst_ip &= ipv4->mask.dst_ip;
+	return 0;
+}
+
+/**
+ * Convert UDP item to Verbs specification.
+ *
+ * Writes an IBV_FLOW_SPEC_UDP entry at the current offset of the Verbs
+ * attribute buffer, increments the spec count and sets the rule priority
+ * to 0. Ports are only filled in when the item carries a spec.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param default_mask[in]
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param data[in, out]
+ *   User structure (struct mlx4_flow).
+ *
+ * @return
+ *   Always 0.
+ */
+static int
+mlx4_flow_create_udp(const struct rte_flow_item *item,
+		     const void *default_mask,
+		     void *data)
+{
+	const struct rte_flow_item_udp *match = item->spec;
+	const struct rte_flow_item_udp *bits =
+		item->mask ? item->mask : default_mask;
+	struct mlx4_flow *ctx = data;
+	struct ibv_flow_attr *attr = ctx->ibv_attr;
+	struct ibv_flow_spec_tcp_udp *out =
+		(void *)((uintptr_t)attr + ctx->offset);
+
+	attr->num_of_specs++;
+	attr->priority = 0;
+	/* Start from a zeroed spec holding only its type and size. */
+	*out = (struct ibv_flow_spec_tcp_udp) {
+		.type = IBV_FLOW_SPEC_UDP,
+		.size = sizeof(*out),
+	};
+	if (match == NULL)
+		return 0;
+	/* Values are stored pre-masked so that only requested bits remain. */
+	out->mask.dst_port = bits->hdr.dst_port;
+	out->mask.src_port = bits->hdr.src_port;
+	out->val.dst_port = match->hdr.dst_port & bits->hdr.dst_port;
+	out->val.src_port = match->hdr.src_port & bits->hdr.src_port;
+	return 0;
+}
+
+/**
+ * Convert TCP item to Verbs specification.
+ *
+ * Writes an IBV_FLOW_SPEC_TCP entry at the current offset of the Verbs
+ * attribute buffer, increments the spec count and sets the rule priority
+ * to 0. Ports are only filled in when the item carries a spec.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param default_mask[in]
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param data[in, out]
+ *   User structure (struct mlx4_flow).
+ *
+ * @return
+ *   Always 0.
+ */
+static int
+mlx4_flow_create_tcp(const struct rte_flow_item *item,
+		     const void *default_mask,
+		     void *data)
+{
+	const struct rte_flow_item_tcp *match = item->spec;
+	const struct rte_flow_item_tcp *bits =
+		item->mask ? item->mask : default_mask;
+	struct mlx4_flow *ctx = data;
+	struct ibv_flow_attr *attr = ctx->ibv_attr;
+	struct ibv_flow_spec_tcp_udp *out =
+		(void *)((uintptr_t)attr + ctx->offset);
+
+	attr->num_of_specs++;
+	attr->priority = 0;
+	/* Start from a zeroed spec holding only its type and size. */
+	*out = (struct ibv_flow_spec_tcp_udp) {
+		.type = IBV_FLOW_SPEC_TCP,
+		.size = sizeof(*out),
+	};
+	if (match == NULL)
+		return 0;
+	/* Values are stored pre-masked so that only requested bits remain. */
+	out->mask.dst_port = bits->hdr.dst_port;
+	out->mask.src_port = bits->hdr.src_port;
+	out->val.dst_port = match->hdr.dst_port & bits->hdr.dst_port;
+	out->val.src_port = match->hdr.src_port & bits->hdr.src_port;
+	return 0;
+}
+
+/**
+ * Check support for a given item.
+ *
+ * The item is accepted when:
+ * - it has no spec, no mask and no last (match anything), or
+ * - it has a spec and, when no mask is supplied, neither spec nor last
+ *   sets a bit outside the supported @p mask, and
+ * - spec and last, once masked, are identical (ranges are not supported).
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param mask[in]
+ *   Bit-masks covering supported fields to compare with spec, last and mask in
+ *   \item.
+ * @param size
+ *   Bit-Mask size in bytes.
+ *
+ * @return
+ *   0 on success, negative value otherwise.
+ */
+static int
+mlx4_flow_item_validate(const struct rte_flow_item *item,
+			const uint8_t *mask, unsigned int size)
+{
+	const uint8_t *spec = item->spec;
+	const uint8_t *last = item->last;
+	const uint8_t *apply = item->mask ? item->mask : mask;
+	unsigned int i;
+
+	/* A mask or a range end is meaningless without a spec. */
+	if (!spec)
+		return (item->mask || last) ? -1 : 0;
+	if (!item->mask) {
+		/* Without a user mask, no unsupported bit may be requested. */
+		for (i = 0; i < size; ++i) {
+			if ((spec[i] | mask[i]) != mask[i])
+				return -1;
+			if (last && (last[i] | mask[i]) != mask[i])
+				return -1;
+		}
+	}
+	if (last) {
+		/*
+		 * Compare masked bytes in place; always report a mismatch
+		 * as -1 instead of leaking the sign of a memcmp() result.
+		 */
+		for (i = 0; i < size; ++i)
+			if ((spec[i] & apply[i]) != (last[i] & apply[i]))
+				return -1;
+	}
+	return 0;
+}
+
+/**
+ * Check support for an Ethernet item.
+ *
+ * When a mask is supplied, every byte of its destination MAC must be 0xff;
+ * remaining checks are delegated to mlx4_flow_item_validate().
+ *
+ * @return
+ *   0 on success, nonzero otherwise.
+ */
+static int
+mlx4_flow_validate_eth(const struct rte_flow_item *item,
+		       const uint8_t *mask, unsigned int size)
+{
+	const struct rte_flow_item_eth *user_mask = item->mask;
+	unsigned int i;
+
+	if (user_mask != NULL) {
+		for (i = 0; i < sizeof(user_mask->dst.addr_bytes); ++i)
+			if (user_mask->dst.addr_bytes[i] != 0xff)
+				return -1;
+	}
+	return mlx4_flow_item_validate(item, mask, size);
+}
+
+/**
+ * Check support for a VLAN item.
+ *
+ * When a mask is supplied, its TCI must be either zero or exactly 0x0fff
+ * once converted to host order; remaining checks are delegated to
+ * mlx4_flow_item_validate().
+ *
+ * @return
+ *   0 on success, nonzero otherwise.
+ */
+static int
+mlx4_flow_validate_vlan(const struct rte_flow_item *item,
+			const uint8_t *mask, unsigned int size)
+{
+	const struct rte_flow_item_vlan *user_mask = item->mask;
+
+	if (user_mask != NULL &&
+	    !(user_mask->tci == 0 || ntohs(user_mask->tci) == 0x0fff))
+		return -1;
+	return mlx4_flow_item_validate(item, mask, size);
+}
+
+/**
+ * Check support for an IPv4 item.
+ *
+ * When a mask is supplied, each of the source and destination addresses
+ * must be masked either not at all (0) or entirely (0xffffffff); remaining
+ * checks are delegated to mlx4_flow_item_validate().
+ *
+ * @return
+ *   0 on success, nonzero otherwise.
+ */
+static int
+mlx4_flow_validate_ipv4(const struct rte_flow_item *item,
+			const uint8_t *mask, unsigned int size)
+{
+	const struct rte_flow_item_ipv4 *user_mask = item->mask;
+
+	if (user_mask != NULL) {
+		if (!(user_mask->hdr.src_addr == 0 ||
+		      user_mask->hdr.src_addr == 0xffffffff))
+			return -1;
+		if (!(user_mask->hdr.dst_addr == 0 ||
+		      user_mask->hdr.dst_addr == 0xffffffff))
+			return -1;
+	}
+	return mlx4_flow_item_validate(item, mask, size);
+}
+
+/**
+ * Check support for a UDP item.
+ *
+ * When a mask is supplied, each of the source and destination ports must
+ * be masked either not at all (0) or entirely (0xffff); remaining checks
+ * are delegated to mlx4_flow_item_validate().
+ *
+ * @return
+ *   0 on success, nonzero otherwise.
+ */
+static int
+mlx4_flow_validate_udp(const struct rte_flow_item *item,
+		       const uint8_t *mask, unsigned int size)
+{
+	const struct rte_flow_item_udp *user_mask = item->mask;
+
+	if (user_mask != NULL) {
+		if (!(user_mask->hdr.src_port == 0 ||
+		      user_mask->hdr.src_port == 0xffff))
+			return -1;
+		if (!(user_mask->hdr.dst_port == 0 ||
+		      user_mask->hdr.dst_port == 0xffff))
+			return -1;
+	}
+	return mlx4_flow_item_validate(item, mask, size);
+}
+
+/**
+ * Check support for a TCP item.
+ *
+ * When a mask is supplied, each of the source and destination ports must
+ * be masked either not at all (0) or entirely (0xffff); remaining checks
+ * are delegated to mlx4_flow_item_validate().
+ *
+ * @return
+ *   0 on success, nonzero otherwise.
+ */
+static int
+mlx4_flow_validate_tcp(const struct rte_flow_item *item,
+		       const uint8_t *mask, unsigned int size)
+{
+	const struct rte_flow_item_tcp *user_mask = item->mask;
+
+	if (user_mask != NULL) {
+		if (!(user_mask->hdr.src_port == 0 ||
+		      user_mask->hdr.src_port == 0xffff))
+			return -1;
+		if (!(user_mask->hdr.dst_port == 0 ||
+		      user_mask->hdr.dst_port == 0xffff))
+			return -1;
+	}
+	return mlx4_flow_item_validate(item, mask, size);
+}
+
+/**
+ * Graph of supported items and associated actions.
+ *
+ * The table is indexed by item type. For each entry:
+ * - .items lists the item types allowed to follow this one,
+ * - .mask covers the bits the validation callback accepts,
+ * - .default_mask, when set, is handed to the conversion callback in place
+ *   of .mask (see priv_flow_validate()),
+ * - .dst_sz is the number of bytes the item adds to the Verbs buffer.
+ */
+static const struct mlx4_flow_items mlx4_flow_items[] = {
+	/*
+	 * Entry priv_flow_validate() starts from (assuming
+	 * RTE_FLOW_ITEM_TYPE_END is index 0): a pattern must begin with ETH.
+	 */
+	[RTE_FLOW_ITEM_TYPE_END] = {
+		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
+	},
+	[RTE_FLOW_ITEM_TYPE_ETH] = {
+		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
+			       RTE_FLOW_ITEM_TYPE_IPV4),
+		.actions = valid_actions,
+		.mask = &(const struct rte_flow_item_eth){
+			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+		},
+		.default_mask = &rte_flow_item_eth_mask,
+		.mask_sz = sizeof(struct rte_flow_item_eth),
+		.validate = mlx4_flow_validate_eth,
+		.convert = mlx4_flow_create_eth,
+		.dst_sz = sizeof(struct ibv_flow_spec_eth),
+	},
+	/*
+	 * No .default_mask here, so .mask is also used for conversion.
+	 * .dst_sz is 0: the VLAN conversion fills in the Ethernet spec that
+	 * precedes the current offset instead of appending a new one.
+	 */
+	[RTE_FLOW_ITEM_TYPE_VLAN] = {
+		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4),
+		.actions = valid_actions,
+		.mask = &(const struct rte_flow_item_vlan){
+		/* rte_flow_item_vlan_mask is invalid for mlx4. */
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+			.tci = 0x0fff,
+#else
+			.tci = 0xff0f,
+#endif
+		},
+		.mask_sz = sizeof(struct rte_flow_item_vlan),
+		.validate = mlx4_flow_validate_vlan,
+		.convert = mlx4_flow_create_vlan,
+		.dst_sz = 0,
+	},
+	[RTE_FLOW_ITEM_TYPE_IPV4] = {
+		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
+			       RTE_FLOW_ITEM_TYPE_TCP),
+		.actions = valid_actions,
+		.mask = &(const struct rte_flow_item_ipv4){
+			.hdr = {
+				.src_addr = -1,
+				.dst_addr = -1,
+			},
+		},
+		.default_mask = &rte_flow_item_ipv4_mask,
+		.mask_sz = sizeof(struct rte_flow_item_ipv4),
+		.validate = mlx4_flow_validate_ipv4,
+		.convert = mlx4_flow_create_ipv4,
+		.dst_sz = sizeof(struct ibv_flow_spec_ipv4),
+	},
+	/* UDP and TCP list no follow-up items: they end the pattern. */
+	[RTE_FLOW_ITEM_TYPE_UDP] = {
+		.actions = valid_actions,
+		.mask = &(const struct rte_flow_item_udp){
+			.hdr = {
+				.src_port = -1,
+				.dst_port = -1,
+			},
+		},
+		.default_mask = &rte_flow_item_udp_mask,
+		.mask_sz = sizeof(struct rte_flow_item_udp),
+		.validate = mlx4_flow_validate_udp,
+		.convert = mlx4_flow_create_udp,
+		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
+	},
+	[RTE_FLOW_ITEM_TYPE_TCP] = {
+		.actions = valid_actions,
+		.mask = &(const struct rte_flow_item_tcp){
+			.hdr = {
+				.src_port = -1,
+				.dst_port = -1,
+			},
+		},
+		.default_mask = &rte_flow_item_tcp_mask,
+		.mask_sz = sizeof(struct rte_flow_item_tcp),
+		.validate = mlx4_flow_validate_tcp,
+		.convert = mlx4_flow_create_tcp,
+		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
+	},
+};
+
+/**
+ * Validate a flow supported by the NIC.
+ *
+ * The pattern is walked through the mlx4_flow_items graph. When
+ * flow->ibv_attr is NULL, only flow->offset is advanced by the size of each
+ * item; otherwise each item is additionally converted into that buffer.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[in] attr
+ *   Flow rule attributes.
+ * @param[in] items
+ *   Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ *   Associated actions (list terminated by the END action).
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ * @param[in, out] flow
+ *   Flow structure to update.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+priv_flow_validate(struct priv *priv,
+		   const struct rte_flow_attr *attr,
+		   const struct rte_flow_item items[],
+		   const struct rte_flow_action actions[],
+		   struct rte_flow_error *error,
+		   struct mlx4_flow *flow)
+{
+	const struct mlx4_flow_items *cur_item = mlx4_flow_items;
+	struct mlx4_flow_action action = {
+		.queue = 0,
+		.drop = 0,
+	};
+
+	if (attr->group) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
+				   NULL,
+				   "groups are not supported");
+		return -rte_errno;
+	}
+	if (attr->priority) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
+				   NULL,
+				   "priorities are not supported");
+		return -rte_errno;
+	}
+	if (attr->egress) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
+				   NULL,
+				   "egress is not supported");
+		return -rte_errno;
+	}
+	if (!attr->ingress) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
+				   NULL,
+				   "only ingress is supported");
+		return -rte_errno;
+	}
+	/* Go over items list. */
+	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
+		const struct mlx4_flow_items *token = NULL;
+		unsigned int i;
+		int err;
+
+		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
+			continue;
+		/*
+		 * The NIC can support patterns with NULL eth spec only
+		 * if eth is a single item in a rule.
+		 */
+		if (!items->spec &&
+			items->type == RTE_FLOW_ITEM_TYPE_ETH) {
+			const struct rte_flow_item *next = items + 1;
+
+			if (next->type != RTE_FLOW_ITEM_TYPE_END) {
+				rte_flow_error_set(error, ENOTSUP,
+						   RTE_FLOW_ERROR_TYPE_ITEM,
+						   items,
+						   "the rule requires"
+						   " an Ethernet spec");
+				return -rte_errno;
+			}
+		}
+		/* The item must be an allowed successor of the previous one. */
+		for (i = 0;
+		     cur_item->items &&
+		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
+		     ++i) {
+			if (cur_item->items[i] == items->type) {
+				token = &mlx4_flow_items[items->type];
+				break;
+			}
+		}
+		if (!token)
+			goto exit_item_not_supported;
+		cur_item = token;
+		err = cur_item->validate(items,
+					(const uint8_t *)cur_item->mask,
+					 cur_item->mask_sz);
+		if (err)
+			goto exit_item_not_supported;
+		/* Conversion only happens once a Verbs buffer is attached. */
+		if (flow->ibv_attr && cur_item->convert) {
+			err = cur_item->convert(items,
+						(cur_item->default_mask ?
+						 cur_item->default_mask :
+						 cur_item->mask),
+						 flow);
+			if (err)
+				goto exit_item_not_supported;
+		}
+		flow->offset += cur_item->dst_sz;
+	}
+	/* Go over actions list */
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
+		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
+			continue;
+		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
+			action.drop = 1;
+		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
+			const struct rte_flow_action_queue *queue =
+				(const struct rte_flow_action_queue *)
+				actions->conf;
+
+			/*
+			 * Compare with ">=" rather than "> rxqs_n - 1": the
+			 * subtraction wraps when no Rx queue is configured
+			 * and would then accept any index.
+			 */
+			if (!queue || queue->index >= priv->rxqs_n)
+				goto exit_action_not_supported;
+			action.queue = 1;
+		} else {
+			goto exit_action_not_supported;
+		}
+	}
+	if (!action.queue && !action.drop) {
+		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "no valid action");
+		return -rte_errno;
+	}
+	return 0;
+exit_item_not_supported:
+	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
+			   items, "item not supported");
+	return -rte_errno;
+exit_action_not_supported:
+	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+			   actions, "action not supported");
+	return -rte_errno;
+}
+
+/**
+ * Validate a flow supported by the NIC.
+ *
+ * Runs priv_flow_validate() under the private lock with no Verbs buffer
+ * attached, so items are checked but not converted.
+ *
+ * @see rte_flow_validate()
+ * @see rte_flow_ops
+ */
+int
+mlx4_flow_validate(struct rte_eth_dev *dev,
+		   const struct rte_flow_attr *attr,
+		   const struct rte_flow_item items[],
+		   const struct rte_flow_action actions[],
+		   struct rte_flow_error *error)
+{
+	struct mlx4_flow scratch = { .offset = sizeof(struct ibv_flow_attr) };
+	struct priv *priv = dev->data->dev_private;
+	int status;
+
+	priv_lock(priv);
+	status = priv_flow_validate(priv, attr, items, actions, error,
+				    &scratch);
+	priv_unlock(priv);
+	return status;
+}
+
+/**
+ * Complete flow rule creation.
+ *
+ * For a drop action a dedicated CQ/QP pair is created and the rule is
+ * attached to it; otherwise the rule is attached to the QP of the target
+ * Rx queue.
+ *
+ * This function takes ownership of @p ibv_attr: it is stored in the
+ * returned flow on success and released on every failure path.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param ibv_attr
+ *   Verbs flow attributes.
+ * @param action
+ *   Target action structure.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   A flow if the rule could be created, NULL otherwise.
+ */
+static struct rte_flow *
+priv_flow_create_action_queue(struct priv *priv,
+			      struct ibv_flow_attr *ibv_attr,
+			      struct mlx4_flow_action *action,
+			      struct rte_flow_error *error)
+{
+	struct rxq *rxq;
+	struct ibv_qp *qp;
+	struct rte_flow *rte_flow;
+
+	assert(priv->pd);
+	assert(priv->ctx);
+	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
+	if (!rte_flow) {
+		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "cannot allocate flow memory");
+		/* The caller does not free ibv_attr on failure. */
+		rte_free(ibv_attr);
+		return NULL;
+	}
+	/* Attach early so that the error path below always releases it. */
+	rte_flow->ibv_attr = ibv_attr;
+	/*
+	 * NOTE(review): rxq is dereferenced below in both branches; this
+	 * presumes the selected Rx queue (queue 0 for drop-only rules) is
+	 * configured -- confirm against the queue setup code.
+	 */
+	rxq = (*priv->rxqs)[action->queue_id];
+	if (action->drop) {
+		rte_flow->cq =
+			ibv_exp_create_cq(priv->ctx, 1, NULL, NULL, 0,
+					  &(struct ibv_exp_cq_init_attr){
+						  .comp_mask = 0,
+					  });
+		if (!rte_flow->cq) {
+			rte_flow_error_set(error, ENOMEM,
+					   RTE_FLOW_ERROR_TYPE_HANDLE,
+					   NULL, "cannot allocate CQ");
+			goto error;
+		}
+		rte_flow->qp = ibv_exp_create_qp(
+			priv->ctx,
+			&(struct ibv_exp_qp_init_attr){
+				.send_cq = rte_flow->cq,
+				.recv_cq = rte_flow->cq,
+				.cap = {
+					.max_recv_wr = 1,
+					.max_recv_sge = 1,
+				},
+				.qp_type = IBV_QPT_RAW_PACKET,
+				.comp_mask =
+					IBV_EXP_QP_INIT_ATTR_PD |
+					IBV_EXP_QP_INIT_ATTR_PORT |
+					IBV_EXP_QP_INIT_ATTR_RES_DOMAIN,
+				.pd = priv->pd,
+				.res_domain = rxq->rd,
+				.port_num = priv->port,
+			});
+		if (!rte_flow->qp) {
+			rte_flow_error_set(error, ENOMEM,
+					   RTE_FLOW_ERROR_TYPE_HANDLE,
+					   NULL, "cannot allocate QP");
+			goto error;
+		}
+		qp = rte_flow->qp;
+	} else {
+		rte_flow->rxq = rxq;
+		qp = rxq->qp;
+	}
+	rte_flow->ibv_flow = ibv_create_flow(qp, rte_flow->ibv_attr);
+	if (!rte_flow->ibv_flow) {
+		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "flow rule creation failure");
+		goto error;
+	}
+	return rte_flow;
+
+error:
+	assert(rte_flow);
+	/* Same order as priv_flow_destroy(): the QP uses the CQ. */
+	if (rte_flow->qp)
+		ibv_destroy_qp(rte_flow->qp);
+	if (rte_flow->cq)
+		ibv_destroy_cq(rte_flow->cq);
+	rte_free(rte_flow->ibv_attr);
+	rte_free(rte_flow);
+	return NULL;
+}
+
+/**
+ * Convert a flow.
+ *
+ * The pattern is validated twice: a first pass without a Verbs buffer
+ * computes the required size, then a second pass fills the freshly
+ * allocated buffer. The action list is then reduced to a single target and
+ * handed to priv_flow_create_action_queue().
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[in] attr
+ *   Flow rule attributes.
+ * @param[in] items
+ *   Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ *   Associated actions (list terminated by the END action).
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   A flow on success, NULL otherwise.
+ */
+static struct rte_flow *
+priv_flow_create(struct priv *priv,
+		 const struct rte_flow_attr *attr,
+		 const struct rte_flow_item items[],
+		 const struct rte_flow_action actions[],
+		 struct rte_flow_error *error)
+{
+	struct mlx4_flow_action target = {
+		.queue = 0,
+		.drop = 0,
+	};
+	struct mlx4_flow conv = { .offset = sizeof(struct ibv_flow_attr), };
+	const struct rte_flow_action *act;
+
+	/* First pass: check the rule and compute the buffer size. */
+	if (priv_flow_validate(priv, attr, items, actions, error, &conv))
+		return NULL;
+	conv.ibv_attr = rte_malloc(__func__, conv.offset, 0);
+	if (conv.ibv_attr == NULL) {
+		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "cannot allocate ibv_attr memory");
+		return NULL;
+	}
+	/* Second pass: rewind and write the specs after the header. */
+	conv.offset = sizeof(struct ibv_flow_attr);
+	*conv.ibv_attr = (struct ibv_flow_attr){
+		.comp_mask = 0,
+		.type = IBV_FLOW_ATTR_NORMAL,
+		.size = sizeof(struct ibv_flow_attr),
+		.priority = attr->priority,
+		.num_of_specs = 0,
+		.port = priv->port,
+		.flags = 0,
+	};
+	claim_zero(priv_flow_validate(priv, attr, items, actions,
+				      error, &conv));
+	for (act = actions; act->type != RTE_FLOW_ACTION_TYPE_END; ++act) {
+		switch (act->type) {
+		case RTE_FLOW_ACTION_TYPE_VOID:
+			break;
+		case RTE_FLOW_ACTION_TYPE_QUEUE:
+			target.queue = 1;
+			target.queue_id =
+				((const struct rte_flow_action_queue *)
+				 act->conf)->index;
+			break;
+		case RTE_FLOW_ACTION_TYPE_DROP:
+			target.drop = 1;
+			break;
+		default:
+			rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   act, "unsupported action");
+			rte_free(conv.ibv_attr);
+			return NULL;
+		}
+	}
+	return priv_flow_create_action_queue(priv, conv.ibv_attr,
+					     &target, error);
+}
+
+/**
+ * Create a flow.
+ *
+ * Builds the rule under the private lock and, on success, records it at
+ * the head of the per-port flow list.
+ *
+ * @see rte_flow_create()
+ * @see rte_flow_ops
+ */
+struct rte_flow *
+mlx4_flow_create(struct rte_eth_dev *dev,
+		 const struct rte_flow_attr *attr,
+		 const struct rte_flow_item items[],
+		 const struct rte_flow_action actions[],
+		 struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+	struct rte_flow *created;
+
+	priv_lock(priv);
+	created = priv_flow_create(priv, attr, items, actions, error);
+	if (created != NULL) {
+		LIST_INSERT_HEAD(&priv->flows, created, next);
+		DEBUG("Flow created %p", (void *)created);
+	}
+	priv_unlock(priv);
+	return created;
+}
+
+/**
+ * Destroy a flow.
+ *
+ * Unlinks the flow from the list it belongs to, releases whichever Verbs
+ * objects it holds and frees its memory.
+ *
+ * @param priv
+ *   Pointer to private structure (unused).
+ * @param[in] flow
+ *   Flow to destroy.
+ */
+static void
+priv_flow_destroy(struct priv *priv, struct rte_flow *flow)
+{
+	(void)priv;
+	LIST_REMOVE(flow, next);
+	/* Each Verbs object is optional: release only what is present. */
+	if (flow->ibv_flow)
+		claim_zero(ibv_destroy_flow(flow->ibv_flow));
+	/* The flow rule goes first, then the QP, then the CQ. */
+	if (flow->qp)
+		claim_zero(ibv_destroy_qp(flow->qp));
+	if (flow->cq)
+		claim_zero(ibv_destroy_cq(flow->cq));
+	rte_free(flow->ibv_attr);
+	/* Log before freeing; only the pointer value is printed. */
+	DEBUG("Flow destroyed %p", (void *)flow);
+	rte_free(flow);
+}
+
+/**
+ * Destroy a flow.
+ *
+ * Locked wrapper around priv_flow_destroy(); @p error is never written.
+ *
+ * @see rte_flow_destroy()
+ * @see rte_flow_ops
+ */
+int
+mlx4_flow_destroy(struct rte_eth_dev *dev,
+		  struct rte_flow *flow,
+		  struct rte_flow_error *error)
+{
+	struct priv *owner = dev->data->dev_private;
+
+	priv_lock(owner);
+	priv_flow_destroy(owner, flow);
+	priv_unlock(owner);
+	(void)error;
+	return 0;
+}
+
+/**
+ * Destroy all flows.
+ *
+ * Repeatedly destroys the head of the flow list until it is empty.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+static void
+priv_flow_flush(struct priv *priv)
+{
+	struct rte_flow *head;
+
+	/* priv_flow_destroy() unlinks the flow, exposing the next head. */
+	while ((head = LIST_FIRST(&priv->flows)) != NULL)
+		priv_flow_destroy(priv, head);
+}
+
+/**
+ * Destroy all flows.
+ *
+ * Locked wrapper around priv_flow_flush(); @p error is never written.
+ *
+ * @see rte_flow_flush()
+ * @see rte_flow_ops
+ */
+int
+mlx4_flow_flush(struct rte_eth_dev *dev,
+		struct rte_flow_error *error)
+{
+	struct priv *owner = dev->data->dev_private;
+
+	priv_lock(owner);
+	priv_flow_flush(owner);
+	priv_unlock(owner);
+	(void)error;
+	return 0;
+}
+
+/**
+ * Remove all flows.
+ *
+ * Called by dev_stop() to remove all flows.
+ *
+ * Only the Verbs flow rule of each flow is destroyed; the flow itself stays
+ * in the list. Flows that are not currently applied (ibv_flow is NULL) are
+ * skipped.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+void
+mlx4_priv_flow_stop(struct priv *priv)
+{
+	struct rte_flow *flow;
+
+	for (flow = LIST_FIRST(&priv->flows);
+	     flow;
+	     flow = LIST_NEXT(flow, next)) {
+		/* Same guard as priv_flow_destroy(): nothing to remove. */
+		if (!flow->ibv_flow)
+			continue;
+		claim_zero(ibv_destroy_flow(flow->ibv_flow));
+		flow->ibv_flow = NULL;
+		DEBUG("Flow %p removed", (void *)flow);
+	}
+}
+
+/**
+ * Add all flows.
+ *
+ * Creates a Verbs flow rule for every flow in the list, on the flow's own
+ * QP when it has one and on its Rx queue's QP otherwise. Stops at the first
+ * failure; rules created before it are left in place.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ *
+ * @return
+ *   0 on success, a positive errno value otherwise and rte_errno is set.
+ */
+int
+mlx4_priv_flow_start(struct priv *priv)
+{
+	struct rte_flow *flow;
+
+	LIST_FOREACH(flow, &priv->flows, next) {
+		struct ibv_qp *target = flow->qp;
+
+		if (target == NULL)
+			target = flow->rxq->qp;
+		flow->ibv_flow = ibv_create_flow(target, flow->ibv_attr);
+		if (flow->ibv_flow == NULL) {
+			DEBUG("Flow %p cannot be applied", (void *)flow);
+			rte_errno = EINVAL;
+			return rte_errno;
+		}
+		DEBUG("Flow %p applied", (void *)flow);
+	}
+	return 0;
+}
diff --git a/drivers/net/mlx4/mlx4_flow.h b/drivers/net/mlx4/mlx4_flow.h
new file mode 100644
index 0000000..66c5be6
--- /dev/null
+++ b/drivers/net/mlx4/mlx4_flow.h
@@ -0,0 +1,104 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2017 6WIND S.A.
+ *   Copyright 2017 Mellanox.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of 6WIND S.A. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RTE_PMD_MLX4_FLOW_H_
+#define RTE_PMD_MLX4_FLOW_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/queue.h>
+
+/* Verbs header. */
+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+#include <rte_byteorder.h>
+
+#include "mlx4.h"
+
+struct rte_flow {
+	LIST_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
+	struct rxq *rxq; /**< Pointer to the queue, NULL if drop queue. */
+	struct ibv_flow *ibv_flow; /**< Verbs flow, NULL while not applied. */
+	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
+	struct ibv_qp *qp; /**< Verbs queue pair, drop flows only. */
+	struct ibv_cq *cq; /**< Verbs completion queue, drop flows only. */
+};
+
+int
+mlx4_flow_validate(struct rte_eth_dev *dev,
+		   const struct rte_flow_attr *attr,
+		   const struct rte_flow_item items[],
+		   const struct rte_flow_action actions[],
+		   struct rte_flow_error *error);
+
+struct rte_flow *
+mlx4_flow_create(struct rte_eth_dev *dev,
+		 const struct rte_flow_attr *attr,
+		 const struct rte_flow_item items[],
+		 const struct rte_flow_action actions[],
+		 struct rte_flow_error *error);
+
+int
+mlx4_flow_destroy(struct rte_eth_dev *dev,
+		  struct rte_flow *flow,
+		  struct rte_flow_error *error);
+
+int
+mlx4_flow_flush(struct rte_eth_dev *dev,
+		struct rte_flow_error *error);
+
+/** Structure to pass to the conversion function. */
+struct mlx4_flow {
+	struct ibv_flow_attr *ibv_attr; /**< Verbs attribute, NULL to only size. */
+	unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
+};
+
+struct mlx4_flow_action {
+	uint32_t drop:1; /**< Target is a drop queue. */
+	uint32_t queue:1; /**< Target is a receive queue. */
+	uint32_t queue_id; /**< Identifier of the queue, set with queue. */
+};
+
+int mlx4_priv_flow_start(struct priv *priv);
+void mlx4_priv_flow_stop(struct priv *priv);
+
+#endif /* RTE_PMD_MLX4_FLOW_H_ */
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/2] net/mlx4: split the definitions to the header file
  2017-02-23 10:44     ` Vasily Philipov
@ 2017-03-06  9:24       ` Ferruh Yigit
  0 siblings, 0 replies; 15+ messages in thread
From: Ferruh Yigit @ 2017-03-06  9:24 UTC (permalink / raw)
  To: Vasily Philipov, dev
  Cc: Adrien Mazarguil, Nélio Laranjeiro, Thomas Monjalon, Neil Horman

On 2/23/2017 10:44 AM, Vasily Philipov wrote:
> Hi Ferruh,
> 
>> -----Original Message-----
>> From: Ferruh Yigit [mailto:ferruh.yigit@intel.com]
>> Sent: Wednesday, February 22, 2017 21:05
>> To: Vasily Philipov <vasilyf@mellanox.com>; dev@dpdk.org
>> Cc: Adrien Mazarguil <adrien.mazarguil@6wind.com>; Nélio Laranjeiro
>> <nelio.laranjeiro@6wind.com>
>> Subject: Re: [dpdk-dev] [PATCH v2 1/2] net/mlx4: split the definitions to the
>> header file
>>
>> On 2/22/2017 1:42 PM, Vasily Philipov wrote:
>>> Make some structs/defines visible from different source files by
>>> placing them into mlx4.h header.
>>>
>>> Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
>>> ---
>>>  drivers/net/mlx4/mlx4.c | 183
>>> ++--------------------------------------------
>>>  drivers/net/mlx4/mlx4.h | 187
>>> +++++++++++++++++++++++++++++++++++++++++++++++-
>>>  2 files changed, 189 insertions(+), 181 deletions(-)
>>>
>>> diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c index
>>> 79efaaa..82ccac8 100644
>>> --- a/drivers/net/mlx4/mlx4.c
>>> +++ b/drivers/net/mlx4/mlx4.c
>>> @@ -1,8 +1,8 @@
>>>  /*-
>>>   *   BSD LICENSE
>>>   *
>>> - *   Copyright 2012-2015 6WIND S.A.
>>> - *   Copyright 2012 Mellanox.
>>> + *   Copyright 2012-2017 6WIND S.A.
>>> + *   Copyright 2012-2017 Mellanox.
>>
>> Can someone knowledgeable about Copyright help please?
>>
>> What is the year field in Copyright line for?
>> And above change updates Copyright from 2012 to 2012-2017, is this correct?
>>
> 
> The year line was changed in order to show when the file was last modified...

I see, but I don't know if the year field is meant as a last-updated date
marker, especially when there are multiple copyright holders.

Overall I don't know why second date is required at all, assuming first
date shows the start date of the work and sets the copyright coverage date.

A comment from someone who knows more about these issues is welcome.

Thanks,
ferruh

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [dpdk-dev] [PATCH v3 1/2] net/mlx4: split the definitions to the header file
  2017-03-05  7:51 ` [dpdk-dev] [PATCH v3 1/2] net/mlx4: split the definitions to the header file Vasily Philipov
@ 2017-03-20  9:19   ` Nélio Laranjeiro
  2017-03-20 14:18     ` Ferruh Yigit
  0 siblings, 1 reply; 15+ messages in thread
From: Nélio Laranjeiro @ 2017-03-20  9:19 UTC (permalink / raw)
  To: Vasily Philipov; +Cc: dev, Adrien Mazarguil

On Sun, Mar 05, 2017 at 09:51:31AM +0200, Vasily Philipov wrote:
> Make priv_lock/priv_unlock functions and some other structs/defines visible
> from different source files by placing them into mlx4.h header.
> 
> Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
> ---
>  drivers/net/mlx4/mlx4.c | 193 +-----------------------------------------------
>  drivers/net/mlx4/mlx4.h | 187 +++++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 189 insertions(+), 191 deletions(-)
> 
> diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
> index 79efaaa..8f6c57f 100644
> --- a/drivers/net/mlx4/mlx4.c
> +++ b/drivers/net/mlx4/mlx4.c
> @@ -1,8 +1,8 @@
>  /*-
>   *   BSD LICENSE
>   *
> - *   Copyright 2012-2015 6WIND S.A.
> - *   Copyright 2012 Mellanox.
> + *   Copyright 2012-2017 6WIND S.A.
> + *   Copyright 2012-2017 Mellanox.
>   *
>   *   Redistribution and use in source and binary forms, with or without
>   *   modification, are permitted provided that the following conditions
> @@ -58,20 +58,6 @@
>  #include <linux/sockios.h>
>  #include <fcntl.h>
>  
> -/* Verbs header. */
> -/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
> -#ifdef PEDANTIC
> -#pragma GCC diagnostic ignored "-Wpedantic"
> -#endif
> -#include <infiniband/verbs.h>
> -#ifdef PEDANTIC
> -#pragma GCC diagnostic error "-Wpedantic"
> -#endif
> -
> -/* DPDK headers don't like -pedantic. */
> -#ifdef PEDANTIC
> -#pragma GCC diagnostic ignored "-Wpedantic"
> -#endif
>  #include <rte_ether.h>
>  #include <rte_ethdev.h>
>  #include <rte_dev.h>
> @@ -86,9 +72,6 @@
>  #include <rte_log.h>
>  #include <rte_alarm.h>
>  #include <rte_memory.h>
> -#ifdef PEDANTIC
> -#pragma GCC diagnostic error "-Wpedantic"
> -#endif
>  
>  /* Generated configuration header. */
>  #include "mlx4_autoconf.h"
> @@ -96,21 +79,6 @@
>  /* PMD header. */
>  #include "mlx4.h"
>  
> -/* Runtime logging through RTE_LOG() is enabled when not in debugging mode.
> - * Intermediate LOG_*() macros add the required end-of-line characters. */
> -#ifndef NDEBUG
> -#define INFO(...) DEBUG(__VA_ARGS__)
> -#define WARN(...) DEBUG(__VA_ARGS__)
> -#define ERROR(...) DEBUG(__VA_ARGS__)
> -#else
> -#define LOG__(level, m, ...) \
> -	RTE_LOG(level, PMD, MLX4_DRIVER_NAME ": " m "%c", __VA_ARGS__)
> -#define LOG_(level, ...) LOG__(level, __VA_ARGS__, '\n')
> -#define INFO(...) LOG_(INFO, __VA_ARGS__)
> -#define WARN(...) LOG_(WARNING, __VA_ARGS__)
> -#define ERROR(...) LOG_(ERR, __VA_ARGS__)
> -#endif
> -
>  /* Convenience macros for accessing mbuf fields. */
>  #define NEXT(m) ((m)->next)
>  #define DATA_LEN(m) ((m)->data_len)
> @@ -137,157 +105,6 @@
>  	 (((val) & (from)) / ((from) / (to))) : \
>  	 (((val) & (from)) * ((to) / (from))))
>  
> -struct mlx4_rxq_stats {
> -	unsigned int idx; /**< Mapping index. */
> -#ifdef MLX4_PMD_SOFT_COUNTERS
> -	uint64_t ipackets;  /**< Total of successfully received packets. */
> -	uint64_t ibytes;    /**< Total of successfully received bytes. */
> -#endif
> -	uint64_t idropped;  /**< Total of packets dropped when RX ring full. */
> -	uint64_t rx_nombuf; /**< Total of RX mbuf allocation failures. */
> -};
> -
> -struct mlx4_txq_stats {
> -	unsigned int idx; /**< Mapping index. */
> -#ifdef MLX4_PMD_SOFT_COUNTERS
> -	uint64_t opackets; /**< Total of successfully sent packets. */
> -	uint64_t obytes;   /**< Total of successfully sent bytes. */
> -#endif
> -	uint64_t odropped; /**< Total of packets not sent when TX ring full. */
> -};
> -
> -/* RX element (scattered packets). */
> -struct rxq_elt_sp {
> -	struct ibv_recv_wr wr; /* Work Request. */
> -	struct ibv_sge sges[MLX4_PMD_SGE_WR_N]; /* Scatter/Gather Elements. */
> -	struct rte_mbuf *bufs[MLX4_PMD_SGE_WR_N]; /* SGEs buffers. */
> -};
> -
> -/* RX element. */
> -struct rxq_elt {
> -	struct ibv_recv_wr wr; /* Work Request. */
> -	struct ibv_sge sge; /* Scatter/Gather Element. */
> -	/* mbuf pointer is derived from WR_ID(wr.wr_id).offset. */
> -};
> -
> -/* RX queue descriptor. */
> -struct rxq {
> -	struct priv *priv; /* Back pointer to private data. */
> -	struct rte_mempool *mp; /* Memory Pool for allocations. */
> -	struct ibv_mr *mr; /* Memory Region (for mp). */
> -	struct ibv_cq *cq; /* Completion Queue. */
> -	struct ibv_qp *qp; /* Queue Pair. */
> -	struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
> -	struct ibv_exp_cq_family *if_cq; /* CQ interface. */
> -	/*
> -	 * Each VLAN ID requires a separate flow steering rule.
> -	 */
> -	BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
> -	struct ibv_flow *mac_flow[MLX4_MAX_MAC_ADDRESSES][MLX4_MAX_VLAN_IDS];
> -	struct ibv_flow *promisc_flow; /* Promiscuous flow. */
> -	struct ibv_flow *allmulti_flow; /* Multicast flow. */
> -	unsigned int port_id; /* Port ID for incoming packets. */
> -	unsigned int elts_n; /* (*elts)[] length. */
> -	unsigned int elts_head; /* Current index in (*elts)[]. */
> -	union {
> -		struct rxq_elt_sp (*sp)[]; /* Scattered RX elements. */
> -		struct rxq_elt (*no_sp)[]; /* RX elements. */
> -	} elts;
> -	unsigned int sp:1; /* Use scattered RX elements. */
> -	unsigned int csum:1; /* Enable checksum offloading. */
> -	unsigned int csum_l2tun:1; /* Same for L2 tunnels. */
> -	struct mlx4_rxq_stats stats; /* RX queue counters. */
> -	unsigned int socket; /* CPU socket ID for allocations. */
> -	struct ibv_exp_res_domain *rd; /* Resource Domain. */
> -};
> -
> -/* TX element. */
> -struct txq_elt {
> -	struct rte_mbuf *buf;
> -};
> -
> -/* Linear buffer type. It is used when transmitting buffers with too many
> - * segments that do not fit the hardware queue (see max_send_sge).
> - * Extra segments are copied (linearized) in such buffers, replacing the
> - * last SGE during TX.
> - * The size is arbitrary but large enough to hold a jumbo frame with
> - * 8 segments considering mbuf.buf_len is about 2048 bytes. */
> -typedef uint8_t linear_t[16384];
> -
> -/* TX queue descriptor. */
> -struct txq {
> -	struct priv *priv; /* Back pointer to private data. */
> -	struct {
> -		const struct rte_mempool *mp; /* Cached Memory Pool. */
> -		struct ibv_mr *mr; /* Memory Region (for mp). */
> -		uint32_t lkey; /* mr->lkey */
> -	} mp2mr[MLX4_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
> -	struct ibv_cq *cq; /* Completion Queue. */
> -	struct ibv_qp *qp; /* Queue Pair. */
> -	struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
> -	struct ibv_exp_cq_family *if_cq; /* CQ interface. */
> -#if MLX4_PMD_MAX_INLINE > 0
> -	uint32_t max_inline; /* Max inline send size <= MLX4_PMD_MAX_INLINE. */
> -#endif
> -	unsigned int elts_n; /* (*elts)[] length. */
> -	struct txq_elt (*elts)[]; /* TX elements. */
> -	unsigned int elts_head; /* Current index in (*elts)[]. */
> -	unsigned int elts_tail; /* First element awaiting completion. */
> -	unsigned int elts_comp; /* Number of completion requests. */
> -	unsigned int elts_comp_cd; /* Countdown for next completion request. */
> -	unsigned int elts_comp_cd_init; /* Initial value for countdown. */
> -	struct mlx4_txq_stats stats; /* TX queue counters. */
> -	linear_t (*elts_linear)[]; /* Linearized buffers. */
> -	struct ibv_mr *mr_linear; /* Memory Region for linearized buffers. */
> -	unsigned int socket; /* CPU socket ID for allocations. */
> -	struct ibv_exp_res_domain *rd; /* Resource Domain. */
> -};
> -
> -struct priv {
> -	struct rte_eth_dev *dev; /* Ethernet device. */
> -	struct ibv_context *ctx; /* Verbs context. */
> -	struct ibv_device_attr device_attr; /* Device properties. */
> -	struct ibv_pd *pd; /* Protection Domain. */
> -	/*
> -	 * MAC addresses array and configuration bit-field.
> -	 * An extra entry that cannot be modified by the DPDK is reserved
> -	 * for broadcast frames (destination MAC address ff:ff:ff:ff:ff:ff).
> -	 */
> -	struct ether_addr mac[MLX4_MAX_MAC_ADDRESSES];
> -	BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
> -	/* VLAN filters. */
> -	struct {
> -		unsigned int enabled:1; /* If enabled. */
> -		unsigned int id:12; /* VLAN ID (0-4095). */
> -	} vlan_filter[MLX4_MAX_VLAN_IDS]; /* VLAN filters table. */
> -	/* Device properties. */
> -	uint16_t mtu; /* Configured MTU. */
> -	uint8_t port; /* Physical port number. */
> -	unsigned int started:1; /* Device started, flows enabled. */
> -	unsigned int promisc:1; /* Device in promiscuous mode. */
> -	unsigned int allmulti:1; /* Device receives all multicast packets. */
> -	unsigned int hw_qpg:1; /* QP groups are supported. */
> -	unsigned int hw_tss:1; /* TSS is supported. */
> -	unsigned int hw_rss:1; /* RSS is supported. */
> -	unsigned int hw_csum:1; /* Checksum offload is supported. */
> -	unsigned int hw_csum_l2tun:1; /* Same for L2 tunnels. */
> -	unsigned int rss:1; /* RSS is enabled. */
> -	unsigned int vf:1; /* This is a VF device. */
> -	unsigned int pending_alarm:1; /* An alarm is pending. */
> -#ifdef INLINE_RECV
> -	unsigned int inl_recv_size; /* Inline recv size */
> -#endif
> -	unsigned int max_rss_tbl_sz; /* Maximum number of RSS queues. */
> -	/* RX/TX queues. */
> -	struct rxq rxq_parent; /* Parent queue when RSS is enabled. */
> -	unsigned int rxqs_n; /* RX queues array size. */
> -	unsigned int txqs_n; /* TX queues array size. */
> -	struct rxq *(*rxqs)[]; /* RX queues. */
> -	struct txq *(*txqs)[]; /* TX queues. */
> -	struct rte_intr_handle intr_handle; /* Interrupt handler. */
> -	rte_spinlock_t lock; /* Lock for control functions. */
> -};
> -
>  /* Local storage for secondary process data. */
>  struct mlx4_secondary_data {
>  	struct rte_eth_dev_data data; /* Local device data. */
> @@ -335,8 +152,7 @@ struct mlx4_secondary_data {
>   * @param priv
>   *   Pointer to private structure.
>   */
> -static void
> -priv_lock(struct priv *priv)
> +void priv_lock(struct priv *priv)
>  {
>  	rte_spinlock_lock(&priv->lock);
>  }
> @@ -347,8 +163,7 @@ struct mlx4_secondary_data {
>   * @param priv
>   *   Pointer to private structure.
>   */
> -static void
> -priv_unlock(struct priv *priv)
> +void priv_unlock(struct priv *priv)
>  {
>  	rte_spinlock_unlock(&priv->lock);
>  }
> diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
> index 4c7505e..70c9ecd 100644
> --- a/drivers/net/mlx4/mlx4.h
> +++ b/drivers/net/mlx4/mlx4.h
> @@ -1,8 +1,8 @@
>  /*-
>   *   BSD LICENSE
>   *
> - *   Copyright 2012-2015 6WIND S.A.
> - *   Copyright 2012 Mellanox.
> + *   Copyright 2012-2017 6WIND S.A.
> + *   Copyright 2012-2017 Mellanox.
>   *
>   *   Redistribution and use in source and binary forms, with or without
>   *   modification, are permitted provided that the following conditions
> @@ -39,6 +39,33 @@
>  #include <limits.h>
>  
>  /*
> + * Runtime logging through RTE_LOG() is enabled when not in debugging mode.
> + * Intermediate LOG_*() macros add the required end-of-line characters.
> + */
> +#ifndef NDEBUG
> +#define INFO(...) DEBUG(__VA_ARGS__)
> +#define WARN(...) DEBUG(__VA_ARGS__)
> +#define ERROR(...) DEBUG(__VA_ARGS__)
> +#else
> +#define LOG__(level, m, ...) \
> +	RTE_LOG(level, PMD, MLX4_DRIVER_NAME ": " m "%c", __VA_ARGS__)
> +#define LOG_(level, ...) LOG__(level, __VA_ARGS__, '\n')
> +#define INFO(...) LOG_(INFO, __VA_ARGS__)
> +#define WARN(...) LOG_(WARNING, __VA_ARGS__)
> +#define ERROR(...) LOG_(ERR, __VA_ARGS__)
> +#endif
> +
> +/* Verbs header. */
> +/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
> +#ifdef PEDANTIC
> +#pragma GCC diagnostic ignored "-Wpedantic"
> +#endif
> +#include <infiniband/verbs.h>
> +#ifdef PEDANTIC
> +#pragma GCC diagnostic error "-Wpedantic"
> +#endif
> +
> +/*
>   * Maximum number of simultaneous MAC addresses supported.
>   *
>   * According to ConnectX's Programmer Reference Manual:
> @@ -160,4 +187,160 @@ enum {
>  #define claim_positive(...) (__VA_ARGS__)
>  #endif /* NDEBUG */
>  
> +struct mlx4_rxq_stats {
> +	unsigned int idx; /**< Mapping index. */
> +#ifdef MLX4_PMD_SOFT_COUNTERS
> +	uint64_t ipackets; /**< Total of successfully received packets. */
> +	uint64_t ibytes; /**< Total of successfully received bytes. */
> +#endif
> +	uint64_t idropped; /**< Total of packets dropped when RX ring full. */
> +	uint64_t rx_nombuf; /**< Total of RX mbuf allocation failures. */
> +};
> +
> +/* RX element (scattered packets). */
> +struct rxq_elt_sp {
> +	struct ibv_recv_wr wr; /* Work Request. */
> +	struct ibv_sge sges[MLX4_PMD_SGE_WR_N]; /* Scatter/Gather Elements. */
> +	struct rte_mbuf *bufs[MLX4_PMD_SGE_WR_N]; /* SGEs buffers. */
> +};
> +
> +/* RX element. */
> +struct rxq_elt {
> +	struct ibv_recv_wr wr; /* Work Request. */
> +	struct ibv_sge sge; /* Scatter/Gather Element. */
> +	/* mbuf pointer is derived from WR_ID(wr.wr_id).offset. */
> +};
> +
> +/* RX queue descriptor. */
> +struct rxq {
> +	struct priv *priv; /* Back pointer to private data. */
> +	struct rte_mempool *mp; /* Memory Pool for allocations. */
> +	struct ibv_mr *mr; /* Memory Region (for mp). */
> +	struct ibv_cq *cq; /* Completion Queue. */
> +	struct ibv_qp *qp; /* Queue Pair. */
> +	struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
> +	struct ibv_exp_cq_family *if_cq; /* CQ interface. */
> +	/*
> +	 * Each VLAN ID requires a separate flow steering rule.
> +	 */
> +	BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
> +	struct ibv_flow *mac_flow[MLX4_MAX_MAC_ADDRESSES][MLX4_MAX_VLAN_IDS];
> +	struct ibv_flow *promisc_flow; /* Promiscuous flow. */
> +	struct ibv_flow *allmulti_flow; /* Multicast flow. */
> +	unsigned int port_id; /* Port ID for incoming packets. */
> +	unsigned int elts_n; /* (*elts)[] length. */
> +	unsigned int elts_head; /* Current index in (*elts)[]. */
> +	union {
> +		struct rxq_elt_sp (*sp)[]; /* Scattered RX elements. */
> +		struct rxq_elt (*no_sp)[]; /* RX elements. */
> +	} elts;
> +	unsigned int sp:1; /* Use scattered RX elements. */
> +	unsigned int csum:1; /* Enable checksum offloading. */
> +	unsigned int csum_l2tun:1; /* Same for L2 tunnels. */
> +	struct mlx4_rxq_stats stats; /* RX queue counters. */
> +	unsigned int socket; /* CPU socket ID for allocations. */
> +	struct ibv_exp_res_domain *rd; /* Resource Domain. */
> +};
> +
> +/* TX element. */
> +struct txq_elt {
> +	struct rte_mbuf *buf;
> +};
> +
> +struct mlx4_txq_stats {
> +	unsigned int idx; /**< Mapping index. */
> +#ifdef MLX4_PMD_SOFT_COUNTERS
> +	uint64_t opackets; /**< Total of successfully sent packets. */
> +	uint64_t obytes;   /**< Total of successfully sent bytes. */
> +#endif
> +	uint64_t odropped; /**< Total of packets not sent when TX ring full. */
> +};
> +
> +/*
> + * Linear buffer type. It is used when transmitting buffers with too many
> + * segments that do not fit the hardware queue (see max_send_sge).
> + * Extra segments are copied (linearized) in such buffers, replacing the
> + * last SGE during TX.
> + * The size is arbitrary but large enough to hold a jumbo frame with
> + * 8 segments considering mbuf.buf_len is about 2048 bytes.
> + */
> +typedef uint8_t linear_t[16384];
> +
> +/* TX queue descriptor. */
> +struct txq {
> +	struct priv *priv; /* Back pointer to private data. */
> +	struct {
> +		const struct rte_mempool *mp; /* Cached Memory Pool. */
> +		struct ibv_mr *mr; /* Memory Region (for mp). */
> +		uint32_t lkey; /* mr->lkey */
> +	} mp2mr[MLX4_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
> +	struct ibv_cq *cq; /* Completion Queue. */
> +	struct ibv_qp *qp; /* Queue Pair. */
> +	struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
> +	struct ibv_exp_cq_family *if_cq; /* CQ interface. */
> +#if MLX4_PMD_MAX_INLINE > 0
> +	uint32_t max_inline; /* Max inline send size <= MLX4_PMD_MAX_INLINE. */
> +#endif
> +	unsigned int elts_n; /* (*elts)[] length. */
> +	struct txq_elt (*elts)[]; /* TX elements. */
> +	unsigned int elts_head; /* Current index in (*elts)[]. */
> +	unsigned int elts_tail; /* First element awaiting completion. */
> +	unsigned int elts_comp; /* Number of completion requests. */
> +	unsigned int elts_comp_cd; /* Countdown for next completion request. */
> +	unsigned int elts_comp_cd_init; /* Initial value for countdown. */
> +	struct mlx4_txq_stats stats; /* TX queue counters. */
> +	linear_t (*elts_linear)[]; /* Linearized buffers. */
> +	struct ibv_mr *mr_linear; /* Memory Region for linearized buffers. */
> +	unsigned int socket; /* CPU socket ID for allocations. */
> +	struct ibv_exp_res_domain *rd; /* Resource Domain. */
> +};
> +
> +struct priv {
> +	struct rte_eth_dev *dev; /* Ethernet device. */
> +	struct ibv_context *ctx; /* Verbs context. */
> +	struct ibv_device_attr device_attr; /* Device properties. */
> +	struct ibv_pd *pd; /* Protection Domain. */
> +	/*
> +	 * MAC addresses array and configuration bit-field.
> +	 * An extra entry that cannot be modified by the DPDK is reserved
> +	 * for broadcast frames (destination MAC address ff:ff:ff:ff:ff:ff).
> +	 */
> +	struct ether_addr mac[MLX4_MAX_MAC_ADDRESSES];
> +	BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
> +	/* VLAN filters. */
> +	struct {
> +		unsigned int enabled:1; /* If enabled. */
> +		unsigned int id:12; /* VLAN ID (0-4095). */
> +	} vlan_filter[MLX4_MAX_VLAN_IDS]; /* VLAN filters table. */
> +	/* Device properties. */
> +	uint16_t mtu; /* Configured MTU. */
> +	uint8_t port; /* Physical port number. */
> +	unsigned int started:1; /* Device started, flows enabled. */
> +	unsigned int promisc:1; /* Device in promiscuous mode. */
> +	unsigned int allmulti:1; /* Device receives all multicast packets. */
> +	unsigned int hw_qpg:1; /* QP groups are supported. */
> +	unsigned int hw_tss:1; /* TSS is supported. */
> +	unsigned int hw_rss:1; /* RSS is supported. */
> +	unsigned int hw_csum:1; /* Checksum offload is supported. */
> +	unsigned int hw_csum_l2tun:1; /* Same for L2 tunnels. */
> +	unsigned int rss:1; /* RSS is enabled. */
> +	unsigned int vf:1; /* This is a VF device. */
> +	unsigned int pending_alarm:1; /* An alarm is pending. */
> +#ifdef INLINE_RECV
> +	unsigned int inl_recv_size; /* Inline recv size */
> +#endif
> +	unsigned int max_rss_tbl_sz; /* Maximum number of RSS queues. */
> +	/* RX/TX queues. */
> +	struct rxq rxq_parent; /* Parent queue when RSS is enabled. */
> +	unsigned int rxqs_n; /* RX queues array size. */
> +	unsigned int txqs_n; /* TX queues array size. */
> +	struct rxq *(*rxqs)[]; /* RX queues. */
> +	struct txq *(*txqs)[]; /* TX queues. */
> +	struct rte_intr_handle intr_handle; /* Interrupt handler. */
> +	rte_spinlock_t lock; /* Lock for control functions. */
> +};
> +
> +void priv_lock(struct priv *priv);
> +void priv_unlock(struct priv *priv);
> +
>  #endif /* RTE_PMD_MLX4_H_ */
> -- 
> 1.8.3.1
> 

Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>

-- 
Nélio Laranjeiro
6WIND

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [dpdk-dev] [PATCH v3 2/2] net/mlx4: support basic flow items and actions
  2017-03-05  7:51 ` [dpdk-dev] [PATCH v3 2/2] net/mlx4: support basic flow items and actions Vasily Philipov
@ 2017-03-20  9:19   ` Nélio Laranjeiro
  0 siblings, 0 replies; 15+ messages in thread
From: Nélio Laranjeiro @ 2017-03-20  9:19 UTC (permalink / raw)
  To: Vasily Philipov; +Cc: dev, Adrien Mazarguil

On Sun, Mar 05, 2017 at 09:51:32AM +0200, Vasily Philipov wrote:
> Add support for the following items: eth, vlan, ipv4, udp, tcp, and for the
> following actions: queue, drop.
> 
> Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
> ---
>  drivers/net/mlx4/Makefile    |    3 +-
>  drivers/net/mlx4/mlx4.c      |   91 +++-
>  drivers/net/mlx4/mlx4.h      |    3 +
>  drivers/net/mlx4/mlx4_flow.c | 1043 ++++++++++++++++++++++++++++++++++++++++++
>  drivers/net/mlx4/mlx4_flow.h |  104 +++++
>  5 files changed, 1228 insertions(+), 16 deletions(-)
>  create mode 100644 drivers/net/mlx4/mlx4_flow.c
>  create mode 100644 drivers/net/mlx4/mlx4_flow.h
> 
> diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile
> index 68c5902..1d463f7 100644
> --- a/drivers/net/mlx4/Makefile
> +++ b/drivers/net/mlx4/Makefile
> @@ -36,6 +36,7 @@ LIB = librte_pmd_mlx4.a
>  
>  # Sources.
>  SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4.c
> +SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_flow.c
>  
>  # Dependencies.
>  DEPDIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += lib/librte_ether
> @@ -129,7 +130,7 @@ mlx4_autoconf.h: mlx4_autoconf.h.new
>  		cmp '$<' '$@' $(AUTOCONF_OUTPUT) || \
>  		mv '$<' '$@'
>  
> -mlx4.o: mlx4_autoconf.h
> +$(SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD):.c=.o): mlx4_autoconf.h
>  
>  clean_mlx4: FORCE
>  	$Q rm -f -- mlx4_autoconf.h mlx4_autoconf.h.new
> diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
> index 8f6c57f..bb0c647 100644
> --- a/drivers/net/mlx4/mlx4.c
> +++ b/drivers/net/mlx4/mlx4.c
> @@ -72,12 +72,14 @@
>  #include <rte_log.h>
>  #include <rte_alarm.h>
>  #include <rte_memory.h>
> +#include <rte_flow.h>
>  
>  /* Generated configuration header. */
>  #include "mlx4_autoconf.h"
>  
> -/* PMD header. */
> +/* PMD headers. */
>  #include "mlx4.h"
> +#include "mlx4_flow.h"
>  
>  /* Convenience macros for accessing mbuf fields. */
>  #define NEXT(m) ((m)->next)
> @@ -2341,6 +2343,7 @@ struct txq_mp2mr_mbuf_check_data {
>  	assert(((uint8_t *)attr + sizeof(*attr)) == (uint8_t *)spec);
>  	*attr = (struct ibv_flow_attr){
>  		.type = IBV_FLOW_ATTR_NORMAL,
> +		.priority = 3,
>  		.num_of_specs = 1,
>  		.port = priv->port,
>  		.flags = 0
> @@ -3928,6 +3931,7 @@ struct txq_mp2mr_mbuf_check_data {
>  	unsigned int i = 0;
>  	unsigned int r;
>  	struct rxq *rxq;
> +	int ret;
>  
>  	if (mlx4_is_secondary())
>  		return -E_RTE_SECONDARY;
> @@ -3947,8 +3951,6 @@ struct txq_mp2mr_mbuf_check_data {
>  	}
>  	/* Iterate only once when RSS is enabled. */
>  	do {
> -		int ret;
> -
>  		/* Ignore nonexistent RX queues. */
>  		if (rxq == NULL)
>  			continue;
> @@ -3961,22 +3963,30 @@ struct txq_mp2mr_mbuf_check_data {
>  			continue;
>  		WARN("%p: QP flow attachment failed: %s",
>  		     (void *)dev, strerror(ret));
> -		/* Rollback. */
> -		while (i != 0) {
> -			rxq = (*priv->rxqs)[--i];
> -			if (rxq != NULL) {
> -				rxq_allmulticast_disable(rxq);
> -				rxq_promiscuous_disable(rxq);
> -				rxq_mac_addrs_del(rxq);
> -			}
> -		}
> -		priv->started = 0;
> -		priv_unlock(priv);
> -		return -ret;
> +		goto err;
>  	} while ((--r) && ((rxq = (*priv->rxqs)[++i]), i));
>  	priv_dev_interrupt_handler_install(priv, dev);
> +	ret = mlx4_priv_flow_start(priv);
> +	if (ret) {
> +		ERROR("%p: flow start failed: %s",
> +		      (void *)dev, strerror(ret));
> +		goto err;
> +	}
>  	priv_unlock(priv);
>  	return 0;
> +err:
> +	/* Rollback. */
> +	while (i != 0) {
> +		rxq = (*priv->rxqs)[i--];
> +		if (rxq != NULL) {
> +			rxq_allmulticast_disable(rxq);
> +			rxq_promiscuous_disable(rxq);
> +			rxq_mac_addrs_del(rxq);
> +		}
> +	}
> +	priv->started = 0;
> +	priv_unlock(priv);
> +	return -ret;
>  }
>  
>  /**
> @@ -4011,6 +4021,7 @@ struct txq_mp2mr_mbuf_check_data {
>  		rxq = (*priv->rxqs)[0];
>  		r = priv->rxqs_n;
>  	}
> +	mlx4_priv_flow_stop(priv);
>  	/* Iterate only once when RSS is enabled. */
>  	do {
>  		/* Ignore nonexistent RX queues. */
> @@ -5012,6 +5023,55 @@ struct txq_mp2mr_mbuf_check_data {
>  	return -ret;
>  }
>  
> +const struct rte_flow_ops mlx4_flow_ops = {
> +	.validate = mlx4_flow_validate,
> +	.create = mlx4_flow_create,
> +	.destroy = mlx4_flow_destroy,
> +	.flush = mlx4_flow_flush,
> +	.query = NULL,
> +};
> +
> +/**
> + * Manage filter operations.
> + *
> + * @param dev
> + *   Pointer to Ethernet device structure.
> + * @param filter_type
> + *   Filter type.
> + * @param filter_op
> + *   Operation to perform.
> + * @param arg
> + *   Pointer to operation-specific structure.
> + *
> + * @return
> + *   0 on success, negative errno value on failure.
> + */
> +static int
> +mlx4_dev_filter_ctrl(struct rte_eth_dev *dev,
> +		     enum rte_filter_type filter_type,
> +		     enum rte_filter_op filter_op,
> +		     void *arg)
> +{
> +	int ret = EINVAL;
> +
> +	switch (filter_type) {
> +	case RTE_ETH_FILTER_GENERIC:
> +		if (filter_op != RTE_ETH_FILTER_GET)
> +			return -EINVAL;
> +		*(const void **)arg = &mlx4_flow_ops;
> +		return 0;
> +	case RTE_ETH_FILTER_FDIR:
> +		DEBUG("%p: filter type FDIR is not supported by this PMD",
> +		      (void *)dev);
> +		break;
> +	default:
> +		ERROR("%p: filter type (%d) not supported",
> +		      (void *)dev, filter_type);
> +		break;
> +	}
> +	return -ret;
> +}
> +
>  static const struct eth_dev_ops mlx4_dev_ops = {
>  	.dev_configure = mlx4_dev_configure,
>  	.dev_start = mlx4_dev_start,
> @@ -5046,6 +5106,7 @@ struct txq_mp2mr_mbuf_check_data {
>  	.mac_addr_add = mlx4_mac_addr_add,
>  	.mac_addr_set = mlx4_mac_addr_set,
>  	.mtu_set = mlx4_dev_set_mtu,
> +	.filter_ctrl = mlx4_dev_filter_ctrl,
>  };
>  
>  /**
> diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
> index 70c9ecd..fac408b 100644
> --- a/drivers/net/mlx4/mlx4.h
> +++ b/drivers/net/mlx4/mlx4.h
> @@ -295,6 +295,8 @@ struct txq {
>  	struct ibv_exp_res_domain *rd; /* Resource Domain. */
>  };
>  
> +struct rte_flow;
> +
>  struct priv {
>  	struct rte_eth_dev *dev; /* Ethernet device. */
>  	struct ibv_context *ctx; /* Verbs context. */
> @@ -337,6 +339,7 @@ struct priv {
>  	struct rxq *(*rxqs)[]; /* RX queues. */
>  	struct txq *(*txqs)[]; /* TX queues. */
>  	struct rte_intr_handle intr_handle; /* Interrupt handler. */
> +	LIST_HEAD(mlx4_flows, rte_flow) flows;
>  	rte_spinlock_t lock; /* Lock for control functions. */
>  };
>  
> diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
> new file mode 100644
> index 0000000..65537c7
> --- /dev/null
> +++ b/drivers/net/mlx4/mlx4_flow.c
> @@ -0,0 +1,1043 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright 2017 6WIND S.A.
> + *   Copyright 2017 Mellanox.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of 6WIND S.A. nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <assert.h>
> +
> +#include <rte_flow.h>
> +#include <rte_flow_driver.h>
> +#include <rte_malloc.h>
> +
> +/* Generated configuration header. */
> +#include "mlx4_autoconf.h"
> +
> +/* PMD headers. */
> +#include "mlx4.h"
> +#include "mlx4_flow.h"
> +
> +/** Static initializer for items. */
> +#define ITEMS(...) \
> +	(const enum rte_flow_item_type []){ \
> +		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
> +	}
> +
> +/** Structure to generate a simple graph of layers supported by the NIC. */
> +struct mlx4_flow_items {
> +	/** List of possible actions for these items. */
> +	const enum rte_flow_action_type *const actions;
> +	/** Bit-masks corresponding to the possibilities for the item. */
> +	const void *mask;
> +	/**
> +	 * Default bit-masks to use when item->mask is not provided. When
> +	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
> +	 * used instead.
> +	 */
> +	const void *default_mask;
> +	/** Bit-masks size in bytes. */
> +	const unsigned int mask_sz;
> +	/**
> +	 * Check support for a given item.
> +	 *
> +	 * @param item[in]
> +	 *   Item specification.
> +	 * @param mask[in]
> +	 *   Bit-masks covering supported fields to compare with spec,
> +	 *   last and mask in
> +	 *   \item.
> +	 * @param size
> +	 *   Bit-Mask size in bytes.
> +	 *
> +	 * @return
> +	 *   0 on success, negative value otherwise.
> +	 */
> +	int (*validate)(const struct rte_flow_item *item,
> +			const uint8_t *mask, unsigned int size);
> +	/**
> +	 * Conversion function from rte_flow to NIC specific flow.
> +	 *
> +	 * @param item
> +	 *   rte_flow item to convert.
> +	 * @param default_mask
> +	 *   Default bit-masks to use when item->mask is not provided.
> +	 * @param data
> +	 *   Internal structure to store the conversion.
> +	 *
> +	 * @return
> +	 *   0 on success, negative value otherwise.
> +	 */
> +	int (*convert)(const struct rte_flow_item *item,
> +		       const void *default_mask,
> +		       void *data);
> +	/** Size in bytes of the destination structure. */
> +	const unsigned int dst_sz;
> +	/** List of possible following items.  */
> +	const enum rte_flow_item_type *const items;
> +};
> +
> +/** Valid action for this PMD. */
> +static const enum rte_flow_action_type valid_actions[] = {
> +	RTE_FLOW_ACTION_TYPE_DROP,
> +	RTE_FLOW_ACTION_TYPE_QUEUE,
> +	RTE_FLOW_ACTION_TYPE_END,
> +};
> +
> +/**
> + * Convert Ethernet item to Verbs specification.
> + *
> + * @param item[in]
> + *   Item specification.
> + * @param default_mask[in]
> + *   Default bit-masks to use when item->mask is not provided.
> + * @param data[in, out]
> + *   User structure.
> + */
> +static int
> +mlx4_flow_create_eth(const struct rte_flow_item *item,
> +		     const void *default_mask,
> +		     void *data)
> +{
> +	const struct rte_flow_item_eth *spec = item->spec;
> +	const struct rte_flow_item_eth *mask = item->mask;
> +	struct mlx4_flow *flow = (struct mlx4_flow *)data;
> +	struct ibv_flow_spec_eth *eth;
> +	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
> +	unsigned int i;
> +
> +	++flow->ibv_attr->num_of_specs;
> +	flow->ibv_attr->priority = 2;
> +	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
> +	*eth = (struct ibv_flow_spec_eth) {
> +		.type = IBV_FLOW_SPEC_ETH,
> +		.size = eth_size,
> +	};
> +	if (!spec) {
> +		flow->ibv_attr->type = IBV_FLOW_ATTR_ALL_DEFAULT;
> +		return 0;
> +	}
> +	if (!mask)
> +		mask = default_mask;
> +	memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
> +	memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
> +	memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
> +	memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
> +	/* Remove unwanted bits from values. */
> +	for (i = 0; i < ETHER_ADDR_LEN; ++i) {
> +		eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
> +		eth->val.src_mac[i] &= eth->mask.src_mac[i];
> +	}
> +	return 0;
> +}
> +
> +/**
> + * Convert VLAN item to Verbs specification.
> + *
> + * @param item[in]
> + *   Item specification.
> + * @param default_mask[in]
> + *   Default bit-masks to use when item->mask is not provided.
> + * @param data[in, out]
> + *   User structure.
> + */
> +static int
> +mlx4_flow_create_vlan(const struct rte_flow_item *item,
> +		      const void *default_mask,
> +		      void *data)
> +{
> +	const struct rte_flow_item_vlan *spec = item->spec;
> +	const struct rte_flow_item_vlan *mask = item->mask;
> +	struct mlx4_flow *flow = (struct mlx4_flow *)data;
> +	struct ibv_flow_spec_eth *eth;
> +	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
> +
> +	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
> +	if (!spec)
> +		return 0;
> +	if (!mask)
> +		mask = default_mask;
> +	eth->val.vlan_tag = spec->tci;
> +	eth->mask.vlan_tag = mask->tci;
> +	eth->val.vlan_tag &= eth->mask.vlan_tag;
> +	return 0;
> +}
> +
> +/**
> + * Convert IPv4 item to Verbs specification.
> + *
> + * @param[in] item
> + *   Item specification.
> + * @param[in] default_mask
> + *   Default bit-masks to use when item->mask is not provided.
> + * @param[in, out] data
> + *   User structure.
> + */
> +static int
> +mlx4_flow_create_ipv4(const struct rte_flow_item *item,
> +		      const void *default_mask,
> +		      void *data)
> +{
> +	const struct rte_flow_item_ipv4 *spec = item->spec;
> +	const struct rte_flow_item_ipv4 *mask = item->mask;
> +	struct mlx4_flow *flow = (struct mlx4_flow *)data;
> +	struct ibv_flow_spec_ipv4 *ipv4;
> +	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4);
> +
> +	++flow->ibv_attr->num_of_specs;
> +	flow->ibv_attr->priority = 1;
> +	ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
> +	*ipv4 = (struct ibv_flow_spec_ipv4) {
> +		.type = IBV_FLOW_SPEC_IPV4,
> +		.size = ipv4_size,
> +	};
> +	if (!spec)
> +		return 0;
> +	ipv4->val = (struct ibv_flow_ipv4_filter) {
> +		.src_ip = spec->hdr.src_addr,
> +		.dst_ip = spec->hdr.dst_addr,
> +	};
> +	if (!mask)
> +		mask = default_mask;
> +	ipv4->mask = (struct ibv_flow_ipv4_filter) {
> +		.src_ip = mask->hdr.src_addr,
> +		.dst_ip = mask->hdr.dst_addr,
> +	};
> +	/* Remove unwanted bits from values. */
> +	ipv4->val.src_ip &= ipv4->mask.src_ip;
> +	ipv4->val.dst_ip &= ipv4->mask.dst_ip;
> +	return 0;
> +}
> +
> +/**
> + * Convert UDP item to Verbs specification.
> + *
> + * @param[in] item
> + *   Item specification.
> + * @param[in] default_mask
> + *   Default bit-masks to use when item->mask is not provided.
> + * @param[in, out] data
> + *   User structure.
> + */
> +static int
> +mlx4_flow_create_udp(const struct rte_flow_item *item,
> +		     const void *default_mask,
> +		     void *data)
> +{
> +	const struct rte_flow_item_udp *spec = item->spec;
> +	const struct rte_flow_item_udp *mask = item->mask;
> +	struct mlx4_flow *flow = (struct mlx4_flow *)data;
> +	struct ibv_flow_spec_tcp_udp *udp;
> +	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
> +
> +	++flow->ibv_attr->num_of_specs;
> +	flow->ibv_attr->priority = 0;
> +	udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
> +	*udp = (struct ibv_flow_spec_tcp_udp) {
> +		.type = IBV_FLOW_SPEC_UDP,
> +		.size = udp_size,
> +	};
> +	if (!spec)
> +		return 0;
> +	udp->val.dst_port = spec->hdr.dst_port;
> +	udp->val.src_port = spec->hdr.src_port;
> +	if (!mask)
> +		mask = default_mask;
> +	udp->mask.dst_port = mask->hdr.dst_port;
> +	udp->mask.src_port = mask->hdr.src_port;
> +	/* Remove unwanted bits from values. */
> +	udp->val.src_port &= udp->mask.src_port;
> +	udp->val.dst_port &= udp->mask.dst_port;
> +	return 0;
> +}
> +
> +/**
> + * Convert TCP item to Verbs specification.
> + *
> + * @param[in] item
> + *   Item specification.
> + * @param[in] default_mask
> + *   Default bit-masks to use when item->mask is not provided.
> + * @param[in, out] data
> + *   User structure.
> + */
> +static int
> +mlx4_flow_create_tcp(const struct rte_flow_item *item,
> +		     const void *default_mask,
> +		     void *data)
> +{
> +	const struct rte_flow_item_tcp *spec = item->spec;
> +	const struct rte_flow_item_tcp *mask = item->mask;
> +	struct mlx4_flow *flow = (struct mlx4_flow *)data;
> +	struct ibv_flow_spec_tcp_udp *tcp;
> +	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
> +
> +	++flow->ibv_attr->num_of_specs;
> +	flow->ibv_attr->priority = 0;
> +	tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
> +	*tcp = (struct ibv_flow_spec_tcp_udp) {
> +		.type = IBV_FLOW_SPEC_TCP,
> +		.size = tcp_size,
> +	};
> +	if (!spec)
> +		return 0;
> +	tcp->val.dst_port = spec->hdr.dst_port;
> +	tcp->val.src_port = spec->hdr.src_port;
> +	if (!mask)
> +		mask = default_mask;
> +	tcp->mask.dst_port = mask->hdr.dst_port;
> +	tcp->mask.src_port = mask->hdr.src_port;
> +	/* Remove unwanted bits from values. */
> +	tcp->val.src_port &= tcp->mask.src_port;
> +	tcp->val.dst_port &= tcp->mask.dst_port;
> +	return 0;
> +}
> +
> +/**
> + * Check support for a given item.
> + *
> + * @param[in] item
> + *   Item specification.
> + * @param[in] mask
> + *   Bit-masks covering supported fields to compare with spec, last and mask in
> + *   \item.
> + * @param size
> + *   Bit-Mask size in bytes.
> + *
> + * @return
> + *   0 on success, negative value otherwise.
> + */
> +static int
> +mlx4_flow_item_validate(const struct rte_flow_item *item,
> +			const uint8_t *mask, unsigned int size)
> +{
> +	int ret = 0;
> +
> +	if (!item->spec && (item->mask || item->last))
> +		return -1;
> +	if (item->spec && !item->mask) {
> +		unsigned int i;
> +		const uint8_t *spec = item->spec;
> +
> +		for (i = 0; i < size; ++i)
> +			if ((spec[i] | mask[i]) != mask[i])
> +				return -1;
> +	}
> +	if (item->last && !item->mask) {
> +		unsigned int i;
> +		const uint8_t *spec = item->last;
> +
> +		for (i = 0; i < size; ++i)
> +			if ((spec[i] | mask[i]) != mask[i])
> +				return -1;
> +	}
> +	if (item->spec && item->last) {
> +		uint8_t spec[size];
> +		uint8_t last[size];
> +		const uint8_t *apply = mask;
> +		unsigned int i;
> +
> +		if (item->mask)
> +			apply = item->mask;
> +		for (i = 0; i < size; ++i) {
> +			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
> +			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
> +		}
> +		ret = memcmp(spec, last, size);
> +	}
> +	return ret;
> +}
> +
> +static int
> +mlx4_flow_validate_eth(const struct rte_flow_item *item,
> +		       const uint8_t *mask, unsigned int size)
> +{
> +	if (item->mask) {
> +		const struct rte_flow_item_eth *mask = item->mask;
> +
> +		if (mask->dst.addr_bytes[0] != 0xff ||
> +				mask->dst.addr_bytes[1] != 0xff ||
> +				mask->dst.addr_bytes[2] != 0xff ||
> +				mask->dst.addr_bytes[3] != 0xff ||
> +				mask->dst.addr_bytes[4] != 0xff ||
> +				mask->dst.addr_bytes[5] != 0xff)
> +			return -1;
> +	}
> +	return mlx4_flow_item_validate(item, mask, size);
> +}
> +
> +static int
> +mlx4_flow_validate_vlan(const struct rte_flow_item *item,
> +			const uint8_t *mask, unsigned int size)
> +{
> +	if (item->mask) {
> +		const struct rte_flow_item_vlan *mask = item->mask;
> +
> +		if (mask->tci != 0 &&
> +		    ntohs(mask->tci) != 0x0fff)
> +			return -1;
> +	}
> +	return mlx4_flow_item_validate(item, mask, size);
> +}
> +
> +static int
> +mlx4_flow_validate_ipv4(const struct rte_flow_item *item,
> +			const uint8_t *mask, unsigned int size)
> +{
> +	if (item->mask) {
> +		const struct rte_flow_item_ipv4 *mask = item->mask;
> +
> +		if (mask->hdr.src_addr != 0 &&
> +		    mask->hdr.src_addr != 0xffffffff)
> +			return -1;
> +		if (mask->hdr.dst_addr != 0 &&
> +		    mask->hdr.dst_addr != 0xffffffff)
> +			return -1;
> +	}
> +	return mlx4_flow_item_validate(item, mask, size);
> +}
> +
> +static int
> +mlx4_flow_validate_udp(const struct rte_flow_item *item,
> +		       const uint8_t *mask, unsigned int size)
> +{
> +	if (item->mask) {
> +		const struct rte_flow_item_udp *mask = item->mask;
> +
> +		if (mask->hdr.src_port != 0 &&
> +		    mask->hdr.src_port != 0xffff)
> +			return -1;
> +		if (mask->hdr.dst_port != 0 &&
> +		    mask->hdr.dst_port != 0xffff)
> +			return -1;
> +	}
> +	return mlx4_flow_item_validate(item, mask, size);
> +}
> +
> +static int
> +mlx4_flow_validate_tcp(const struct rte_flow_item *item,
> +		       const uint8_t *mask, unsigned int size)
> +{
> +	if (item->mask) {
> +		const struct rte_flow_item_tcp *mask = item->mask;
> +
> +		if (mask->hdr.src_port != 0 &&
> +		    mask->hdr.src_port != 0xffff)
> +			return -1;
> +		if (mask->hdr.dst_port != 0 &&
> +		    mask->hdr.dst_port != 0xffff)
> +			return -1;
> +	}
> +	return mlx4_flow_item_validate(item, mask, size);
> +}
> +
> +/** Graph of supported items and associated actions. */
> +static const struct mlx4_flow_items mlx4_flow_items[] = {
> +	[RTE_FLOW_ITEM_TYPE_END] = {
> +		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
> +	},
> +	[RTE_FLOW_ITEM_TYPE_ETH] = {
> +		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
> +			       RTE_FLOW_ITEM_TYPE_IPV4),
> +		.actions = valid_actions,
> +		.mask = &(const struct rte_flow_item_eth){
> +			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
> +			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
> +		},
> +		.default_mask = &rte_flow_item_eth_mask,
> +		.mask_sz = sizeof(struct rte_flow_item_eth),
> +		.validate = mlx4_flow_validate_eth,
> +		.convert = mlx4_flow_create_eth,
> +		.dst_sz = sizeof(struct ibv_flow_spec_eth),
> +	},
> +	[RTE_FLOW_ITEM_TYPE_VLAN] = {
> +		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4),
> +		.actions = valid_actions,
> +		.mask = &(const struct rte_flow_item_vlan){
> +		/* rte_flow_item_vlan_mask is invalid for mlx4. */
> +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
> +			.tci = 0x0fff,
> +#else
> +			.tci = 0xff0f,
> +#endif
> +		},
> +		.mask_sz = sizeof(struct rte_flow_item_vlan),
> +		.validate = mlx4_flow_validate_vlan,
> +		.convert = mlx4_flow_create_vlan,
> +		.dst_sz = 0,
> +	},
> +	[RTE_FLOW_ITEM_TYPE_IPV4] = {
> +		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
> +			       RTE_FLOW_ITEM_TYPE_TCP),
> +		.actions = valid_actions,
> +		.mask = &(const struct rte_flow_item_ipv4){
> +			.hdr = {
> +				.src_addr = -1,
> +				.dst_addr = -1,
> +			},
> +		},
> +		.default_mask = &rte_flow_item_ipv4_mask,
> +		.mask_sz = sizeof(struct rte_flow_item_ipv4),
> +		.validate = mlx4_flow_validate_ipv4,
> +		.convert = mlx4_flow_create_ipv4,
> +		.dst_sz = sizeof(struct ibv_flow_spec_ipv4),
> +	},
> +	[RTE_FLOW_ITEM_TYPE_UDP] = {
> +		.actions = valid_actions,
> +		.mask = &(const struct rte_flow_item_udp){
> +			.hdr = {
> +				.src_port = -1,
> +				.dst_port = -1,
> +			},
> +		},
> +		.default_mask = &rte_flow_item_udp_mask,
> +		.mask_sz = sizeof(struct rte_flow_item_udp),
> +		.validate = mlx4_flow_validate_udp,
> +		.convert = mlx4_flow_create_udp,
> +		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
> +	},
> +	[RTE_FLOW_ITEM_TYPE_TCP] = {
> +		.actions = valid_actions,
> +		.mask = &(const struct rte_flow_item_tcp){
> +			.hdr = {
> +				.src_port = -1,
> +				.dst_port = -1,
> +			},
> +		},
> +		.default_mask = &rte_flow_item_tcp_mask,
> +		.mask_sz = sizeof(struct rte_flow_item_tcp),
> +		.validate = mlx4_flow_validate_tcp,
> +		.convert = mlx4_flow_create_tcp,
> +		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
> +	},
> +};
> +
> +/**
> + * Validate a flow supported by the NIC.
> + *
> + * @param priv
> + *   Pointer to private structure.
> + * @param[in] attr
> + *   Flow rule attributes.
> + * @param[in] items
> + *   Pattern specification (list terminated by the END pattern item).
> + * @param[in] actions
> + *   Associated actions (list terminated by the END action).
> + * @param[out] error
> + *   Perform verbose error reporting if not NULL.
> + * @param[in, out] flow
> + *   Flow structure to update.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +static int
> +priv_flow_validate(struct priv *priv,
> +		   const struct rte_flow_attr *attr,
> +		   const struct rte_flow_item items[],
> +		   const struct rte_flow_action actions[],
> +		   struct rte_flow_error *error,
> +		   struct mlx4_flow *flow)
> +{
> +	const struct mlx4_flow_items *cur_item = mlx4_flow_items;
> +	struct mlx4_flow_action action = {
> +		.queue = 0,
> +		.drop = 0,
> +	};
> +
> +	(void)priv;
> +	if (attr->group) {
> +		rte_flow_error_set(error, ENOTSUP,
> +				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
> +				   NULL,
> +				   "groups are not supported");
> +		return -rte_errno;
> +	}
> +	if (attr->priority) {
> +		rte_flow_error_set(error, ENOTSUP,
> +				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
> +				   NULL,
> +				   "priorities are not supported");
> +		return -rte_errno;
> +	}
> +	if (attr->egress) {
> +		rte_flow_error_set(error, ENOTSUP,
> +				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
> +				   NULL,
> +				   "egress is not supported");
> +		return -rte_errno;
> +	}
> +	if (!attr->ingress) {
> +		rte_flow_error_set(error, ENOTSUP,
> +				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
> +				   NULL,
> +				   "only ingress is supported");
> +		return -rte_errno;
> +	}
> +	/* Go over items list. */
> +	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
> +		const struct mlx4_flow_items *token = NULL;
> +		unsigned int i;
> +		int err;
> +
> +		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
> +			continue;
> +		/*
> +		 * The NIC can support patterns with a NULL eth spec only
> +		 * if eth is the only item in the rule.
> +		 */
> +		if (!items->spec &&
> +			items->type == RTE_FLOW_ITEM_TYPE_ETH) {
> +			const struct rte_flow_item *next = items + 1;
> +
> +			if (next->type != RTE_FLOW_ITEM_TYPE_END) {
> +				rte_flow_error_set(error, ENOTSUP,
> +						   RTE_FLOW_ERROR_TYPE_ITEM,
> +						   items,
> +						   "the rule requires"
> +						   " an Ethernet spec");
> +				return -rte_errno;
> +			}
> +		}
> +		for (i = 0;
> +		     cur_item->items &&
> +		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
> +		     ++i) {
> +			if (cur_item->items[i] == items->type) {
> +				token = &mlx4_flow_items[items->type];
> +				break;
> +			}
> +		}
> +		if (!token)
> +			goto exit_item_not_supported;
> +		cur_item = token;
> +		err = cur_item->validate(items,
> +					(const uint8_t *)cur_item->mask,
> +					 cur_item->mask_sz);
> +		if (err)
> +			goto exit_item_not_supported;
> +		if (flow->ibv_attr && cur_item->convert) {
> +			err = cur_item->convert(items,
> +						(cur_item->default_mask ?
> +						 cur_item->default_mask :
> +						 cur_item->mask),
> +						 flow);
> +			if (err)
> +				goto exit_item_not_supported;
> +		}
> +		flow->offset += cur_item->dst_sz;
> +	}
> +	/* Go over actions list */
> +	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
> +		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
> +			continue;
> +		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
> +			action.drop = 1;
> +		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
> +			const struct rte_flow_action_queue *queue =
> +				(const struct rte_flow_action_queue *)
> +				actions->conf;
> +
> +			if (!queue || (queue->index > (priv->rxqs_n - 1)))
> +				goto exit_action_not_supported;
> +			action.queue = 1;
> +		} else {
> +			goto exit_action_not_supported;
> +		}
> +	}
> +	if (!action.queue && !action.drop) {
> +		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
> +				   NULL, "no valid action");
> +		return -rte_errno;
> +	}
> +	return 0;
> +exit_item_not_supported:
> +	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
> +			   items, "item not supported");
> +	return -rte_errno;
> +exit_action_not_supported:
> +	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
> +			   actions, "action not supported");
> +	return -rte_errno;
> +}
> +
> +/**
> + * Validate a flow supported by the NIC.
> + *
> + * @see rte_flow_validate()
> + * @see rte_flow_ops
> + */
> +int
> +mlx4_flow_validate(struct rte_eth_dev *dev,
> +		   const struct rte_flow_attr *attr,
> +		   const struct rte_flow_item items[],
> +		   const struct rte_flow_action actions[],
> +		   struct rte_flow_error *error)
> +{
> +	struct priv *priv = dev->data->dev_private;
> +	int ret;
> +	struct mlx4_flow flow = { .offset = sizeof(struct ibv_flow_attr) };
> +
> +	priv_lock(priv);
> +	ret = priv_flow_validate(priv, attr, items, actions, error, &flow);
> +	priv_unlock(priv);
> +	return ret;
> +}
> +
> +/**
> + * Complete flow rule creation.
> + *
> + * @param priv
> + *   Pointer to private structure.
> + * @param ibv_attr
> + *   Verbs flow attributes.
> + * @param action
> + *   Target action structure.
> + * @param[out] error
> + *   Perform verbose error reporting if not NULL.
> + *
> + * @return
> + *   A flow if the rule could be created.
> + */
> +static struct rte_flow *
> +priv_flow_create_action_queue(struct priv *priv,
> +			      struct ibv_flow_attr *ibv_attr,
> +			      struct mlx4_flow_action *action,
> +			      struct rte_flow_error *error)
> +{
> +	struct rxq *rxq;
> +	struct ibv_qp *qp;
> +	struct rte_flow *rte_flow;
> +
> +	assert(priv->pd);
> +	assert(priv->ctx);
> +	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
> +	if (!rte_flow) {
> +		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
> +				   NULL, "cannot allocate flow memory");
> +		return NULL;
> +	}
> +	rxq = (*priv->rxqs)[action->queue_id];
> +	if (action->drop) {
> +		rte_flow->cq =
> +			ibv_exp_create_cq(priv->ctx, 1, NULL, NULL, 0,
> +					  &(struct ibv_exp_cq_init_attr){
> +						  .comp_mask = 0,
> +					  });
> +		if (!rte_flow->cq) {
> +			rte_flow_error_set(error, ENOMEM,
> +					   RTE_FLOW_ERROR_TYPE_HANDLE,
> +					   NULL, "cannot allocate CQ");
> +			goto error;
> +		}
> +		rte_flow->qp = ibv_exp_create_qp(
> +			priv->ctx,
> +			&(struct ibv_exp_qp_init_attr){
> +				.send_cq = rte_flow->cq,
> +				.recv_cq = rte_flow->cq,
> +				.cap = {
> +					.max_recv_wr = 1,
> +					.max_recv_sge = 1,
> +				},
> +				.qp_type = IBV_QPT_RAW_PACKET,
> +				.comp_mask =
> +					IBV_EXP_QP_INIT_ATTR_PD |
> +					IBV_EXP_QP_INIT_ATTR_PORT |
> +					IBV_EXP_QP_INIT_ATTR_RES_DOMAIN,
> +				.pd = priv->pd,
> +				.res_domain = rxq->rd,
> +				.port_num = priv->port,
> +			});
> +		if (!rte_flow->qp) {
> +			rte_flow_error_set(error, ENOMEM,
> +					   RTE_FLOW_ERROR_TYPE_HANDLE,
> +					   NULL, "cannot allocate QP");
> +			goto error;
> +		}
> +		qp = rte_flow->qp;
> +	} else {
> +		rte_flow->rxq = rxq;
> +		qp = rxq->qp;
> +	}
> +	rte_flow->ibv_attr = ibv_attr;
> +	rte_flow->ibv_flow = ibv_create_flow(qp, rte_flow->ibv_attr);
> +	if (!rte_flow->ibv_flow) {
> +		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
> +				   NULL, "flow rule creation failure");
> +		goto error;
> +	}
> +	return rte_flow;
> +
> +error:
> +	assert(rte_flow);
> +	if (rte_flow->cq)
> +		ibv_destroy_cq(rte_flow->cq);
> +	if (rte_flow->qp)
> +		ibv_destroy_qp(rte_flow->qp);
> +	rte_free(rte_flow->ibv_attr);
> +	rte_free(rte_flow);
> +	return NULL;
> +}
> +
> +/**
> + * Convert a flow.
> + *
> + * @param priv
> + *   Pointer to private structure.
> + * @param[in] attr
> + *   Flow rule attributes.
> + * @param[in] items
> + *   Pattern specification (list terminated by the END pattern item).
> + * @param[in] actions
> + *   Associated actions (list terminated by the END action).
> + * @param[out] error
> + *   Perform verbose error reporting if not NULL.
> + *
> + * @return
> + *   A flow on success, NULL otherwise.
> + */
> +static struct rte_flow *
> +priv_flow_create(struct priv *priv,
> +		 const struct rte_flow_attr *attr,
> +		 const struct rte_flow_item items[],
> +		 const struct rte_flow_action actions[],
> +		 struct rte_flow_error *error)
> +{
> +	struct rte_flow *rte_flow;
> +	struct mlx4_flow_action action;
> +	struct mlx4_flow flow = { .offset = sizeof(struct ibv_flow_attr), };
> +	int err;
> +
> +	err = priv_flow_validate(priv, attr, items, actions, error, &flow);
> +	if (err)
> +		return NULL;
> +	flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
> +	if (!flow.ibv_attr) {
> +		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
> +				   NULL, "cannot allocate ibv_attr memory");
> +		return NULL;
> +	}
> +	flow.offset = sizeof(struct ibv_flow_attr);
> +	*flow.ibv_attr = (struct ibv_flow_attr){
> +		.comp_mask = 0,
> +		.type = IBV_FLOW_ATTR_NORMAL,
> +		.size = sizeof(struct ibv_flow_attr),
> +		.priority = attr->priority,
> +		.num_of_specs = 0,
> +		.port = priv->port,
> +		.flags = 0,
> +	};
> +	claim_zero(priv_flow_validate(priv, attr, items, actions,
> +				      error, &flow));
> +	action = (struct mlx4_flow_action){
> +		.queue = 0,
> +		.drop = 0,
> +	};
> +	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
> +		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
> +			continue;
> +		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
> +			action.queue = 1;
> +			action.queue_id =
> +				((const struct rte_flow_action_queue *)
> +				 actions->conf)->index;
> +		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
> +			action.drop = 1;
> +		} else {
> +			rte_flow_error_set(error, ENOTSUP,
> +					   RTE_FLOW_ERROR_TYPE_ACTION,
> +					   actions, "unsupported action");
> +			goto exit;
> +		}
> +	}
> +	rte_flow = priv_flow_create_action_queue(priv, flow.ibv_attr,
> +						 &action, error);
> +	return rte_flow;
> +exit:
> +	rte_free(flow.ibv_attr);
> +	return NULL;
> +}
> +
> +/**
> + * Create a flow.
> + *
> + * @see rte_flow_create()
> + * @see rte_flow_ops
> + */
> +struct rte_flow *
> +mlx4_flow_create(struct rte_eth_dev *dev,
> +		 const struct rte_flow_attr *attr,
> +		 const struct rte_flow_item items[],
> +		 const struct rte_flow_action actions[],
> +		 struct rte_flow_error *error)
> +{
> +	struct priv *priv = dev->data->dev_private;
> +	struct rte_flow *flow;
> +
> +	priv_lock(priv);
> +	flow = priv_flow_create(priv, attr, items, actions, error);
> +	if (flow) {
> +		LIST_INSERT_HEAD(&priv->flows, flow, next);
> +		DEBUG("Flow created %p", (void *)flow);
> +	}
> +	priv_unlock(priv);
> +	return flow;
> +}
> +
> +/**
> + * Destroy a flow.
> + *
> + * @param priv
> + *   Pointer to private structure.
> + * @param[in] flow
> + *   Flow to destroy.
> + */
> +static void
> +priv_flow_destroy(struct priv *priv, struct rte_flow *flow)
> +{
> +	(void)priv;
> +	LIST_REMOVE(flow, next);
> +	if (flow->ibv_flow)
> +		claim_zero(ibv_destroy_flow(flow->ibv_flow));
> +	if (flow->qp)
> +		claim_zero(ibv_destroy_qp(flow->qp));
> +	if (flow->cq)
> +		claim_zero(ibv_destroy_cq(flow->cq));
> +	rte_free(flow->ibv_attr);
> +	DEBUG("Flow destroyed %p", (void *)flow);
> +	rte_free(flow);
> +}
> +
> +/**
> + * Destroy a flow.
> + *
> + * @see rte_flow_destroy()
> + * @see rte_flow_ops
> + */
> +int
> +mlx4_flow_destroy(struct rte_eth_dev *dev,
> +		  struct rte_flow *flow,
> +		  struct rte_flow_error *error)
> +{
> +	struct priv *priv = dev->data->dev_private;
> +
> +	(void)error;
> +	priv_lock(priv);
> +	priv_flow_destroy(priv, flow);
> +	priv_unlock(priv);
> +	return 0;
> +}
> +
> +/**
> + * Destroy all flows.
> + *
> + * @param priv
> + *   Pointer to private structure.
> + */
> +static void
> +priv_flow_flush(struct priv *priv)
> +{
> +	while (!LIST_EMPTY(&priv->flows)) {
> +		struct rte_flow *flow;
> +
> +		flow = LIST_FIRST(&priv->flows);
> +		priv_flow_destroy(priv, flow);
> +	}
> +}
> +
> +/**
> + * Destroy all flows.
> + *
> + * @see rte_flow_flush()
> + * @see rte_flow_ops
> + */
> +int
> +mlx4_flow_flush(struct rte_eth_dev *dev,
> +		struct rte_flow_error *error)
> +{
> +	struct priv *priv = dev->data->dev_private;
> +
> +	(void)error;
> +	priv_lock(priv);
> +	priv_flow_flush(priv);
> +	priv_unlock(priv);
> +	return 0;
> +}
> +
> +/**
> + * Remove all flows.
> + *
> + * Called by dev_stop() to remove all flows.
> + *
> + * @param priv
> + *   Pointer to private structure.
> + */
> +void
> +mlx4_priv_flow_stop(struct priv *priv)
> +{
> +	struct rte_flow *flow;
> +
> +	for (flow = LIST_FIRST(&priv->flows);
> +	     flow;
> +	     flow = LIST_NEXT(flow, next)) {
> +		claim_zero(ibv_destroy_flow(flow->ibv_flow));
> +		flow->ibv_flow = NULL;
> +		DEBUG("Flow %p removed", (void *)flow);
> +	}
> +}
> +
> +/**
> + * Add all flows.
> + *
> + * @param priv
> + *   Pointer to private structure.
> + *
> + * @return
> + *   0 on success, an errno value otherwise and rte_errno is set.
> + */
> +int
> +mlx4_priv_flow_start(struct priv *priv)
> +{
> +	struct ibv_qp *qp;
> +	struct rte_flow *flow;
> +
> +	for (flow = LIST_FIRST(&priv->flows);
> +	     flow;
> +	     flow = LIST_NEXT(flow, next)) {
> +		qp = flow->qp ? flow->qp : flow->rxq->qp;
> +		flow->ibv_flow = ibv_create_flow(qp, flow->ibv_attr);
> +		if (!flow->ibv_flow) {
> +			DEBUG("Flow %p cannot be applied", (void *)flow);
> +			rte_errno = EINVAL;
> +			return rte_errno;
> +		}
> +		DEBUG("Flow %p applied", (void *)flow);
> +	}
> +	return 0;
> +}
> diff --git a/drivers/net/mlx4/mlx4_flow.h b/drivers/net/mlx4/mlx4_flow.h
> new file mode 100644
> index 0000000..66c5be6
> --- /dev/null
> +++ b/drivers/net/mlx4/mlx4_flow.h
> @@ -0,0 +1,104 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright 2017 6WIND S.A.
> + *   Copyright 2017 Mellanox.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of 6WIND S.A. nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#ifndef RTE_PMD_MLX4_FLOW_H_
> +#define RTE_PMD_MLX4_FLOW_H_
> +
> +#include <stddef.h>
> +#include <stdint.h>
> +#include <sys/queue.h>
> +
> +/* Verbs header. */
> +/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
> +#ifdef PEDANTIC
> +#pragma GCC diagnostic ignored "-Wpedantic"
> +#endif
> +#include <infiniband/verbs.h>
> +#ifdef PEDANTIC
> +#pragma GCC diagnostic error "-Wpedantic"
> +#endif
> +
> +#include <rte_flow.h>
> +#include <rte_flow_driver.h>
> +#include <rte_byteorder.h>
> +
> +#include "mlx4.h"
> +
> +struct rte_flow {
> +	LIST_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
> +	struct rxq *rxq; /**< Pointer to the queue, NULL if drop queue. */
> +	struct ibv_flow *ibv_flow; /**< Verbs flow. */
> +	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
> +	struct ibv_qp *qp; /**< Verbs queue pair. */
> +	struct ibv_cq *cq; /**< Verbs completion queue. */
> +};
> +
> +int
> +mlx4_flow_validate(struct rte_eth_dev *dev,
> +		   const struct rte_flow_attr *attr,
> +		   const struct rte_flow_item items[],
> +		   const struct rte_flow_action actions[],
> +		   struct rte_flow_error *error);
> +
> +struct rte_flow *
> +mlx4_flow_create(struct rte_eth_dev *dev,
> +		 const struct rte_flow_attr *attr,
> +		 const struct rte_flow_item items[],
> +		 const struct rte_flow_action actions[],
> +		 struct rte_flow_error *error);
> +
> +int
> +mlx4_flow_destroy(struct rte_eth_dev *dev,
> +		  struct rte_flow *flow,
> +		  struct rte_flow_error *error);
> +
> +int
> +mlx4_flow_flush(struct rte_eth_dev *dev,
> +		struct rte_flow_error *error);
> +
> +/** Structure to pass to the conversion function. */
> +struct mlx4_flow {
> +	struct ibv_flow_attr *ibv_attr; /**< Verbs attribute. */
> +	unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
> +};
> +
> +struct mlx4_flow_action {
> +	uint32_t drop:1; /**< Target is a drop queue. */
> +	uint32_t queue:1; /**< Target is a receive queue. */
> +	uint32_t queue_id; /**< Identifier of the queue. */
> +};
> +
> +int mlx4_priv_flow_start(struct priv *priv);
> +void mlx4_priv_flow_stop(struct priv *priv);
> +
> +#endif /* RTE_PMD_MLX4_FLOW_H_ */
> -- 
> 1.8.3.1
> 

Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>

-- 
Nélio Laranjeiro
6WIND

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [dpdk-dev] [PATCH v3 1/2] net/mlx4: split the definitions to the header file
  2017-03-20  9:19   ` Nélio Laranjeiro
@ 2017-03-20 14:18     ` Ferruh Yigit
  0 siblings, 0 replies; 15+ messages in thread
From: Ferruh Yigit @ 2017-03-20 14:18 UTC (permalink / raw)
  To: Nélio Laranjeiro, Vasily Philipov; +Cc: dev, Adrien Mazarguil

On 3/20/2017 9:19 AM, Nélio Laranjeiro wrote:
> On Sun, Mar 05, 2017 at 09:51:31AM +0200, Vasily Philipov wrote:
>> Make priv_lock/priv_unlock functions and some other structs/defines visible
>> from different source files by placing them into mlx4.h header.
>>
>> Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>

> Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>

Series applied to dpdk-next-net/master, thanks.

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2017-03-20 14:18 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-21 14:07 [dpdk-dev] [PATCH 1/2] net/mlx4: split the definitions to the header file Vasily Philipov
2017-02-21 14:07 ` [dpdk-dev] [PATCH 2/2] net/mlx4: support basic flow items and actions Vasily Philipov
2017-02-22  8:37   ` Nélio Laranjeiro
2017-02-22 10:10     ` Nélio Laranjeiro
2017-02-22  8:37 ` [dpdk-dev] [PATCH 1/2] net/mlx4: split the definitions to the header file Nélio Laranjeiro
2017-02-22 13:42 ` [dpdk-dev] [PATCH v2 " Vasily Philipov
2017-02-22 19:04   ` Ferruh Yigit
2017-02-23 10:44     ` Vasily Philipov
2017-03-06  9:24       ` Ferruh Yigit
2017-02-22 13:42 ` [dpdk-dev] [PATCH v2 2/2] net/mlx4: support basic flow items and actions Vasily Philipov
2017-03-05  7:51 ` [dpdk-dev] [PATCH v3 1/2] net/mlx4: split the definitions to the header file Vasily Philipov
2017-03-20  9:19   ` Nélio Laranjeiro
2017-03-20 14:18     ` Ferruh Yigit
2017-03-05  7:51 ` [dpdk-dev] [PATCH v3 2/2] net/mlx4: support basic flow items and actions Vasily Philipov
2017-03-20  9:19   ` Nélio Laranjeiro

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).