* [dpdk-dev] [PATCH v2 0/2] support e-switch flow count action
@ 2018-10-11 13:04 Mordechay Haimovsky
  2018-10-11 13:04 ` [dpdk-dev] [PATCH v2 1/2] net/mlx5: refactor TC-flow infrastructure Mordechay Haimovsky
  2018-10-11 13:04 ` [dpdk-dev] [PATCH v2 2/2] net/mlx5: support e-switch flow count action Mordechay Haimovsky
  0 siblings, 2 replies; 21+ messages in thread
From: Mordechay Haimovsky @ 2018-10-11 13:04 UTC (permalink / raw)
  To: Shahaf Shuler; +Cc: dev, Mordechay Haimovsky
The following patches add support in mlx5 PMD for configuring and
reading flow counters from the device e-switch.
Moti Haimovsky (2):
  net/mlx5: refactor TC-flow infrastructure
  net/mlx5: support e-switch flow count action
 drivers/net/mlx5/mlx5.c            |  18 +-
 drivers/net/mlx5/mlx5.h            |   4 +-
 drivers/net/mlx5/mlx5_flow.c       |  29 ++-
 drivers/net/mlx5/mlx5_flow.h       |  22 +-
 drivers/net/mlx5/mlx5_flow_dv.c    |   1 +
 drivers/net/mlx5/mlx5_flow_tcf.c   | 397 ++++++++++++++++++++++++++++++++++---
 drivers/net/mlx5/mlx5_flow_verbs.c |   1 +
 7 files changed, 426 insertions(+), 46 deletions(-)
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 21+ messages in thread
* [dpdk-dev] [PATCH v2 1/2] net/mlx5: refactor TC-flow infrastructure
  2018-10-11 13:04 [dpdk-dev] [PATCH v2 0/2] support e-switch flow count action Mordechay Haimovsky
@ 2018-10-11 13:04 ` Mordechay Haimovsky
  2018-10-14 11:07   ` Shahaf Shuler
  2018-10-11 13:04 ` [dpdk-dev] [PATCH v2 2/2] net/mlx5: support e-switch flow count action Mordechay Haimovsky
  1 sibling, 1 reply; 21+ messages in thread
From: Mordechay Haimovsky @ 2018-10-11 13:04 UTC (permalink / raw)
  To: Shahaf Shuler; +Cc: dev, Mordechay Haimovsky
This commit refactors tc_flow as a preparation to coming commits
that sends different type of messages and expect differ type of replies
while still using the same underlying routines.
Signed-off-by: Moti Haimovsky <motih@mellanox.com>
---
 drivers/net/mlx5/mlx5.c          |  18 +++----
 drivers/net/mlx5/mlx5.h          |   4 +-
 drivers/net/mlx5/mlx5_flow.h     |   8 +--
 drivers/net/mlx5/mlx5_flow_tcf.c | 113 ++++++++++++++++++++++++++++++---------
 4 files changed, 102 insertions(+), 41 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 7425e2f..20adf88 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -286,8 +286,8 @@
 		close(priv->nl_socket_route);
 	if (priv->nl_socket_rdma >= 0)
 		close(priv->nl_socket_rdma);
-	if (priv->mnl_socket)
-		mlx5_flow_tcf_socket_destroy(priv->mnl_socket);
+	if (priv->tcf_context)
+		mlx5_flow_tcf_context_destroy(priv->tcf_context);
 	ret = mlx5_hrxq_ibv_verify(dev);
 	if (ret)
 		DRV_LOG(WARNING, "port %u some hash Rx queue still remain",
@@ -1137,8 +1137,8 @@
 	claim_zero(mlx5_mac_addr_add(eth_dev, &mac, 0, 0));
 	if (vf && config.vf_nl_en)
 		mlx5_nl_mac_addr_sync(eth_dev);
-	priv->mnl_socket = mlx5_flow_tcf_socket_create();
-	if (!priv->mnl_socket) {
+	priv->tcf_context = mlx5_flow_tcf_context_create();
+	if (!priv->tcf_context) {
 		err = -rte_errno;
 		DRV_LOG(WARNING,
 			"flow rules relying on switch offloads will not be"
@@ -1153,7 +1153,7 @@
 			error.message =
 				"cannot retrieve network interface index";
 		} else {
-			err = mlx5_flow_tcf_init(priv->mnl_socket, ifindex,
+			err = mlx5_flow_tcf_init(priv->tcf_context, ifindex,
 						&error);
 		}
 		if (err) {
@@ -1161,8 +1161,8 @@
 				"flow rules relying on switch offloads will"
 				" not be supported: %s: %s",
 				error.message, strerror(rte_errno));
-			mlx5_flow_tcf_socket_destroy(priv->mnl_socket);
-			priv->mnl_socket = NULL;
+			mlx5_flow_tcf_context_destroy(priv->tcf_context);
+			priv->tcf_context = NULL;
 		}
 	}
 	TAILQ_INIT(&priv->flows);
@@ -1217,8 +1217,8 @@
 			close(priv->nl_socket_route);
 		if (priv->nl_socket_rdma >= 0)
 			close(priv->nl_socket_rdma);
-		if (priv->mnl_socket)
-			mlx5_flow_tcf_socket_destroy(priv->mnl_socket);
+		if (priv->tcf_context)
+			mlx5_flow_tcf_context_destroy(priv->tcf_context);
 		if (own_domain_id)
 			claim_zero(rte_eth_switch_domain_free(priv->domain_id));
 		rte_free(priv);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 2dec88a..d14239c 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -169,7 +169,7 @@ struct mlx5_drop {
 	struct mlx5_rxq_ibv *rxq; /* Verbs Rx queue. */
 };
 
-struct mnl_socket;
+struct mlx5_flow_tcf_context;
 
 struct priv {
 	LIST_ENTRY(priv) mem_event_cb; /* Called by memory event callback. */
@@ -236,7 +236,7 @@ struct priv {
 	rte_spinlock_t uar_lock[MLX5_UAR_PAGE_NUM_MAX];
 	/* UAR same-page access control required in 32bit implementations. */
 #endif
-	struct mnl_socket *mnl_socket; /* Libmnl socket. */
+	struct mlx5_flow_tcf_context *tcf_context; /* TC flower context. */
 };
 
 #define PORT_ID(priv) ((priv)->dev_data->port_id)
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index fee05f0..41d55e8 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -338,9 +338,9 @@ int mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
 
 /* mlx5_flow_tcf.c */
 
-int mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
-		       struct rte_flow_error *error);
-struct mnl_socket *mlx5_flow_tcf_socket_create(void);
-void mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl);
+int mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *ctx,
+		       unsigned int ifindex, struct rte_flow_error *error);
+struct mlx5_flow_tcf_context *mlx5_flow_tcf_context_create(void);
+void mlx5_flow_tcf_context_destroy(struct mlx5_flow_tcf_context *ctx);
 
 #endif /* RTE_PMD_MLX5_FLOW_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
index 91f6ef6..8535a15 100644
--- a/drivers/net/mlx5/mlx5_flow_tcf.c
+++ b/drivers/net/mlx5/mlx5_flow_tcf.c
@@ -153,6 +153,19 @@ struct tc_vlan {
 #define IPV6_ADDR_LEN 16
 #endif
 
+/**
+ * Structure for holding netlink context.
+ * Note the size of the message buffer which is MNL_SOCKET_BUFFER_SIZE.
+ * Using this (8KB) buffer size ensures that netlink messages will never be
+ * truncated.
+ */
+struct mlx5_flow_tcf_context {
+	struct mnl_socket *nl; /* NETLINK_ROUTE libmnl socket. */
+	uint32_t seq; /* Message sequence number. */
+	uint32_t buf_size; /* Message buffer size. */
+	uint8_t *buf; /* Message buffer. */
+};
+
 /** Empty masks for known item types. */
 static const union {
 	struct rte_flow_item_port_id port_id;
@@ -1429,8 +1442,8 @@ struct flow_tcf_ptoi {
 /**
  * Send Netlink message with acknowledgment.
  *
- * @param nl
- *   Libmnl socket to use.
+ * @param ctx
+ *   Flow context to use.
  * @param nlh
  *   Message to send. This function always raises the NLM_F_ACK flag before
  *   sending.
@@ -1439,12 +1452,13 @@ struct flow_tcf_ptoi {
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-flow_tcf_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
+flow_tcf_nl_ack(struct mlx5_flow_tcf_context *ctx, struct nlmsghdr *nlh)
 {
 	alignas(struct nlmsghdr)
 	uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
 		    nlh->nlmsg_len - sizeof(*nlh)];
-	uint32_t seq = random();
+	uint32_t seq = ctx->seq++;
+	struct mnl_socket *nl = ctx->nl;
 	int ret;
 
 	nlh->nlmsg_flags |= NLM_F_ACK;
@@ -1479,7 +1493,7 @@ struct flow_tcf_ptoi {
 	       struct rte_flow_error *error)
 {
 	struct priv *priv = dev->data->dev_private;
-	struct mnl_socket *nl = priv->mnl_socket;
+	struct mlx5_flow_tcf_context *nl = priv->tcf_context;
 	struct mlx5_flow *dev_flow;
 	struct nlmsghdr *nlh;
 
@@ -1508,7 +1522,7 @@ struct flow_tcf_ptoi {
 flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
 {
 	struct priv *priv = dev->data->dev_private;
-	struct mnl_socket *nl = priv->mnl_socket;
+	struct mlx5_flow_tcf_context *nl = priv->tcf_context;
 	struct mlx5_flow *dev_flow;
 	struct nlmsghdr *nlh;
 
@@ -1560,10 +1574,47 @@ struct flow_tcf_ptoi {
 };
 
 /**
- * Initialize ingress qdisc of a given network interface.
+ * Create and configure a libmnl socket for Netlink flow rules.
+ *
+ * @return
+ *   A valid libmnl socket object pointer on success, NULL otherwise and
+ *   rte_errno is set.
+ */
+static struct mnl_socket *
+mlx5_flow_mnl_socket_create(void)
+{
+	struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
+
+	if (nl) {
+		mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
+				      sizeof(int));
+		if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
+			return nl;
+	}
+	rte_errno = errno;
+	if (nl)
+		mnl_socket_close(nl);
+	return NULL;
+}
+
+/**
+ * Destroy a libmnl socket.
  *
  * @param nl
  *   Libmnl socket of the @p NETLINK_ROUTE kind.
+ */
+static void
+mlx5_flow_mnl_socket_destroy(struct mnl_socket *nl)
+{
+	if (nl)
+		mnl_socket_close(nl);
+}
+
+/**
+ * Initialize ingress qdisc of a given network interface.
+ *
+ * @param nl
+ *   Pointer to tc-flower context to use.
  * @param ifindex
  *   Index of network interface to initialize.
  * @param[out] error
@@ -1573,8 +1624,8 @@ struct flow_tcf_ptoi {
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
-		   struct rte_flow_error *error)
+mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *nl,
+		   unsigned int ifindex, struct rte_flow_error *error)
 {
 	struct nlmsghdr *nlh;
 	struct tcmsg *tcm;
@@ -1616,37 +1667,47 @@ struct flow_tcf_ptoi {
 }
 
 /**
- * Create and configure a libmnl socket for Netlink flow rules.
+ * Create libmnl context for Netlink flow rules.
  *
  * @return
  *   A valid libmnl socket object pointer on success, NULL otherwise and
  *   rte_errno is set.
  */
-struct mnl_socket *
-mlx5_flow_tcf_socket_create(void)
+struct mlx5_flow_tcf_context *
+mlx5_flow_tcf_context_create(void)
 {
-	struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
-
-	if (nl) {
-		mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
-				      sizeof(int));
-		if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
-			return nl;
-	}
-	rte_errno = errno;
-	if (nl)
-		mnl_socket_close(nl);
+	struct mlx5_flow_tcf_context *ctx = rte_zmalloc(__func__,
+							sizeof(*ctx),
+							sizeof(uint32_t));
+	if (!ctx)
+		goto error;
+	ctx->nl = mlx5_flow_mnl_socket_create();
+	if (!ctx->nl)
+		goto error;
+	ctx->buf_size = MNL_SOCKET_BUFFER_SIZE;
+	ctx->buf = rte_zmalloc(__func__,
+			       ctx->buf_size, sizeof(uint32_t));
+	if (!ctx->buf)
+		goto error;
+	ctx->seq = random();
+	return ctx;
+error:
+	mlx5_flow_tcf_context_destroy(ctx);
 	return NULL;
 }
 
 /**
- * Destroy a libmnl socket.
+ * Destroy a libmnl context.
  *
  * @param nl
  *   Libmnl socket of the @p NETLINK_ROUTE kind.
  */
 void
-mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl)
+mlx5_flow_tcf_context_destroy(struct mlx5_flow_tcf_context *ctx)
 {
-	mnl_socket_close(nl);
+	if (!ctx)
+		return;
+	mlx5_flow_mnl_socket_destroy(ctx->nl);
+	rte_free(ctx->buf);
+	rte_free(ctx);
 }
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 21+ messages in thread
* [dpdk-dev] [PATCH v2 2/2] net/mlx5: support e-switch flow count action
  2018-10-11 13:04 [dpdk-dev] [PATCH v2 0/2] support e-switch flow count action Mordechay Haimovsky
  2018-10-11 13:04 ` [dpdk-dev] [PATCH v2 1/2] net/mlx5: refactor TC-flow infrastructure Mordechay Haimovsky
@ 2018-10-11 13:04 ` Mordechay Haimovsky
  2018-10-14 11:07   ` Shahaf Shuler
                     ` (3 more replies)
  1 sibling, 4 replies; 21+ messages in thread
From: Mordechay Haimovsky @ 2018-10-11 13:04 UTC (permalink / raw)
  To: Shahaf Shuler; +Cc: dev, Mordechay Haimovsky
This commit adds support for configuring flows destined to the mlx5
eswitch with 'count' action and for querying these counts at runtime.
It is possible to offload an interface flow rules to the hardware
using DPDK flow commands.
With mlx5 it is also possible to offload a limited set of flow rules to
the mlxsw (or e-switch) using the same DPDK flow commands using the
'transfer' attribute in the flow rule creation command.
The commands destined for the switch are transposed to TC flower rules
and are sent, as Netlink messages, to the mlx5 driver (or more precisely
to the netdev which represent the mlxsw port).
Each flow rule configured by the mlx5 driver is also assigned with a set
of flow counters implicitly. These counters can be retrieved when querying
the flow rule via Netlink, they can be found in each flow action section
of the reply.
Currently the limited set of eswitch flow rules does not contain the
'count' action but since every rule contains a count we can still retrieve
these values as if we configured a 'count' action.
Supporting the 'count' action in the flow configuration command is
straight-forward. When transposing the command to a tc flower Netlink
message we just ignore it instead of rejecting it.
So the following two commands will have the same affect and behavior:
  testpmd> flow create 0 transfer ingress pattern eth src is
           11:22:33:44:55:77 dst is 11:22:33:44:55:88 / end
           actions drop / end
  testpmd> flow create 0 transfer ingress pattern eth src is
           11:22:33:44:55:77 dst is 11:22:33:44:55:88 / end
           actions count / drop / end
In the flow query side, the command now also returns the counts the
above flow via using tc Netlink query command.
Special care was taken in order to prevent Netlink messages truncation
due to short buffers by using MNL_SOCKET_BUFFER_SIZE buffers which are
pre-allocate per port instance.
Signed-off-by: Moti Haimovsky <motih@mellanox.com>
---
v2:
 * Rebase on top of 3f4722ee01e7 ("net/mlx5: refactor TC-flow infrastructure")
---
 drivers/net/mlx5/mlx5_flow.c       |  29 +++-
 drivers/net/mlx5/mlx5_flow.h       |  14 +-
 drivers/net/mlx5/mlx5_flow_dv.c    |   1 +
 drivers/net/mlx5/mlx5_flow_tcf.c   | 286 ++++++++++++++++++++++++++++++++++++-
 drivers/net/mlx5/mlx5_flow_verbs.c |   1 +
 5 files changed, 325 insertions(+), 6 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 6b2698a..7c6ece1 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1649,6 +1649,17 @@ uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
 {
 }
 
+int
+flow_null_query(struct rte_eth_dev *dev __rte_unused,
+		struct rte_flow *flow __rte_unused,
+		enum rte_flow_action_type type __rte_unused,
+		void *data __rte_unused,
+		struct rte_flow_error *error __rte_unused)
+{
+	rte_errno = ENOTSUP;
+	return -rte_errno;
+}
+
 /* Void driver to protect from null pointer reference. */
 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
 	.validate = flow_null_validate,
@@ -1657,6 +1668,7 @@ uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
 	.apply = flow_null_apply,
 	.remove = flow_null_remove,
 	.destroy = flow_null_destroy,
+	.query = flow_null_query,
 };
 
 /**
@@ -2349,10 +2361,19 @@ struct rte_flow *
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-mlx5_flow_query_count(struct rte_flow *flow __rte_unused,
-		      void *data __rte_unused,
+mlx5_flow_query_count(struct rte_eth_dev *dev,
+		      struct rte_flow *flow,
+		      void *data,
 		      struct rte_flow_error *error)
 {
+	const struct mlx5_flow_driver_ops *fops;
+	enum mlx5_flow_drv_type ftype = flow->drv_type;
+
+	assert(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
+	fops = flow_get_drv_ops(ftype);
+	if (ftype == MLX5_FLOW_TYPE_TCF)
+		return fops->query(dev, flow,
+				   RTE_FLOW_ACTION_TYPE_COUNT, data, error);
 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
 	if (flow->actions & MLX5_FLOW_ACTION_COUNT) {
 		struct rte_flow_query_count *qc = data;
@@ -2402,7 +2423,7 @@ struct rte_flow *
  * @see rte_flow_ops
  */
 int
-mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
+mlx5_flow_query(struct rte_eth_dev *dev,
 		struct rte_flow *flow,
 		const struct rte_flow_action *actions,
 		void *data,
@@ -2415,7 +2436,7 @@ struct rte_flow *
 		case RTE_FLOW_ACTION_TYPE_VOID:
 			break;
 		case RTE_FLOW_ACTION_TYPE_COUNT:
-			ret = mlx5_flow_query_count(flow, data, error);
+			ret = mlx5_flow_query_count(dev, flow, data, error);
 			break;
 		default:
 			return rte_flow_error_set(error, ENOTSUP,
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 41d55e8..3e1e9a0 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -175,6 +175,8 @@ struct mlx5_flow_dv {
 struct mlx5_flow_tcf {
 	struct nlmsghdr *nlh;
 	struct tcmsg *tcm;
+	uint64_t hits;
+	uint64_t bytes;
 };
 
 /* Verbs specification header. */
@@ -232,7 +234,6 @@ struct rte_flow {
 	struct rte_flow_action_rss rss;/**< RSS context. */
 	uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */
 	uint16_t (*queue)[]; /**< Destination queues to redirect traffic to. */
-	void *nl_flow; /**< Netlink flow buffer if relevant. */
 	LIST_HEAD(dev_flows, mlx5_flow) dev_flows;
 	/**< Device flows that are part of the flow. */
 	uint32_t actions; /**< Bit-fields which mark all detected actions. */
@@ -258,6 +259,11 @@ typedef void (*mlx5_flow_remove_t)(struct rte_eth_dev *dev,
 				   struct rte_flow *flow);
 typedef void (*mlx5_flow_destroy_t)(struct rte_eth_dev *dev,
 				    struct rte_flow *flow);
+typedef int (*mlx5_flow_query_t)(struct rte_eth_dev *dev,
+				 struct rte_flow *flow,
+				 enum rte_flow_action_type type,
+				 void *data,
+				 struct rte_flow_error *error);
 struct mlx5_flow_driver_ops {
 	mlx5_flow_validate_t validate;
 	mlx5_flow_prepare_t prepare;
@@ -265,10 +271,16 @@ struct mlx5_flow_driver_ops {
 	mlx5_flow_apply_t apply;
 	mlx5_flow_remove_t remove;
 	mlx5_flow_destroy_t destroy;
+	mlx5_flow_query_t query;
 };
 
 /* mlx5_flow.c */
 
+int flow_null_query(struct rte_eth_dev *dev,
+		    struct rte_flow *flow,
+		    enum rte_flow_action_type type,
+		    void *data,
+		    struct rte_flow_error *error);
 uint64_t mlx5_flow_hashfields_adjust(struct mlx5_flow *dev_flow, int tunnel,
 				     uint32_t layer_types,
 				     uint64_t hash_fields);
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index c9aa50f..9c8d074 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -1365,6 +1365,7 @@
 	.apply = flow_dv_apply,
 	.remove = flow_dv_remove,
 	.destroy = flow_dv_destroy,
+	.query = flow_null_query,
 };
 
 #endif /* HAVE_IBV_FLOW_DV_SUPPORT */
diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
index 8535a15..d12a566 100644
--- a/drivers/net/mlx5/mlx5_flow_tcf.c
+++ b/drivers/net/mlx5/mlx5_flow_tcf.c
@@ -6,6 +6,7 @@
 #include <assert.h>
 #include <errno.h>
 #include <libmnl/libmnl.h>
+#include <linux/gen_stats.h>
 #include <linux/if_ether.h>
 #include <linux/netlink.h>
 #include <linux/pkt_cls.h>
@@ -163,7 +164,8 @@ struct mlx5_flow_tcf_context {
 	struct mnl_socket *nl; /* NETLINK_ROUTE libmnl socket. */
 	uint32_t seq; /* Message sequence number. */
 	uint32_t buf_size; /* Message buffer size. */
-	uint8_t *buf; /* Message buffer. */
+	uint8_t *buf;
+	/* Message buffer (used for receiving large netlink messages). */
 };
 
 /** Empty masks for known item types. */
@@ -696,6 +698,9 @@ struct flow_tcf_ptoi {
 					 "can't have multiple fate actions");
 			action_flags |= MLX5_FLOW_ACTION_DROP;
 			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			action_flags |= MLX5_FLOW_ACTION_COUNT;
+			break;
 		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
 			action_flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
 			break;
@@ -875,6 +880,9 @@ struct flow_tcf_ptoi {
 				SZ_NLATTR_TYPE_OF(struct tc_gact);
 			flags |= MLX5_FLOW_ACTION_DROP;
 			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			flags |= MLX5_FLOW_ACTION_COUNT;
+			break;
 		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
 			flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
 			goto action_of_vlan;
@@ -1360,6 +1368,12 @@ struct flow_tcf_ptoi {
 			mnl_attr_nest_end(nlh, na_act);
 			mnl_attr_nest_end(nlh, na_act_index);
 			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			/*
+			 * Driver adds the count action implicitly for
+			 * each rule it creates.
+			 */
+			break;
 		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
 			conf.of_push_vlan = NULL;
 			vlan_act = TCA_VLAN_ACT_POP;
@@ -1564,6 +1578,275 @@ struct flow_tcf_ptoi {
 	rte_free(dev_flow);
 }
 
+/**
+ * Parse rtnetlink message attributes filling the attribute table with the info
+ * being retrieved.
+ *
+ * @param tb
+ *   Attribute table to be filled.
+ * @param[out] max
+ *   Maxinum entry in the attribute table.
+ * @param rte
+ *   The attributes section in the message to be parsed.
+ * @param len
+ *   The length of the attributes section in the message.
+ * @return
+ *   0 on successful extraction of action counts, -1 otherwise.
+ */
+static void
+tc_parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
+{
+	unsigned short type;
+	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
+	while (RTA_OK(rta, len)) {
+		type = rta->rta_type;
+		if (type <= max && !tb[type])
+			tb[type] = rta;
+		rta = RTA_NEXT(rta, len);
+	}
+}
+
+ /**
+  * Extract action counters from flower action.
+  *
+  * @param rta
+  *   flower action stats properties in the Netlink message received.
+  * @param[out] qc
+  *   Count statistics retrieved from the message query.
+  * @return
+  *   0 on successful extraction of action counts, -1 otherwise.
+  */
+static int
+tc_flow_extract_stats_attr(struct rtattr *rta, struct rte_flow_query_count *qc)
+{
+	struct rtattr *tbs[TCA_STATS_MAX + 1];
+
+	tc_parse_rtattr(tbs, TCA_STATS_MAX, RTA_DATA(rta), RTA_PAYLOAD(rta));
+	if (tbs[TCA_STATS_BASIC]) {
+		struct gnet_stats_basic bs = {0};
+
+		memcpy(&bs, RTA_DATA(tbs[TCA_STATS_BASIC]),
+		       RTE_MIN(RTA_PAYLOAD(tbs[TCA_STATS_BASIC]),
+		       sizeof(bs)));
+		qc->bytes = bs.bytes;
+		qc->hits = bs.packets;
+		qc->bytes_set = 1;
+		qc->hits_set = 1;
+		return 0;
+	}
+	return -1;
+}
+
+ /**
+  * Parse flower single action retrieving the flow counters from it if present.
+  *
+  * @param arg
+  *   flower action properties in the Netlink message received.
+  * @param[out] qc
+  *   Count statistics retrieved from the message query.
+  * @return
+  *   0 on successful retrieval of action counts, -1 otherwise.
+  */
+static int
+tc_flow_parse_one_action(struct rtattr *arg, struct rte_flow_query_count *qc)
+{
+	struct rtattr *tb[TCA_ACT_MAX + 1];
+
+	if (arg == NULL)
+		return -1;
+	tc_parse_rtattr(tb, TCA_ACT_MAX, RTA_DATA(arg), RTA_PAYLOAD(arg));
+	if (tb[TCA_ACT_KIND] == NULL)
+		return -1;
+	if (tb[TCA_ACT_STATS])
+		return tc_flow_extract_stats_attr(tb[TCA_ACT_STATS], qc);
+	return -1;
+}
+
+ /**
+  * Parse flower action section in the message, retrieving the flow counters
+  * from the first action that contains them.
+  * flow counters are stored in the actions defined by the flow and not in the
+  * flow itself, therefore we need to traverse the flower action in search for
+  * them.
+  *
+  * @param opt
+  *   flower section in the Netlink message received.
+  * @param[out] qc
+  *   Count statistics retrieved from the message query.
+  */
+static void
+tc_flow_parse_action(const struct rtattr *arg, struct rte_flow_query_count *qc)
+{
+	struct rtattr *tb[TCA_ACT_MAX_PRIO + 1];
+	int i;
+
+	if (arg == NULL)
+		return;
+	tc_parse_rtattr(tb, TCA_ACT_MAX_PRIO, RTA_DATA(arg), RTA_PAYLOAD(arg));
+	for (i = 0; i <= TCA_ACT_MAX_PRIO; i++)
+		if (tb[i])
+			if (tc_flow_parse_one_action(tb[i], qc) == 0)
+				break;
+}
+
+ /**
+  * Parse Netlink reply on flower type of filters, retrieving the flow counters
+  * from it.
+  *
+  * @param opt
+  *   flower section in the Netlink message received.
+  * @param[out] qc
+  *   Count statistics retrieved from the message query.
+  */
+static void
+tc_flower_parse_opt(struct rtattr *opt,
+		    struct rte_flow_query_count *qc)
+{
+	struct rtattr *tb[TCA_FLOWER_MAX + 1];
+
+	if (!opt)
+		return;
+	tc_parse_rtattr(tb, TCA_FLOWER_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt));
+	if (tb[TCA_FLOWER_ACT])
+		tc_flow_parse_action(tb[TCA_FLOWER_ACT], qc);
+}
+
+ /**
+  * Parse Netlink reply on filter query, retrieving the flow counters.
+  *
+  * @param nlh
+  *   Message received from Netlink.
+  * @param[out] qc
+  *   Count statistics retrieved from the message query.
+  *
+  * @return
+  *   MNL_CB_ERROR on error, MNL_CB_OK value otherwise.
+  */
+static int
+mlx5_nl_flow_parse_filter(const struct nlmsghdr *nlh,
+			  struct rte_flow_query_count *qc)
+{
+	struct tcmsg *t = NLMSG_DATA(nlh);
+	int len = nlh->nlmsg_len;
+	struct rtattr *tb[TCA_MAX + 1] = { };
+
+	if (nlh->nlmsg_type != RTM_NEWTFILTER &&
+	    nlh->nlmsg_type != RTM_GETTFILTER &&
+	    nlh->nlmsg_type != RTM_DELTFILTER)
+		return MNL_CB_OK;
+	len -= NLMSG_LENGTH(sizeof(*t));
+	if (len < 0)
+		return MNL_CB_ERROR;
+	tc_parse_rtattr(tb, TCA_MAX, TCA_RTA(t), len);
+	if (tb[TCA_KIND])
+		if (strcmp(RTA_DATA(tb[TCA_KIND]), "flower") == 0)
+			tc_flower_parse_opt(tb[TCA_OPTIONS], qc);
+	return MNL_CB_OK;
+}
+
+/**
+ * A callback to parse Netlink reply on filter query attempting to retrieve the
+ * flow counters if present.
+ *
+ * @param nlh
+ *   Message received from Netlink.
+ * @param[out] data
+ *   pointer to the count statistics to be filled by the routine.
+ *
+ * @return
+ *   MNL_CB_ERROR on error, MNL_CB_OK value otherwise.
+ */
+static int
+mlx5_nl_flow_parse_message(const struct nlmsghdr *nlh, void *data)
+{
+	struct rte_flow_query_count *qc = (struct rte_flow_query_count *)data;
+
+	switch (nlh->nlmsg_type) {
+	case NLMSG_NOOP:
+		return MNL_CB_OK;
+	case NLMSG_ERROR:
+	case NLMSG_OVERRUN:
+		return MNL_CB_ERROR;
+	default:
+		break;
+	}
+	return mlx5_nl_flow_parse_filter(nlh, qc);
+}
+
+ /**
+  * Query a tcf rule for its statistics via netlink.
+  *
+  * @param[in] dev
+  *   Pointer to Ethernet device.
+  * @param[in] flow
+  *   Pointer to the sub flow.
+  * @param[out] data
+  *   data retrieved by the query.
+  * @param[out] error
+  *   Perform verbose error reporting if not NULL.
+  *
+  * @return
+  *   0 on success, a negative errno value otherwise and rte_errno is set.
+  */
+static int
+mlx5_flow_tcf_query(struct rte_eth_dev *dev,
+			  struct rte_flow *flow,
+			  enum rte_flow_action_type type,
+			  void *data,
+			  struct rte_flow_error *error)
+{
+	struct rte_flow_query_count *qc = data;
+	struct priv *priv = dev->data->dev_private;
+	struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
+	struct mnl_socket *nl = ctx->nl;
+	struct mlx5_flow *dev_flow;
+	struct nlmsghdr *nlh;
+	uint32_t seq = priv->tcf_context->seq++;
+	ssize_t ret;
+	assert(qc);
+
+	dev_flow = LIST_FIRST(&flow->dev_flows);
+	/* E-Switch flow can't be expanded. */
+	assert(!LIST_NEXT(dev_flow, next));
+	/* Currently only query count is supported. */
+	if (type != RTE_FLOW_ACTION_TYPE_COUNT)
+		goto error_nosup;
+	nlh = dev_flow->tcf.nlh;
+	nlh->nlmsg_type = RTM_GETTFILTER;
+	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ECHO;
+	nlh->nlmsg_seq = seq;
+	if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) == -1)
+		goto error_exit;
+	ret = mnl_socket_recvfrom(nl, priv->tcf_context->buf,
+				  priv->tcf_context->buf_size);
+	if (ret == -1)
+		goto error_exit;
+	while (ret > 0) {
+		ret = mnl_cb_run(ctx->buf, ret, seq,
+				 mnl_socket_get_portid(nl),
+				 mlx5_nl_flow_parse_message, qc);
+		if (ret <= MNL_CB_STOP)
+			break;
+		ret = mnl_socket_recvfrom(nl, ctx->buf, ctx->buf_size);
+	}
+	/* Return the delta from last reset. */
+	qc->hits -= dev_flow->tcf.hits;
+	qc->bytes -= dev_flow->tcf.bytes;
+	if (qc->reset) {
+		dev_flow->tcf.hits = qc->hits;
+		dev_flow->tcf.bytes = qc->bytes;
+	}
+	return 0;
+error_nosup:
+	return rte_flow_error_set
+			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+			 NULL, "tcf: unsupported query");
+error_exit:
+	return rte_flow_error_set
+			(error, errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+			 NULL, "netlink: failed to read flow rule statistics");
+}
+
 const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
 	.validate = flow_tcf_validate,
 	.prepare = flow_tcf_prepare,
@@ -1571,6 +1854,7 @@ struct flow_tcf_ptoi {
 	.apply = flow_tcf_apply,
 	.remove = flow_tcf_remove,
 	.destroy = flow_tcf_destroy,
+	.query = mlx5_flow_tcf_query,
 };
 
 /**
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
index ad8f7ac..e377b3b 100644
--- a/drivers/net/mlx5/mlx5_flow_verbs.c
+++ b/drivers/net/mlx5/mlx5_flow_verbs.c
@@ -1655,4 +1655,5 @@
 	.apply = flow_verbs_apply,
 	.remove = flow_verbs_remove,
 	.destroy = flow_verbs_destroy,
+	.query = flow_null_query,
 };
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 21+ messages in thread
* Re: [dpdk-dev] [PATCH v2 1/2] net/mlx5: refactor TC-flow infrastructure
  2018-10-11 13:04 ` [dpdk-dev] [PATCH v2 1/2] net/mlx5: refactor TC-flow infrastructure Mordechay Haimovsky
@ 2018-10-14 11:07   ` Shahaf Shuler
  0 siblings, 0 replies; 21+ messages in thread
From: Shahaf Shuler @ 2018-10-14 11:07 UTC (permalink / raw)
  To: Mordechay Haimovsky; +Cc: dev, Slava Ovsiienko
Hi Moty,
Thursday, October 11, 2018 4:05 PM, Mordechay Haimovsky:
> Subject: [PATCH v2 1/2] net/mlx5: refactor TC-flow infrastructure
> 
> This commit refactors tc_flow as a preparation to coming commits that sends
> different type of messages and expect differ type of replies while still using
> the same underlying routines.
> 
> Signed-off-by: Moti Haimovsky <motih@mellanox.com>
> ---
>  drivers/net/mlx5/mlx5.c          |  18 +++----
>  drivers/net/mlx5/mlx5.h          |   4 +-
>  drivers/net/mlx5/mlx5_flow.h     |   8 +--
>  drivers/net/mlx5/mlx5_flow_tcf.c | 113
> ++++++++++++++++++++++++++++++---------
>  4 files changed, 102 insertions(+), 41 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index
> 7425e2f..20adf88 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -286,8 +286,8 @@
>  		close(priv->nl_socket_route);
>  	if (priv->nl_socket_rdma >= 0)
>  		close(priv->nl_socket_rdma);
> -	if (priv->mnl_socket)
> -		mlx5_flow_tcf_socket_destroy(priv->mnl_socket);
> +	if (priv->tcf_context)
> +		mlx5_flow_tcf_context_destroy(priv->tcf_context);
>  	ret = mlx5_hrxq_ibv_verify(dev);
>  	if (ret)
>  		DRV_LOG(WARNING, "port %u some hash Rx queue still
> remain", @@ -1137,8 +1137,8 @@
>  	claim_zero(mlx5_mac_addr_add(eth_dev, &mac, 0, 0));
>  	if (vf && config.vf_nl_en)
>  		mlx5_nl_mac_addr_sync(eth_dev);
> -	priv->mnl_socket = mlx5_flow_tcf_socket_create();
> -	if (!priv->mnl_socket) {
> +	priv->tcf_context = mlx5_flow_tcf_context_create();
> +	if (!priv->tcf_context) {
>  		err = -rte_errno;
>  		DRV_LOG(WARNING,
>  			"flow rules relying on switch offloads will not be"
> @@ -1153,7 +1153,7 @@
>  			error.message =
>  				"cannot retrieve network interface index";
>  		} else {
> -			err = mlx5_flow_tcf_init(priv->mnl_socket, ifindex,
> +			err = mlx5_flow_tcf_init(priv->tcf_context, ifindex,
>  						&error);
>  		}
>  		if (err) {
> @@ -1161,8 +1161,8 @@
>  				"flow rules relying on switch offloads will"
>  				" not be supported: %s: %s",
>  				error.message, strerror(rte_errno));
> -			mlx5_flow_tcf_socket_destroy(priv->mnl_socket);
> -			priv->mnl_socket = NULL;
> +			mlx5_flow_tcf_context_destroy(priv->tcf_context);
> +			priv->tcf_context = NULL;
>  		}
>  	}
>  	TAILQ_INIT(&priv->flows);
> @@ -1217,8 +1217,8 @@
>  			close(priv->nl_socket_route);
>  		if (priv->nl_socket_rdma >= 0)
>  			close(priv->nl_socket_rdma);
> -		if (priv->mnl_socket)
> -			mlx5_flow_tcf_socket_destroy(priv->mnl_socket);
> +		if (priv->tcf_context)
> +			mlx5_flow_tcf_context_destroy(priv->tcf_context);
>  		if (own_domain_id)
>  			claim_zero(rte_eth_switch_domain_free(priv-
> >domain_id));
>  		rte_free(priv);
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> 2dec88a..d14239c 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -169,7 +169,7 @@ struct mlx5_drop {
>  	struct mlx5_rxq_ibv *rxq; /* Verbs Rx queue. */  };
> 
> -struct mnl_socket;
> +struct mlx5_flow_tcf_context;
> 
>  struct priv {
>  	LIST_ENTRY(priv) mem_event_cb; /* Called by memory event
> callback. */ @@ -236,7 +236,7 @@ struct priv {
>  	rte_spinlock_t uar_lock[MLX5_UAR_PAGE_NUM_MAX];
>  	/* UAR same-page access control required in 32bit implementations.
> */  #endif
> -	struct mnl_socket *mnl_socket; /* Libmnl socket. */
> +	struct mlx5_flow_tcf_context *tcf_context; /* TC flower context. */
>  };
> 
>  #define PORT_ID(priv) ((priv)->dev_data->port_id) diff --git
> a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h index
> fee05f0..41d55e8 100644
> --- a/drivers/net/mlx5/mlx5_flow.h
> +++ b/drivers/net/mlx5/mlx5_flow.h
> @@ -338,9 +338,9 @@ int mlx5_flow_validate_item_vxlan_gpe(const struct
> rte_flow_item *item,
> 
>  /* mlx5_flow_tcf.c */
> 
> -int mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
> -		       struct rte_flow_error *error);
> -struct mnl_socket *mlx5_flow_tcf_socket_create(void);
> -void mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl);
> +int mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *ctx,
> +		       unsigned int ifindex, struct rte_flow_error *error); struct
> +mlx5_flow_tcf_context *mlx5_flow_tcf_context_create(void);
> +void mlx5_flow_tcf_context_destroy(struct mlx5_flow_tcf_context *ctx);
> 
>  #endif /* RTE_PMD_MLX5_FLOW_H_ */
> diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c
> b/drivers/net/mlx5/mlx5_flow_tcf.c
> index 91f6ef6..8535a15 100644
> --- a/drivers/net/mlx5/mlx5_flow_tcf.c
> +++ b/drivers/net/mlx5/mlx5_flow_tcf.c
> @@ -153,6 +153,19 @@ struct tc_vlan {
>  #define IPV6_ADDR_LEN 16
>  #endif
> 
> +/**
> + * Structure for holding netlink context.
> + * Note the size of the message buffer which is
> MNL_SOCKET_BUFFER_SIZE.
> + * Using this (8KB) buffer size ensures that netlink messages will
> +never be
> + * truncated.
> + */
> +struct mlx5_flow_tcf_context {
> +	struct mnl_socket *nl; /* NETLINK_ROUTE libmnl socket. */
> +	uint32_t seq; /* Message sequence number. */
I don't think the seq is needed here. See comments below. 
> +	uint32_t buf_size; /* Message buffer size. */
If the buffer is always MNL_SOCKET_BUFFER_SIZE why not statically allocate it, like 
uint8_t buf[MNL_SOCKET_BUFFER_SIZE];
however I don't think you need it. See comments on of the next patch in the series.
> +	uint8_t *buf; /* Message buffer. */
> +};
so in fact maybe this struct is not needed at all. 
> +
>  /** Empty masks for known item types. */  static const union {
>  	struct rte_flow_item_port_id port_id;
> @@ -1429,8 +1442,8 @@ struct flow_tcf_ptoi {
>  /**
>   * Send Netlink message with acknowledgment.
>   *
> - * @param nl
> - *   Libmnl socket to use.
> + * @param ctx
> + *   Flow context to use.
>   * @param nlh
>   *   Message to send. This function always raises the NLM_F_ACK flag before
>   *   sending.
> @@ -1439,12 +1452,13 @@ struct flow_tcf_ptoi {
>   *   0 on success, a negative errno value otherwise and rte_errno is set.
>   */
>  static int
> -flow_tcf_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
> +flow_tcf_nl_ack(struct mlx5_flow_tcf_context *ctx, struct nlmsghdr
> +*nlh)
>  {
>  	alignas(struct nlmsghdr)
>  	uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
>  		    nlh->nlmsg_len - sizeof(*nlh)];
> -	uint32_t seq = random();
> +	uint32_t seq = ctx->seq++;
Why changing it to serially increment? 
Netlink messages requires requests to have a unique seq id. It is more likely to hit conflict on the serial increment in case both port received a similar or close number.
Making each request with random value has less chances for such conflict.  
> +	struct mnl_socket *nl = ctx->nl;
>  	int ret;
> 
>  	nlh->nlmsg_flags |= NLM_F_ACK;
> @@ -1479,7 +1493,7 @@ struct flow_tcf_ptoi {
>  	       struct rte_flow_error *error)
>  {
>  	struct priv *priv = dev->data->dev_private;
> -	struct mnl_socket *nl = priv->mnl_socket;
> +	struct mlx5_flow_tcf_context *nl = priv->tcf_context;
>  	struct mlx5_flow *dev_flow;
>  	struct nlmsghdr *nlh;
> 
> @@ -1508,7 +1522,7 @@ struct flow_tcf_ptoi {  flow_tcf_remove(struct
> rte_eth_dev *dev, struct rte_flow *flow)  {
>  	struct priv *priv = dev->data->dev_private;
> -	struct mnl_socket *nl = priv->mnl_socket;
> +	struct mlx5_flow_tcf_context *nl = priv->tcf_context;
>  	struct mlx5_flow *dev_flow;
>  	struct nlmsghdr *nlh;
> 
> @@ -1560,10 +1574,47 @@ struct flow_tcf_ptoi {  };
> 
>  /**
> - * Initialize ingress qdisc of a given network interface.
> + * Create and configure a libmnl socket for Netlink flow rules.
> + *
> + * @return
> + *   A valid libmnl socket object pointer on success, NULL otherwise and
> + *   rte_errno is set.
> + */
> +static struct mnl_socket *
> +mlx5_flow_mnl_socket_create(void)
> +{
> +	struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
> +
> +	if (nl) {
> +		mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
> +				      sizeof(int));
> +		if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
> +			return nl;
> +	}
> +	rte_errno = errno;
> +	if (nl)
> +		mnl_socket_close(nl);
> +	return NULL;
> +}
> +
> +/**
> + * Destroy a libmnl socket.
>   *
>   * @param nl
>   *   Libmnl socket of the @p NETLINK_ROUTE kind.
> + */
> +static void
> +mlx5_flow_mnl_socket_destroy(struct mnl_socket *nl) {
> +	if (nl)
> +		mnl_socket_close(nl);
> +}
> +
> +/**
> + * Initialize ingress qdisc of a given network interface.
> + *
> + * @param nl
> + *   Pointer to tc-flower context to use.
>   * @param ifindex
>   *   Index of network interface to initialize.
>   * @param[out] error
> @@ -1573,8 +1624,8 @@ struct flow_tcf_ptoi {
>   *   0 on success, a negative errno value otherwise and rte_errno is set.
>   */
>  int
> -mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
> -		   struct rte_flow_error *error)
> +mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *nl,
> +		   unsigned int ifindex, struct rte_flow_error *error)
>  {
>  	struct nlmsghdr *nlh;
>  	struct tcmsg *tcm;
> @@ -1616,37 +1667,47 @@ struct flow_tcf_ptoi {  }
> 
>  /**
> - * Create and configure a libmnl socket for Netlink flow rules.
> + * Create libmnl context for Netlink flow rules.
>   *
>   * @return
>   *   A valid libmnl socket object pointer on success, NULL otherwise and
>   *   rte_errno is set.
>   */
> -struct mnl_socket *
> -mlx5_flow_tcf_socket_create(void)
> +struct mlx5_flow_tcf_context *
> +mlx5_flow_tcf_context_create(void)
>  {
> -	struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
> -
> -	if (nl) {
> -		mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
> -				      sizeof(int));
> -		if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
> -			return nl;
> -	}
> -	rte_errno = errno;
> -	if (nl)
> -		mnl_socket_close(nl);
> +	struct mlx5_flow_tcf_context *ctx = rte_zmalloc(__func__,
> +							sizeof(*ctx),
> +							sizeof(uint32_t));
> +	if (!ctx)
> +		goto error;
> +	ctx->nl = mlx5_flow_mnl_socket_create();
> +	if (!ctx->nl)
> +		goto error;
> +	ctx->buf_size = MNL_SOCKET_BUFFER_SIZE;
> +	ctx->buf = rte_zmalloc(__func__,
> +			       ctx->buf_size, sizeof(uint32_t));
> +	if (!ctx->buf)
> +		goto error;
> +	ctx->seq = random();
> +	return ctx;
> +error:
> +	mlx5_flow_tcf_context_destroy(ctx);
>  	return NULL;
>  }
> 
>  /**
> - * Destroy a libmnl socket.
> + * Destroy a libmnl context.
>   *
>   * @param nl
>   *   Libmnl socket of the @p NETLINK_ROUTE kind.
>   */
>  void
> -mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl)
> +mlx5_flow_tcf_context_destroy(struct mlx5_flow_tcf_context *ctx)
>  {
> -	mnl_socket_close(nl);
> +	if (!ctx)
> +		return;
> +	mlx5_flow_mnl_socket_destroy(ctx->nl);
> +	rte_free(ctx->buf);
> +	rte_free(ctx);
>  }
> --
> 1.8.3.1
^ permalink raw reply	[flat|nested] 21+ messages in thread
* Re: [dpdk-dev] [PATCH v2 2/2] net/mlx5: support e-switch flow count action
  2018-10-11 13:04 ` [dpdk-dev] [PATCH v2 2/2] net/mlx5: support e-switch flow count action Mordechay Haimovsky
@ 2018-10-14 11:07   ` Shahaf Shuler
  2018-10-16 23:50   ` [dpdk-dev] [PATCH v3 0/2] " Mordechay Haimovsky
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 21+ messages in thread
From: Shahaf Shuler @ 2018-10-14 11:07 UTC (permalink / raw)
  To: Mordechay Haimovsky; +Cc: dev
Hi Moty, 
In addition to below, the prefix for each tc flow function should be flow_tcf_*. Make sure to update your functions. 
Thursday, October 11, 2018 4:05 PM, Mordechay Haimovsky:
> Subject: [PATCH v2 2/2] net/mlx5: support e-switch flow count action
> 
> This commit adds support for configuring flows destined to the mlx5 eswitch
> with 'count' action and for querying these counts at runtime.
> 
Log from here...
> It is possible to offload an interface flow rules to the hardware using DPDK
> flow commands.
> With mlx5 it is also possible to offload a limited set of flow rules to the mlxsw
> (or e-switch) using the same DPDK flow commands using the 'transfer'
> attribute in the flow rule creation command.
> The commands destined for the switch are transposed to TC flower rules and
> are sent, as Netlink messages, to the mlx5 driver (or more precisely to the
> netdev which represent the mlxsw port).
Till here is not needed. 
> Each flow rule configured by the mlx5 driver is also assigned with a set of flow
With TC, each flow rule ...
Also a set or a single counter?
> counters implicitly. These counters can be retrieved when querying the flow
> rule via Netlink, they can be found in each flow action section of the reply.
No need for log from here...
> Currently the limited set of eswitch flow rules does not contain the 'count'
> action but since every rule contains a count we can still retrieve these values
> as if we configured a 'count' action.
Till here..
> 
> Supporting the 'count' action in the flow configuration command is straight-
> forward. 
Hence, supporting the 'count' action...
When transposing the command to a tc flower Netlink message we
> just ignore it instead of rejecting it.
> So the following two commands will have the same affect and behavior:
>   testpmd> flow create 0 transfer ingress pattern eth src is
>            11:22:33:44:55:77 dst is 11:22:33:44:55:88 / end
>            actions drop / end
>   testpmd> flow create 0 transfer ingress pattern eth src is
>            11:22:33:44:55:77 dst is 11:22:33:44:55:88 / end
>            actions count / drop / end
Right, but it is better the user will explicitly provide count action anyway. 
> In the flow query side, the command now also returns the counts the above
Rephrase "the counts the above", maybe the counters or the above counters. 
> flow via using tc Netlink query command.
> Special care was taken in order to prevent Netlink messages truncation due
> to short buffers by using MNL_SOCKET_BUFFER_SIZE buffers which are pre-
> allocate per port instance.
I don't understand this part. 
Isn't it a fixed size of response when querying the packet and bytes counters of a single flow? 
What is the size? 
> 
> Signed-off-by: Moti Haimovsky <motih@mellanox.com>
> ---
> v2:
>  * Rebase on top of 3f4722ee01e7 ("net/mlx5: refactor TC-flow
> infrastructure")
> ---
>  drivers/net/mlx5/mlx5_flow.c       |  29 +++-
>  drivers/net/mlx5/mlx5_flow.h       |  14 +-
>  drivers/net/mlx5/mlx5_flow_dv.c    |   1 +
>  drivers/net/mlx5/mlx5_flow_tcf.c   | 286
> ++++++++++++++++++++++++++++++++++++-
>  drivers/net/mlx5/mlx5_flow_verbs.c |   1 +
>  5 files changed, 325 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
> index 6b2698a..7c6ece1 100644
> --- a/drivers/net/mlx5/mlx5_flow.c
> +++ b/drivers/net/mlx5/mlx5_flow.c
> @@ -1649,6 +1649,17 @@ uint32_t mlx5_flow_adjust_priority(struct
> rte_eth_dev *dev, int32_t priority,  {  }
> 
> +int
> +flow_null_query(struct rte_eth_dev *dev __rte_unused,
> +		struct rte_flow *flow __rte_unused,
> +		enum rte_flow_action_type type __rte_unused,
> +		void *data __rte_unused,
> +		struct rte_flow_error *error __rte_unused) {
> +	rte_errno = ENOTSUP;
> +	return -rte_errno;
> +}
> +
>  /* Void driver to protect from null pointer reference. */  const struct
> mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
>  	.validate = flow_null_validate,
> @@ -1657,6 +1668,7 @@ uint32_t mlx5_flow_adjust_priority(struct
> rte_eth_dev *dev, int32_t priority,
>  	.apply = flow_null_apply,
>  	.remove = flow_null_remove,
>  	.destroy = flow_null_destroy,
> +	.query = flow_null_query,
>  };
> 
You added the query function pointer to the driver ops which is good. 
>  /**
> @@ -2349,10 +2361,19 @@ struct rte_flow *
>   *   0 on success, a negative errno value otherwise and rte_errno is set.
>   */
>  static int
> -mlx5_flow_query_count(struct rte_flow *flow __rte_unused,
> -		      void *data __rte_unused,
> +mlx5_flow_query_count(struct rte_eth_dev *dev,
Why not calling it flow_drv_query, and to have the function body just like the other function pointers:
Get the driver type from dev and then call its query op. 
You will probably need to move the current implementation to the query function of the verbs. 
> +		      struct rte_flow *flow,
> +		      void *data,
>  		      struct rte_flow_error *error)
>  {
> +	const struct mlx5_flow_driver_ops *fops;
> +	enum mlx5_flow_drv_type ftype = flow->drv_type;
> +
> +	assert(ftype > MLX5_FLOW_TYPE_MIN && ftype <
> MLX5_FLOW_TYPE_MAX);
We don't have this assert on any other function pointer. Not sure why to start now. 
> +	fops = flow_get_drv_ops(ftype);
> +	if (ftype == MLX5_FLOW_TYPE_TCF)
> +		return fops->query(dev, flow,
> +				   RTE_FLOW_ACTION_TYPE_COUNT, data,
> error);
>  #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
>  	if (flow->actions & MLX5_FLOW_ACTION_COUNT) {
>  		struct rte_flow_query_count *qc = data; @@ -2402,7 +2423,7
> @@ struct rte_flow *
>   * @see rte_flow_ops
>   */
>  int
> -mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
> +mlx5_flow_query(struct rte_eth_dev *dev,
>  		struct rte_flow *flow,
>  		const struct rte_flow_action *actions,
>  		void *data,
> @@ -2415,7 +2436,7 @@ struct rte_flow *
>  		case RTE_FLOW_ACTION_TYPE_VOID:
>  			break;
>  		case RTE_FLOW_ACTION_TYPE_COUNT:
> -			ret = mlx5_flow_query_count(flow, data, error);
> +			ret = mlx5_flow_query_count(dev, flow, data, error);
>  			break;
>  		default:
>  			return rte_flow_error_set(error, ENOTSUP, diff --git
> a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h index
> 41d55e8..3e1e9a0 100644
> --- a/drivers/net/mlx5/mlx5_flow.h
> +++ b/drivers/net/mlx5/mlx5_flow.h
> @@ -175,6 +175,8 @@ struct mlx5_flow_dv {  struct mlx5_flow_tcf {
>  	struct nlmsghdr *nlh;
>  	struct tcmsg *tcm;
> +	uint64_t hits;
> +	uint64_t bytes;
You have the "last before reset" flow query counters as part of the struct rte_flow. Do you really need to add those here? 
>  };
> 
>  /* Verbs specification header. */
> @@ -232,7 +234,6 @@ struct rte_flow {
>  	struct rte_flow_action_rss rss;/**< RSS context. */
>  	uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */
>  	uint16_t (*queue)[]; /**< Destination queues to redirect traffic to.
> */
> -	void *nl_flow; /**< Netlink flow buffer if relevant. */
>  	LIST_HEAD(dev_flows, mlx5_flow) dev_flows;
>  	/**< Device flows that are part of the flow. */
>  	uint32_t actions; /**< Bit-fields which mark all detected actions. */
> @@ -258,6 +259,11 @@ typedef void (*mlx5_flow_remove_t)(struct
> rte_eth_dev *dev,
>  				   struct rte_flow *flow);
>  typedef void (*mlx5_flow_destroy_t)(struct rte_eth_dev *dev,
>  				    struct rte_flow *flow);
> +typedef int (*mlx5_flow_query_t)(struct rte_eth_dev *dev,
> +				 struct rte_flow *flow,
> +				 enum rte_flow_action_type type,
Do you believe you will have other type than count? mlx5_flow_query should protect against it. 
> +				 void *data,
> +				 struct rte_flow_error *error);
>  struct mlx5_flow_driver_ops {
>  	mlx5_flow_validate_t validate;
>  	mlx5_flow_prepare_t prepare;
> @@ -265,10 +271,16 @@ struct mlx5_flow_driver_ops {
>  	mlx5_flow_apply_t apply;
>  	mlx5_flow_remove_t remove;
>  	mlx5_flow_destroy_t destroy;
> +	mlx5_flow_query_t query;
>  };
> 
>  /* mlx5_flow.c */
> 
> +int flow_null_query(struct rte_eth_dev *dev,
> +		    struct rte_flow *flow,
> +		    enum rte_flow_action_type type,
Ditto. 
> +		    void *data,
> +		    struct rte_flow_error *error);
>  uint64_t mlx5_flow_hashfields_adjust(struct mlx5_flow *dev_flow, int
> tunnel,
>  				     uint32_t layer_types,
>  				     uint64_t hash_fields);
> diff --git a/drivers/net/mlx5/mlx5_flow_dv.c
> b/drivers/net/mlx5/mlx5_flow_dv.c index c9aa50f..9c8d074 100644
> --- a/drivers/net/mlx5/mlx5_flow_dv.c
> +++ b/drivers/net/mlx5/mlx5_flow_dv.c
> @@ -1365,6 +1365,7 @@
>  	.apply = flow_dv_apply,
>  	.remove = flow_dv_remove,
>  	.destroy = flow_dv_destroy,
> +	.query = flow_null_query,
>  };
> 
>  #endif /* HAVE_IBV_FLOW_DV_SUPPORT */
> diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c
> b/drivers/net/mlx5/mlx5_flow_tcf.c
> index 8535a15..d12a566 100644
> --- a/drivers/net/mlx5/mlx5_flow_tcf.c
> +++ b/drivers/net/mlx5/mlx5_flow_tcf.c
> @@ -6,6 +6,7 @@
>  #include <assert.h>
>  #include <errno.h>
>  #include <libmnl/libmnl.h>
> +#include <linux/gen_stats.h>
>  #include <linux/if_ether.h>
>  #include <linux/netlink.h>
>  #include <linux/pkt_cls.h>
> @@ -163,7 +164,8 @@ struct mlx5_flow_tcf_context {
>  	struct mnl_socket *nl; /* NETLINK_ROUTE libmnl socket. */
>  	uint32_t seq; /* Message sequence number. */
>  	uint32_t buf_size; /* Message buffer size. */
> -	uint8_t *buf; /* Message buffer. */
> +	uint8_t *buf;
> +	/* Message buffer (used for receiving large netlink messages). */
>  };
> 
>  /** Empty masks for known item types. */ @@ -696,6 +698,9 @@ struct
> flow_tcf_ptoi {
>  					 "can't have multiple fate actions");
>  			action_flags |= MLX5_FLOW_ACTION_DROP;
>  			break;
> +		case RTE_FLOW_ACTION_TYPE_COUNT:
> +			action_flags |= MLX5_FLOW_ACTION_COUNT;
> +			break;
Is there any special validation you do for count? 
If not, why having this logic? 
>  		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
>  			action_flags |=
> MLX5_FLOW_ACTION_OF_POP_VLAN;
>  			break;
> @@ -875,6 +880,9 @@ struct flow_tcf_ptoi {
>  				SZ_NLATTR_TYPE_OF(struct tc_gact);
>  			flags |= MLX5_FLOW_ACTION_DROP;
>  			break;
> +		case RTE_FLOW_ACTION_TYPE_COUNT:
> +			flags |= MLX5_FLOW_ACTION_COUNT;
> +			break;
Ditto, is there a real use for this flag considering count action doesn't add any bytes to the size of the netlink command? 
>  		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
>  			flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
>  			goto action_of_vlan;
> @@ -1360,6 +1368,12 @@ struct flow_tcf_ptoi {
>  			mnl_attr_nest_end(nlh, na_act);
>  			mnl_attr_nest_end(nlh, na_act_index);
>  			break;
> +		case RTE_FLOW_ACTION_TYPE_COUNT:
> +			/*
> +			 * Driver adds the count action implicitly for
> +			 * each rule it creates.
> +			 */
> +			break;
>  		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
>  			conf.of_push_vlan = NULL;
>  			vlan_act = TCA_VLAN_ACT_POP;
> @@ -1564,6 +1578,275 @@ struct flow_tcf_ptoi {
>  	rte_free(dev_flow);
>  }
> 
> +/**
> + * Parse rtnetlink message attributes filling the attribute table with
> +the info
> + * being retrieved.
> + *
> + * @param tb
> + *   Attribute table to be filled.
> + * @param[out] max
> + *   Maxinum entry in the attribute table.
> + * @param rte
> + *   The attributes section in the message to be parsed.
> + * @param len
> + *   The length of the attributes section in the message.
> + * @return
> + *   0 on successful extraction of action counts, -1 otherwise.
Really? Looks like it returns void. 
> + */
> +static void
> +tc_parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int
> +len) {
> +	unsigned short type;
> +	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
> +	while (RTA_OK(rta, len)) {
> +		type = rta->rta_type;
> +		if (type <= max && !tb[type])
> +			tb[type] = rta;
> +		rta = RTA_NEXT(rta, len);
> +	}
> +}
> +
> + /**
> +  * Extract action counters from flower action.
Extract flow counters from flower query. 
> +  *
> +  * @param rta
> +  *   flower action stats properties in the Netlink message received.
> +  * @param[out] qc
> +  *   Count statistics retrieved from the message query.
Isn't it the count statistics of rte_flow? 
> +  * @return
> +  *   0 on successful extraction of action counts, -1 otherwise.
> +  */
> +static int
> +tc_flow_extract_stats_attr(struct rtattr *rta, struct
> +rte_flow_query_count *qc) {
> +	struct rtattr *tbs[TCA_STATS_MAX + 1];
> +
> +	tc_parse_rtattr(tbs, TCA_STATS_MAX, RTA_DATA(rta),
> RTA_PAYLOAD(rta));
> +	if (tbs[TCA_STATS_BASIC]) {
> +		struct gnet_stats_basic bs = {0};
> +
> +		memcpy(&bs, RTA_DATA(tbs[TCA_STATS_BASIC]),
> +		       RTE_MIN(RTA_PAYLOAD(tbs[TCA_STATS_BASIC]),
> +		       sizeof(bs)));
> +		qc->bytes = bs.bytes;
> +		qc->hits = bs.packets;
> +		qc->bytes_set = 1;
> +		qc->hits_set = 1;
> +		return 0;
> +	}
> +	return -1;
> +}
> +
> + /**
> +  * Parse flower single action retrieving the flow counters from it if present.
How about making it more generic? Do not restrict to only flow counters. 
Any subsequent flow query operation can re-use your function with small extension. 
> +  *
> +  * @param arg
> +  *   flower action properties in the Netlink message received.
> +  * @param[out] qc
> +  *   Count statistics retrieved from the message query.
If you agree with above then it should be void * data
> +  * @return
> +  *   0 on successful retrieval of action counts, -1 otherwise.
> +  */
> +static int
> +tc_flow_parse_one_action(struct rtattr *arg, struct
> +rte_flow_query_count *qc) {
> +	struct rtattr *tb[TCA_ACT_MAX + 1];
> +
> +	if (arg == NULL)
> +		return -1;
> +	tc_parse_rtattr(tb, TCA_ACT_MAX, RTA_DATA(arg),
> RTA_PAYLOAD(arg));
> +	if (tb[TCA_ACT_KIND] == NULL)
> +		return -1;
> +	if (tb[TCA_ACT_STATS])
> +		return tc_flow_extract_stats_attr(tb[TCA_ACT_STATS], qc);
> +	return -1;
> +}
> +
> + /**
> +  * Parse flower action section in the message, retrieving the flow
> +counters
Again - more generic. Not only counters. 
> +  * from the first action that contains them.
> +  * flow counters are stored in the actions defined by the flow and not
> +in the
> +  * flow itself, therefore we need to traverse the flower action in
> +search for
> +  * them.
> +  *
> +  * @param opt
> +  *   flower section in the Netlink message received.
> +  * @param[out] qc
> +  *   Count statistics retrieved from the message query.
Void *data
> +  */
> +static void
> +tc_flow_parse_action(const struct rtattr *arg, struct
> +rte_flow_query_count *qc) {
If this parse multiple actions then the name should be flow_tcf_parse_actions
> +	struct rtattr *tb[TCA_ACT_MAX_PRIO + 1];
> +	int i;
> +
> +	if (arg == NULL)
> +		return;
> +	tc_parse_rtattr(tb, TCA_ACT_MAX_PRIO, RTA_DATA(arg),
> RTA_PAYLOAD(arg));
> +	for (i = 0; i <= TCA_ACT_MAX_PRIO; i++)
> +		if (tb[i])
> +			if (tc_flow_parse_one_action(tb[i], qc) == 0)
> +				break;
> +}
> +
> + /**
> +  * Parse Netlink reply on flower type of filters, retrieving the flow
> +counters
Not only
> +  * from it.
> +  *
> +  * @param opt
> +  *   flower section in the Netlink message received.
> +  * @param[out] qc
> +  *   Count statistics retrieved from the message query.
Void *data
> +  */
> +static void
> +tc_flower_parse_opt(struct rtattr *opt,
> +		    struct rte_flow_query_count *qc)
> +{
> +	struct rtattr *tb[TCA_FLOWER_MAX + 1];
> +
> +	if (!opt)
> +		return;
> +	tc_parse_rtattr(tb, TCA_FLOWER_MAX, RTA_DATA(opt),
> RTA_PAYLOAD(opt));
> +	if (tb[TCA_FLOWER_ACT])
> +		tc_flow_parse_action(tb[TCA_FLOWER_ACT], qc); }
> +
> + /**
> +  * Parse Netlink reply on filter query, retrieving the flow counters.
Not only counters
> +  *
> +  * @param nlh
> +  *   Message received from Netlink.
> +  * @param[out] qc
> +  *   Count statistics retrieved from the message query.
Void * data
> +  *
> +  * @return
> +  *   MNL_CB_ERROR on error, MNL_CB_OK value otherwise.
> +  */
> +static int
> +mlx5_nl_flow_parse_filter(const struct nlmsghdr *nlh,
> +			  struct rte_flow_query_count *qc)
> +{
> +	struct tcmsg *t = NLMSG_DATA(nlh);
> +	int len = nlh->nlmsg_len;
> +	struct rtattr *tb[TCA_MAX + 1] = { };
> +
> +	if (nlh->nlmsg_type != RTM_NEWTFILTER &&
> +	    nlh->nlmsg_type != RTM_GETTFILTER &&
> +	    nlh->nlmsg_type != RTM_DELTFILTER)
> +		return MNL_CB_OK;
> +	len -= NLMSG_LENGTH(sizeof(*t));
> +	if (len < 0)
> +		return MNL_CB_ERROR;
> +	tc_parse_rtattr(tb, TCA_MAX, TCA_RTA(t), len);
> +	if (tb[TCA_KIND])
> +		if (strcmp(RTA_DATA(tb[TCA_KIND]), "flower") == 0)
> +			tc_flower_parse_opt(tb[TCA_OPTIONS], qc);
> +	return MNL_CB_OK;
> +}
> +
> +/**
> + * A callback to parse Netlink reply on filter query attempting to
> +retrieve the
> + * flow counters if present.
> + *
> + * @param nlh
> + *   Message received from Netlink.
> + * @param[out] data
> + *   pointer to the count statistics to be filled by the routine.
> + *
> + * @return
> + *   MNL_CB_ERROR on error, MNL_CB_OK value otherwise.
> + */
> +static int
> +mlx5_nl_flow_parse_message(const struct nlmsghdr *nlh, void *data) {
> +	struct rte_flow_query_count *qc = (struct rte_flow_query_count
> *)data;
> +
> +	switch (nlh->nlmsg_type) {
> +	case NLMSG_NOOP:
> +		return MNL_CB_OK;
> +	case NLMSG_ERROR:
> +	case NLMSG_OVERRUN:
> +		return MNL_CB_ERROR;
> +	default:
> +		break;
> +	}
> +	return mlx5_nl_flow_parse_filter(nlh, qc); }
> +
> + /**
> +  * Query a tcf rule for its statistics via netlink.
> +  *
> +  * @param[in] dev
> +  *   Pointer to Ethernet device.
> +  * @param[in] flow
> +  *   Pointer to the sub flow.
> +  * @param[out] data
> +  *   data retrieved by the query.
> +  * @param[out] error
> +  *   Perform verbose error reporting if not NULL.
> +  *
> +  * @return
> +  *   0 on success, a negative errno value otherwise and rte_errno is set.
> +  */
> +static int
> +mlx5_flow_tcf_query(struct rte_eth_dev *dev,
> +			  struct rte_flow *flow,
> +			  enum rte_flow_action_type type,
> +			  void *data,
> +			  struct rte_flow_error *error)
> +{
> +	struct rte_flow_query_count *qc = data;
> +	struct priv *priv = dev->data->dev_private;
> +	struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
> +	struct mnl_socket *nl = ctx->nl;
> +	struct mlx5_flow *dev_flow;
> +	struct nlmsghdr *nlh;
> +	uint32_t seq = priv->tcf_context->seq++;
Maybe better to have it random. See previous patch. 
> +	ssize_t ret;
> +	assert(qc);
> +
> +	dev_flow = LIST_FIRST(&flow->dev_flows);
I would expect that dev_flow would be the mlx5 format of the rte_flow provided. It is not always the first in the flows list. 
> +	/* E-Switch flow can't be expanded. */
> +	assert(!LIST_NEXT(dev_flow, next));
> +	/* Currently only query count is supported. */
> +	if (type != RTE_FLOW_ACTION_TYPE_COUNT)
> +		goto error_nosup;
This check is not needed. mlx5_flow_query should protect against it. 
> +	nlh = dev_flow->tcf.nlh;
> +	nlh->nlmsg_type = RTM_GETTFILTER;
> +	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ECHO;
> +	nlh->nlmsg_seq = seq;
> +	if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) == -1)
> +		goto error_exit;
> +	ret = mnl_socket_recvfrom(nl, priv->tcf_context->buf,
> +				  priv->tcf_context->buf_size);
Why not allocating the buffer along with its needed size on the stack? Isn't is simpler? 
> +	if (ret == -1)
> +		goto error_exit;
> +	while (ret > 0) {
> +		ret = mnl_cb_run(ctx->buf, ret, seq,
> +				 mnl_socket_get_portid(nl),
> +				 mlx5_nl_flow_parse_message, qc);
I don't understand why you need this callback and not simply call mlx5_nl_flow_parse_message. 
> +		if (ret <= MNL_CB_STOP)
> +			break;
> +		ret = mnl_socket_recvfrom(nl, ctx->buf, ctx->buf_size);
> +	}
> +	/* Return the delta from last reset. */
> +	qc->hits -= dev_flow->tcf.hits;
> +	qc->bytes -= dev_flow->tcf.bytes;
I don't get the logic.
After the mlx5_nl_flow_parse_message callbacks, on qc you have the current count value. After the above two lines you override it with the old value.
I think you meant to provide a different qc struct toe the mlx5_nl_flow_parse_message function and subtract? 
> +	if (qc->reset) {
> +		dev_flow->tcf.hits = qc->hits;
> +		dev_flow->tcf.bytes = qc->bytes;
> +	}
> +	return 0;
> +error_nosup:
> +	return rte_flow_error_set
> +			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
> +			 NULL, "tcf: unsupported query");
The error_nosup can be removed. 
> +error_exit:
> +	return rte_flow_error_set
> +			(error, errno,
> RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
> +			 NULL, "netlink: failed to read flow rule statistics"); }
> +
>  const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
>  	.validate = flow_tcf_validate,
>  	.prepare = flow_tcf_prepare,
> @@ -1571,6 +1854,7 @@ struct flow_tcf_ptoi {
>  	.apply = flow_tcf_apply,
>  	.remove = flow_tcf_remove,
>  	.destroy = flow_tcf_destroy,
> +	.query = mlx5_flow_tcf_query,
>  };
> 
>  /**
> diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c
> b/drivers/net/mlx5/mlx5_flow_verbs.c
> index ad8f7ac..e377b3b 100644
> --- a/drivers/net/mlx5/mlx5_flow_verbs.c
> +++ b/drivers/net/mlx5/mlx5_flow_verbs.c
> @@ -1655,4 +1655,5 @@
>  	.apply = flow_verbs_apply,
>  	.remove = flow_verbs_remove,
>  	.destroy = flow_verbs_destroy,
> +	.query = flow_null_query,
You will need implementation here. 
>  };
> --
> 1.8.3.1
^ permalink raw reply	[flat|nested] 21+ messages in thread
* [dpdk-dev] [PATCH v3 0/2] support e-switch flow count action
  2018-10-11 13:04 ` [dpdk-dev] [PATCH v2 2/2] net/mlx5: support e-switch flow count action Mordechay Haimovsky
  2018-10-14 11:07   ` Shahaf Shuler
@ 2018-10-16 23:50   ` Mordechay Haimovsky
  2018-10-16 23:50   ` [dpdk-dev] [PATCH v3 1/2] net/mlx5: refactor TC-flow infrastructure Mordechay Haimovsky
  2018-10-16 23:50   ` [dpdk-dev] [PATCH v3 2/2] net/mlx5: support e-switch flow count action Mordechay Haimovsky
  3 siblings, 0 replies; 21+ messages in thread
From: Mordechay Haimovsky @ 2018-10-16 23:50 UTC (permalink / raw)
  To: Shahaf Shuler; +Cc: dev, Mordechay Haimovsky
The following patches add support in mlx5 PMD for configuring and
reading flow counters from the device e-switch.
Moti Haimovsky (2):
  net/mlx5: refactor TC-flow infrastructure
  net/mlx5: support e-switch flow count action
 drivers/net/mlx5/mlx5.c            |  18 +-
 drivers/net/mlx5/mlx5.h            |   4 +-
 drivers/net/mlx5/mlx5_flow.c       |  99 ++----
 drivers/net/mlx5/mlx5_flow.h       |  20 +-
 drivers/net/mlx5/mlx5_flow_dv.c    |   1 +
 drivers/net/mlx5/mlx5_flow_tcf.c   | 618 +++++++++++++++++++++++++++++++++++--
 drivers/net/mlx5/mlx5_flow_verbs.c |  87 ++++++
 7 files changed, 739 insertions(+), 108 deletions(-)
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 21+ messages in thread
* [dpdk-dev] [PATCH v3 1/2] net/mlx5: refactor TC-flow infrastructure
  2018-10-11 13:04 ` [dpdk-dev] [PATCH v2 2/2] net/mlx5: support e-switch flow count action Mordechay Haimovsky
  2018-10-14 11:07   ` Shahaf Shuler
  2018-10-16 23:50   ` [dpdk-dev] [PATCH v3 0/2] " Mordechay Haimovsky
@ 2018-10-16 23:50   ` Mordechay Haimovsky
  2018-10-16 23:50   ` [dpdk-dev] [PATCH v3 2/2] net/mlx5: support e-switch flow count action Mordechay Haimovsky
  3 siblings, 0 replies; 21+ messages in thread
From: Mordechay Haimovsky @ 2018-10-16 23:50 UTC (permalink / raw)
  To: Shahaf Shuler; +Cc: dev, Mordechay Haimovsky
This commit refactors tc_flow as a preparation to coming commits
that sends different type of messages and expect differ type of replies
while still using the same underlying routines.
Signed-off-by: Moti Haimovsky <motih@mellanox.com>
---
v3:
 * Rebase on top of
   d80c8167c4fe ("net/mlx5: fix compilation issue on ARM SOC")
v2:
 * Rebase on top of 3f4722ee01e7 ("net/mlx5: refactor TC-flow infrastructure")
---
 drivers/net/mlx5/mlx5.c          |  18 +++----
 drivers/net/mlx5/mlx5.h          |   4 +-
 drivers/net/mlx5/mlx5_flow.h     |   8 +--
 drivers/net/mlx5/mlx5_flow_tcf.c | 113 ++++++++++++++++++++++++++++++---------
 4 files changed, 102 insertions(+), 41 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 795a219..13f2fd4 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -286,8 +286,8 @@
 		close(priv->nl_socket_route);
 	if (priv->nl_socket_rdma >= 0)
 		close(priv->nl_socket_rdma);
-	if (priv->mnl_socket)
-		mlx5_flow_tcf_socket_destroy(priv->mnl_socket);
+	if (priv->tcf_context)
+		mlx5_flow_tcf_context_destroy(priv->tcf_context);
 	ret = mlx5_hrxq_ibv_verify(dev);
 	if (ret)
 		DRV_LOG(WARNING, "port %u some hash Rx queue still remain",
@@ -1139,8 +1139,8 @@
 	claim_zero(mlx5_mac_addr_add(eth_dev, &mac, 0, 0));
 	if (vf && config.vf_nl_en)
 		mlx5_nl_mac_addr_sync(eth_dev);
-	priv->mnl_socket = mlx5_flow_tcf_socket_create();
-	if (!priv->mnl_socket) {
+	priv->tcf_context = mlx5_flow_tcf_context_create();
+	if (!priv->tcf_context) {
 		err = -rte_errno;
 		DRV_LOG(WARNING,
 			"flow rules relying on switch offloads will not be"
@@ -1155,7 +1155,7 @@
 			error.message =
 				"cannot retrieve network interface index";
 		} else {
-			err = mlx5_flow_tcf_init(priv->mnl_socket, ifindex,
+			err = mlx5_flow_tcf_init(priv->tcf_context, ifindex,
 						&error);
 		}
 		if (err) {
@@ -1163,8 +1163,8 @@
 				"flow rules relying on switch offloads will"
 				" not be supported: %s: %s",
 				error.message, strerror(rte_errno));
-			mlx5_flow_tcf_socket_destroy(priv->mnl_socket);
-			priv->mnl_socket = NULL;
+			mlx5_flow_tcf_context_destroy(priv->tcf_context);
+			priv->tcf_context = NULL;
 		}
 	}
 	TAILQ_INIT(&priv->flows);
@@ -1219,8 +1219,8 @@
 			close(priv->nl_socket_route);
 		if (priv->nl_socket_rdma >= 0)
 			close(priv->nl_socket_rdma);
-		if (priv->mnl_socket)
-			mlx5_flow_tcf_socket_destroy(priv->mnl_socket);
+		if (priv->tcf_context)
+			mlx5_flow_tcf_context_destroy(priv->tcf_context);
 		if (own_domain_id)
 			claim_zero(rte_eth_switch_domain_free(priv->domain_id));
 		rte_free(priv);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 2dec88a..d14239c 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -169,7 +169,7 @@ struct mlx5_drop {
 	struct mlx5_rxq_ibv *rxq; /* Verbs Rx queue. */
 };
 
-struct mnl_socket;
+struct mlx5_flow_tcf_context;
 
 struct priv {
 	LIST_ENTRY(priv) mem_event_cb; /* Called by memory event callback. */
@@ -236,7 +236,7 @@ struct priv {
 	rte_spinlock_t uar_lock[MLX5_UAR_PAGE_NUM_MAX];
 	/* UAR same-page access control required in 32bit implementations. */
 #endif
-	struct mnl_socket *mnl_socket; /* Libmnl socket. */
+	struct mlx5_flow_tcf_context *tcf_context; /* TC flower context. */
 };
 
 #define PORT_ID(priv) ((priv)->dev_data->port_id)
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 094f666..5cb05ba 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -346,9 +346,9 @@ int mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
 
 /* mlx5_flow_tcf.c */
 
-int mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
-		       struct rte_flow_error *error);
-struct mnl_socket *mlx5_flow_tcf_socket_create(void);
-void mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl);
+int mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *ctx,
+		       unsigned int ifindex, struct rte_flow_error *error);
+struct mlx5_flow_tcf_context *mlx5_flow_tcf_context_create(void);
+void mlx5_flow_tcf_context_destroy(struct mlx5_flow_tcf_context *ctx);
 
 #endif /* RTE_PMD_MLX5_FLOW_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
index 4b51a85..f232373 100644
--- a/drivers/net/mlx5/mlx5_flow_tcf.c
+++ b/drivers/net/mlx5/mlx5_flow_tcf.c
@@ -231,6 +231,19 @@ struct tc_pedit_sel {
 #define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
 #endif
 
+/**
+ * Structure for holding netlink context.
+ * Note the size of the message buffer which is MNL_SOCKET_BUFFER_SIZE.
+ * Using this (8KB) buffer size ensures that netlink messages will never be
+ * truncated.
+ */
+struct mlx5_flow_tcf_context {
+	struct mnl_socket *nl; /* NETLINK_ROUTE libmnl socket. */
+	uint32_t seq; /* Message sequence number. */
+	uint32_t buf_size; /* Message buffer size. */
+	uint8_t *buf; /* Message buffer. */
+};
+
 /** Empty masks for known item types. */
 static const union {
 	struct rte_flow_item_port_id port_id;
@@ -1956,8 +1969,8 @@ struct pedit_parser {
 /**
  * Send Netlink message with acknowledgment.
  *
- * @param nl
- *   Libmnl socket to use.
+ * @param ctx
+ *   Flow context to use.
  * @param nlh
  *   Message to send. This function always raises the NLM_F_ACK flag before
  *   sending.
@@ -1966,12 +1979,13 @@ struct pedit_parser {
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-flow_tcf_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
+flow_tcf_nl_ack(struct mlx5_flow_tcf_context *ctx, struct nlmsghdr *nlh)
 {
 	alignas(struct nlmsghdr)
 	uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
 		    nlh->nlmsg_len - sizeof(*nlh)];
-	uint32_t seq = random();
+	uint32_t seq = ctx->seq++;
+	struct mnl_socket *nl = ctx->nl;
 	int ret;
 
 	nlh->nlmsg_flags |= NLM_F_ACK;
@@ -2006,7 +2020,7 @@ struct pedit_parser {
 	       struct rte_flow_error *error)
 {
 	struct priv *priv = dev->data->dev_private;
-	struct mnl_socket *nl = priv->mnl_socket;
+	struct mlx5_flow_tcf_context *nl = priv->tcf_context;
 	struct mlx5_flow *dev_flow;
 	struct nlmsghdr *nlh;
 
@@ -2035,7 +2049,7 @@ struct pedit_parser {
 flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
 {
 	struct priv *priv = dev->data->dev_private;
-	struct mnl_socket *nl = priv->mnl_socket;
+	struct mlx5_flow_tcf_context *nl = priv->tcf_context;
 	struct mlx5_flow *dev_flow;
 	struct nlmsghdr *nlh;
 
@@ -2087,10 +2101,47 @@ struct pedit_parser {
 };
 
 /**
- * Initialize ingress qdisc of a given network interface.
+ * Create and configure a libmnl socket for Netlink flow rules.
+ *
+ * @return
+ *   A valid libmnl socket object pointer on success, NULL otherwise and
+ *   rte_errno is set.
+ */
+static struct mnl_socket *
+mlx5_flow_mnl_socket_create(void)
+{
+	struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
+
+	if (nl) {
+		mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
+				      sizeof(int));
+		if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
+			return nl;
+	}
+	rte_errno = errno;
+	if (nl)
+		mnl_socket_close(nl);
+	return NULL;
+}
+
+/**
+ * Destroy a libmnl socket.
  *
  * @param nl
  *   Libmnl socket of the @p NETLINK_ROUTE kind.
+ */
+static void
+mlx5_flow_mnl_socket_destroy(struct mnl_socket *nl)
+{
+	if (nl)
+		mnl_socket_close(nl);
+}
+
+/**
+ * Initialize ingress qdisc of a given network interface.
+ *
+ * @param nl
+ *   Pointer to tc-flower context to use.
  * @param ifindex
  *   Index of network interface to initialize.
  * @param[out] error
@@ -2100,8 +2151,8 @@ struct pedit_parser {
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
-		   struct rte_flow_error *error)
+mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *nl,
+		   unsigned int ifindex, struct rte_flow_error *error)
 {
 	struct nlmsghdr *nlh;
 	struct tcmsg *tcm;
@@ -2143,37 +2194,47 @@ struct pedit_parser {
 }
 
 /**
- * Create and configure a libmnl socket for Netlink flow rules.
+ * Create libmnl context for Netlink flow rules.
  *
  * @return
  *   A valid libmnl socket object pointer on success, NULL otherwise and
  *   rte_errno is set.
  */
-struct mnl_socket *
-mlx5_flow_tcf_socket_create(void)
+struct mlx5_flow_tcf_context *
+mlx5_flow_tcf_context_create(void)
 {
-	struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
-
-	if (nl) {
-		mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
-				      sizeof(int));
-		if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
-			return nl;
-	}
-	rte_errno = errno;
-	if (nl)
-		mnl_socket_close(nl);
+	struct mlx5_flow_tcf_context *ctx = rte_zmalloc(__func__,
+							sizeof(*ctx),
+							sizeof(uint32_t));
+	if (!ctx)
+		goto error;
+	ctx->nl = mlx5_flow_mnl_socket_create();
+	if (!ctx->nl)
+		goto error;
+	ctx->buf_size = MNL_SOCKET_BUFFER_SIZE;
+	ctx->buf = rte_zmalloc(__func__,
+			       ctx->buf_size, sizeof(uint32_t));
+	if (!ctx->buf)
+		goto error;
+	ctx->seq = random();
+	return ctx;
+error:
+	mlx5_flow_tcf_context_destroy(ctx);
 	return NULL;
 }
 
 /**
- * Destroy a libmnl socket.
+ * Destroy a libmnl context.
  *
  * @param nl
  *   Libmnl socket of the @p NETLINK_ROUTE kind.
  */
 void
-mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl)
+mlx5_flow_tcf_context_destroy(struct mlx5_flow_tcf_context *ctx)
 {
-	mnl_socket_close(nl);
+	if (!ctx)
+		return;
+	mlx5_flow_mnl_socket_destroy(ctx->nl);
+	rte_free(ctx->buf);
+	rte_free(ctx);
 }
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 21+ messages in thread
* [dpdk-dev] [PATCH v3 2/2] net/mlx5: support e-switch flow count action
  2018-10-11 13:04 ` [dpdk-dev] [PATCH v2 2/2] net/mlx5: support e-switch flow count action Mordechay Haimovsky
                     ` (2 preceding siblings ...)
  2018-10-16 23:50   ` [dpdk-dev] [PATCH v3 1/2] net/mlx5: refactor TC-flow infrastructure Mordechay Haimovsky
@ 2018-10-16 23:50   ` Mordechay Haimovsky
  2018-10-17 17:24     ` [dpdk-dev] [PATCH v4 0/3] " Moti Haimovsky
                       ` (3 more replies)
  3 siblings, 4 replies; 21+ messages in thread
From: Mordechay Haimovsky @ 2018-10-16 23:50 UTC (permalink / raw)
  To: Shahaf Shuler; +Cc: dev, Mordechay Haimovsky
This commit adds support for configuring flows destined to the mlx5
eswitch with 'count' action and for querying these counts at runtime.
Each flow rule configured by the mlx5 driver is implicitly assigned
with flow counters. These counters can be retrieved when querying
the flow rule via Netlink, they can be found in each flow action
section of the reply. Hence, supporting the 'count' action in the
flow configuration command is straight-forward. When transposing
the command to a tc Netlink message we just ignore it instead of
rejecting it.
In the 'flow query count' side, the command now uses tc Netlink
query command in order to retrieve the values of the flow counters.
Signed-off-by: Moti Haimovsky <motih@mellanox.com>
---
v3:
 * Rebase on top of
   d80c8167c4fe ("net/mlx5: fix compilation issue on ARM SOC")
 * Code modifications accordig to review by Shahaf S.
   see message ID: 1539263057-16678-3-git-send-email-motih@mellanox.com
v2:
 * Rebase on top of 3f4722ee01e7 ("net/mlx5: refactor TC-flow infrastructure")
---
 drivers/net/mlx5/mlx5_flow.c       |  99 +++----
 drivers/net/mlx5/mlx5_flow.h       |  12 +-
 drivers/net/mlx5/mlx5_flow_dv.c    |   1 +
 drivers/net/mlx5/mlx5_flow_tcf.c   | 515 ++++++++++++++++++++++++++++++++++++-
 drivers/net/mlx5/mlx5_flow_verbs.c |  87 +++++++
 5 files changed, 642 insertions(+), 72 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index bd70fce..dc68e46 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1653,6 +1653,19 @@ uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
 {
 }
 
+int
+mlx5_flow_null_query(struct rte_eth_dev *dev __rte_unused,
+		     struct rte_flow *flow __rte_unused,
+		     const struct rte_flow_action *actions __rte_unused,
+		     void *data __rte_unused,
+		     struct rte_flow_error *error)
+{
+	return rte_flow_error_set(error, ENOTSUP,
+				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				  NULL,
+				  "counters are not available");
+}
+
 /* Void driver to protect from null pointer reference. */
 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
 	.validate = flow_null_validate,
@@ -1661,6 +1674,7 @@ uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
 	.apply = flow_null_apply,
 	.remove = flow_null_remove,
 	.destroy = flow_null_destroy,
+	.query = mlx5_flow_null_query,
 };
 
 /**
@@ -2344,59 +2358,25 @@ struct rte_flow *
 }
 
 /**
- * Query flow counter.
- *
- * @param flow
- *   Pointer to the flow.
+ * Query a flows.
  *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
+ * @see rte_flow_query()
+ * @see rte_flow_ops
  */
 static int
-mlx5_flow_query_count(struct rte_flow *flow __rte_unused,
-		      void *data __rte_unused,
-		      struct rte_flow_error *error)
+flow_drv_query(struct rte_eth_dev *dev,
+	       struct rte_flow *flow,
+	       const struct rte_flow_action *actions,
+	       void *data,
+	       struct rte_flow_error *error)
 {
-#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
-	if (flow->actions & MLX5_FLOW_ACTION_COUNT) {
-		struct rte_flow_query_count *qc = data;
-		uint64_t counters[2] = {0, 0};
-		struct ibv_query_counter_set_attr query_cs_attr = {
-			.cs = flow->counter->cs,
-			.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
-		};
-		struct ibv_counter_set_data query_out = {
-			.out = counters,
-			.outlen = 2 * sizeof(uint64_t),
-		};
-		int err = mlx5_glue->query_counter_set(&query_cs_attr,
-						       &query_out);
+	const struct mlx5_flow_driver_ops *fops;
+	enum mlx5_flow_drv_type ftype = flow->drv_type;
 
-		if (err)
-			return rte_flow_error_set
-				(error, err,
-				 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-				 NULL,
-				 "cannot read counter");
-		qc->hits_set = 1;
-		qc->bytes_set = 1;
-		qc->hits = counters[0] - flow->counter->hits;
-		qc->bytes = counters[1] - flow->counter->bytes;
-		if (qc->reset) {
-			flow->counter->hits = counters[0];
-			flow->counter->bytes = counters[1];
-		}
-		return 0;
-	}
-	return rte_flow_error_set(error, EINVAL,
-				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-				  NULL,
-				  "flow does not have counter");
-#endif
-	return rte_flow_error_set(error, ENOTSUP,
-				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-				  NULL,
-				  "counters are not available");
+	assert(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
+	fops = flow_get_drv_ops(ftype);
+
+	return fops->query(dev, flow, actions, data, error);
 }
 
 /**
@@ -2406,30 +2386,17 @@ struct rte_flow *
  * @see rte_flow_ops
  */
 int
-mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
+mlx5_flow_query(struct rte_eth_dev *dev,
 		struct rte_flow *flow,
 		const struct rte_flow_action *actions,
 		void *data,
 		struct rte_flow_error *error)
 {
-	int ret = 0;
+	int ret;
 
-	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
-		switch (actions->type) {
-		case RTE_FLOW_ACTION_TYPE_VOID:
-			break;
-		case RTE_FLOW_ACTION_TYPE_COUNT:
-			ret = mlx5_flow_query_count(flow, data, error);
-			break;
-		default:
-			return rte_flow_error_set(error, ENOTSUP,
-						  RTE_FLOW_ERROR_TYPE_ACTION,
-						  actions,
-						  "action not supported");
-		}
-		if (ret < 0)
-			return ret;
-	}
+	ret = flow_drv_query(dev, flow, actions, data, error);
+	if (ret < 0)
+		return ret;
 	return 0;
 }
 
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 5cb05ba..49fa1cb 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -239,7 +239,6 @@ struct rte_flow {
 	struct rte_flow_action_rss rss;/**< RSS context. */
 	uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */
 	uint16_t (*queue)[]; /**< Destination queues to redirect traffic to. */
-	void *nl_flow; /**< Netlink flow buffer if relevant. */
 	LIST_HEAD(dev_flows, mlx5_flow) dev_flows;
 	/**< Device flows that are part of the flow. */
 	uint32_t actions; /**< Bit-fields which mark all detected actions. */
@@ -265,6 +264,11 @@ typedef void (*mlx5_flow_remove_t)(struct rte_eth_dev *dev,
 				   struct rte_flow *flow);
 typedef void (*mlx5_flow_destroy_t)(struct rte_eth_dev *dev,
 				    struct rte_flow *flow);
+typedef int (*mlx5_flow_query_t)(struct rte_eth_dev *dev,
+				 struct rte_flow *flow,
+				 const struct rte_flow_action *actions,
+				 void *data,
+				 struct rte_flow_error *error);
 struct mlx5_flow_driver_ops {
 	mlx5_flow_validate_t validate;
 	mlx5_flow_prepare_t prepare;
@@ -272,10 +276,16 @@ struct mlx5_flow_driver_ops {
 	mlx5_flow_apply_t apply;
 	mlx5_flow_remove_t remove;
 	mlx5_flow_destroy_t destroy;
+	mlx5_flow_query_t query;
 };
 
 /* mlx5_flow.c */
 
+int mlx5_flow_null_query(struct rte_eth_dev *dev,
+			 struct rte_flow *flow,
+			 const struct rte_flow_action *actions,
+			 void *data,
+			 struct rte_flow_error *error);
 uint64_t mlx5_flow_hashfields_adjust(struct mlx5_flow *dev_flow, int tunnel,
 				     uint32_t layer_types,
 				     uint64_t hash_fields);
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index becbc57..188aca2 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -1370,6 +1370,7 @@
 	.apply = flow_dv_apply,
 	.remove = flow_dv_remove,
 	.destroy = flow_dv_destroy,
+	.query = mlx5_flow_null_query,
 };
 
 #endif /* HAVE_IBV_FLOW_DV_SUPPORT */
diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
index f232373..b9f23ea 100644
--- a/drivers/net/mlx5/mlx5_flow_tcf.c
+++ b/drivers/net/mlx5/mlx5_flow_tcf.c
@@ -6,6 +6,7 @@
 #include <assert.h>
 #include <errno.h>
 #include <libmnl/libmnl.h>
+#include <linux/gen_stats.h>
 #include <linux/if_ether.h>
 #include <linux/netlink.h>
 #include <linux/pkt_cls.h>
@@ -231,6 +232,10 @@ struct tc_pedit_sel {
 #define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
 #endif
 
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
 /**
  * Structure for holding netlink context.
  * Note the size of the message buffer which is MNL_SOCKET_BUFFER_SIZE.
@@ -241,7 +246,16 @@ struct mlx5_flow_tcf_context {
 	struct mnl_socket *nl; /* NETLINK_ROUTE libmnl socket. */
 	uint32_t seq; /* Message sequence number. */
 	uint32_t buf_size; /* Message buffer size. */
-	uint8_t *buf; /* Message buffer. */
+	uint8_t *buf;
+	/* Message buffer (used for receiving large netlink messages). */
+};
+
+/** Structure used when extracting the values of a flow counters
+ * from a netlink message.
+ */
+struct flow_tcf_stats_basic {
+	bool valid;
+	struct gnet_stats_basic counters;
 };
 
 /** Empty masks for known item types. */
@@ -356,6 +370,51 @@ struct pedit_parser {
 	struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
 };
 
+/**
+ * Create space for using the implicitly created TC flow counter.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ *
+ * @return
+ *   A pointer to the counter data structure, NULL otherwise and
+ *   rte_errno is set.
+ */
+static struct mlx5_flow_counter *
+flow_tcf_counter_new(void)
+{
+	struct mlx5_flow_counter *cnt;
+
+	struct mlx5_flow_counter tmpl = {
+		.ref_cnt = 1,
+		.shared = 0,
+		.id = 0,
+		.cs = NULL,
+		.hits = 0,
+		.bytes = 0,
+	};
+	cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
+	if (!cnt) {
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+	*cnt = tmpl;
+	/* Implicit counter, do not add to list. */
+	return cnt;
+}
+
+/**
+ * Release a flow counter.
+ *
+ * @param[in] counter
+ *   Pointer to the counter handler.
+ */
+static void
+flow_tcf_counter_release(struct mlx5_flow_counter *counter)
+{
+	if (--counter->ref_cnt == 0)
+		rte_free(counter);
+}
 
 /**
  * Set pedit key of transport (TCP/UDP) port value
@@ -1067,6 +1126,8 @@ struct pedit_parser {
 		case RTE_FLOW_ACTION_TYPE_DROP:
 			current_action_flag = MLX5_FLOW_ACTION_DROP;
 			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			break;
 		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
 			current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN;
 			break;
@@ -1342,6 +1403,8 @@ struct pedit_parser {
 				SZ_NLATTR_TYPE_OF(struct tc_gact);
 			flags |= MLX5_FLOW_ACTION_DROP;
 			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			break;
 		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
 			flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
 			goto action_of_vlan;
@@ -1477,6 +1540,38 @@ struct pedit_parser {
 }
 
 /**
+ * Make adjustments for supporting count actions.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] dev_flow
+ *   Pointer to mlx5_flow.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 On success else a negative errno value is returned and rte_errno is set.
+ */
+static int
+flow_tcf_translate_action_count(struct rte_eth_dev *dev __rte_unused,
+				  struct mlx5_flow *dev_flow,
+				  struct rte_flow_error *error)
+{
+	struct rte_flow *flow = dev_flow->flow;
+
+	if (!flow->counter) {
+		flow->counter = flow_tcf_counter_new();
+		if (!flow->counter)
+			return rte_flow_error_set(error, rte_errno,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  NULL,
+						  "cannot get counter"
+						  " context.");
+	}
+	return 0;
+}
+
+/**
  * Translate flow for Linux TC flower and construct Netlink message.
  *
  * @param[in] priv
@@ -1533,6 +1628,7 @@ struct pedit_parser {
 	struct nlattr *na_vlan_id = NULL;
 	struct nlattr *na_vlan_priority = NULL;
 	uint64_t item_flags = 0;
+	int ret;
 
 	claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
 						PTOI_TABLE_SZ_MAX(dev)));
@@ -1875,6 +1971,16 @@ struct pedit_parser {
 			mnl_attr_nest_end(nlh, na_act);
 			mnl_attr_nest_end(nlh, na_act_index);
 			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			/*
+			 * Driver adds the count action implicitly for
+			 * each rule it creates.
+			 */
+			ret = flow_tcf_translate_action_count(dev,
+							      dev_flow, error);
+			if (ret < 0)
+				return ret;
+			break;
 		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
 			conf.of_push_vlan = NULL;
 			vlan_act = TCA_VLAN_ACT_POP;
@@ -2055,6 +2161,10 @@ struct pedit_parser {
 
 	if (!flow)
 		return;
+	if (flow->counter) {
+		flow_tcf_counter_release(flow->counter);
+		flow->counter = NULL;
+	}
 	dev_flow = LIST_FIRST(&flow->dev_flows);
 	if (!dev_flow)
 		return;
@@ -2091,6 +2201,400 @@ struct pedit_parser {
 	rte_free(dev_flow);
 }
 
+/**
+ * Parse rtnetlink message attributes filling the attribute table with the info
+ * retrieved.
+ *
+ * @param tb
+ *   Attribute table to be filled.
+ * @param[out] max
+ *   Maxinum entry in the attribute table.
+ * @param rte
+ *   The attributes section in the message to be parsed.
+ * @param len
+ *   The length of the attributes section in the message.
+ */
+static void
+flow_tcf_nl_parse_rtattr(struct rtattr *tb[], int max,
+			 struct rtattr *rta, int len)
+{
+	unsigned short type;
+	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
+	while (RTA_OK(rta, len)) {
+		type = rta->rta_type;
+		if (type <= max && !tb[type])
+			tb[type] = rta;
+		rta = RTA_NEXT(rta, len);
+	}
+}
+
+ /**
+  * Extract flow counters from flower action.
+  *
+  * @param rta
+  *   flower action stats properties in the Netlink message received.
+  * @param rta_type
+  *   The backward sequence of rta_types, as written in the attribute table,
+  *   we need to traverse in order to get to the requested object.
+  * @param idx
+  *   Current location in rta_type table.
+  * @param[out] data
+  *   data holding the count statistics of the rte_flow retrieved from
+  *   the message.
+  *
+  * @return
+  *   0 if data was found and retrieved, -1 otherwise.
+  */
+static int
+flow_tcf_nl_action_stats_parse_and_get(struct rtattr *rta,
+				       uint16_t rta_type[], int idx,
+				       struct gnet_stats_basic *data)
+{
+	struct rtattr *tbs[TCA_STATS_MAX + 1];
+
+	if (rta == NULL || idx < 0)
+		return -1;
+	flow_tcf_nl_parse_rtattr(tbs, TCA_STATS_MAX,
+				 RTA_DATA(rta), RTA_PAYLOAD(rta));
+	switch (rta_type[idx]) {
+	case TCA_STATS_BASIC:
+		if (tbs[TCA_STATS_BASIC]) {
+			memcpy(data, RTA_DATA(tbs[TCA_STATS_BASIC]),
+			       RTE_MIN(RTA_PAYLOAD(tbs[TCA_STATS_BASIC]),
+			       sizeof(*data)));
+			return 0;
+		}
+		break;
+	default:
+		break;
+	}
+	return -1;
+}
+
+ /**
+  * Parse flower single action retrieving the requested action attribute,
+  * if found.
+  *
+  * @param arg
+  *   flower action properties in the Netlink message received.
+  * @param rta_type
+  *   The backward sequence of rta_types, as written in the attribute table,
+  *   we need to traverse in order to get to the requested object.
+  * @param idx
+  *   Current location in rta_type table.
+  * @param[out] data
+  *   Count statistics retrieved from the message query.
+  *
+  * @return
+  *   0 if data was found and retrieved, -1 otherwise.
+  */
+static int
+flow_tcf_nl_parse_one_action_and_get(struct rtattr *arg,
+				     uint16_t rta_type[], int idx, void *data)
+{
+	struct rtattr *tb[TCA_ACT_MAX + 1];
+
+	if (arg == NULL || idx < 0)
+		return -1;
+	flow_tcf_nl_parse_rtattr(tb, TCA_ACT_MAX,
+				 RTA_DATA(arg), RTA_PAYLOAD(arg));
+	if (tb[TCA_ACT_KIND] == NULL)
+		return -1;
+	switch (rta_type[idx]) {
+	case TCA_ACT_STATS:
+		if (tb[TCA_ACT_STATS])
+			return flow_tcf_nl_action_stats_parse_and_get
+					(tb[TCA_ACT_STATS],
+					 rta_type, --idx,
+					 (struct gnet_stats_basic *)data);
+		break;
+	default:
+		break;
+	}
+	return -1;
+}
+
+ /**
+  * Parse flower action section in the message retrieving the requested
+  * attribute from the first action that provides it.
+  *
+  * @param opt
+  *   flower section in the Netlink message received.
+  * @param rta_type
+  *   The backward sequence of rta_types, as written in the attribute table,
+  *   we need to traverse in order to get to the requested object.
+  * @param idx
+  *   Current location in rta_type table.
+  * @param[out] data
+  *   data retrieved from the message query.
+  *
+  * @return
+  *   0 if data was found and retrieved, -1 otherwise.
+  */
+static int
+flow_tcf_nl_action_parse_and_get(const struct rtattr *arg,
+				 uint16_t rta_type[], int idx, void *data)
+{
+	struct rtattr *tb[TCA_ACT_MAX_PRIO + 1];
+	int i;
+
+	if (arg == NULL || idx < 0)
+		return -1;
+	flow_tcf_nl_parse_rtattr(tb, TCA_ACT_MAX_PRIO,
+				 RTA_DATA(arg), RTA_PAYLOAD(arg));
+	switch (rta_type[idx]) {
+	/*
+	 * flow counters are stored in the actions defined by the flow
+	 * and not in the flow itself, therefore we need to traverse the
+	 * flower chain of actions in search for them.
+	 *
+	 * Note that the index is not decremented here.
+	 */
+	case TCA_ACT_STATS:
+		for (i = 0; i <= TCA_ACT_MAX_PRIO; i++) {
+			if (tb[i] &&
+			!flow_tcf_nl_parse_one_action_and_get(tb[i],
+							      rta_type,
+							      idx, data))
+				return 0;
+		}
+		break;
+	default:
+		break;
+	}
+	return -1;
+}
+
+ /**
+  * Parse flower classifier options in the message, retrieving the requested
+  * attribute if found.
+  *
+  * @param opt
+  *   flower section in the Netlink message received.
+  * @param rta_type
+  *   The backward sequence of rta_types, as written in the attribute table,
+  *   we need to traverse in order to get to the requested object.
+  * @param idx
+  *   Current location in rta_type table.
+  * @param[out] data
+  *   data retrieved from the message query.
+  *
+  * @return
+  *   0 if data was found and retrieved, -1 otherwise.
+  */
+static int
+flow_tcf_nl_opts_parse_and_get(struct rtattr *opt,
+			       uint16_t rta_type[], int idx, void *data)
+{
+	struct rtattr *tb[TCA_FLOWER_MAX + 1];
+
+	if (!opt || idx < 0)
+		return -1;
+	flow_tcf_nl_parse_rtattr(tb, TCA_FLOWER_MAX,
+				 RTA_DATA(opt), RTA_PAYLOAD(opt));
+	switch (rta_type[idx]) {
+	case TCA_FLOWER_ACT:
+		if (tb[TCA_FLOWER_ACT])
+			return flow_tcf_nl_action_parse_and_get
+							(tb[TCA_FLOWER_ACT],
+							 rta_type, --idx, data);
+		break;
+	default:
+		break;
+	}
+	return -1;
+}
+
+ /**
+  * Parse Netlink reply on filter query, retrieving the flow counters.
+  *
+  * @param nlh
+  *   Message received from Netlink.
+  * @param rta_type
+  *   The backward sequence of rta_types, as written in the attribute table,
+  *   we need to traverse in order to get to the requested object.
+  * @param idx
+  *   Current location in rta_type table.
+  * @param[out] data
+  *   data retrieved from the message query.
+  *
+  * @return
+  *   0 if data was found and retrieved, -1 otherwise.
+  */
+static int
+flow_tcf_nl_filter_parse_and_get(const struct nlmsghdr *nlh,
+				 uint16_t rta_type[], int idx, void *data)
+{
+	struct tcmsg *t = NLMSG_DATA(nlh);
+	int len = nlh->nlmsg_len;
+	struct rtattr *tb[TCA_MAX + 1];
+
+	if (idx < 0)
+		return -1;
+	if (nlh->nlmsg_type != RTM_NEWTFILTER &&
+	    nlh->nlmsg_type != RTM_GETTFILTER &&
+	    nlh->nlmsg_type != RTM_DELTFILTER)
+		return -1;
+	len -= NLMSG_LENGTH(sizeof(*t));
+	if (len < 0)
+		return -1;
+	flow_tcf_nl_parse_rtattr(tb, TCA_MAX, TCA_RTA(t), len);
+	/* Not a TC flower flow - bail out */
+	if (!tb[TCA_KIND] ||
+	    strcmp(RTA_DATA(tb[TCA_KIND]), "flower"))
+		return -1;
+	switch (rta_type[idx]) {
+	case TCA_OPTIONS:
+		if (tb[TCA_OPTIONS])
+			return flow_tcf_nl_opts_parse_and_get(tb[TCA_OPTIONS],
+							      rta_type,
+							      --idx, data);
+		break;
+	default:
+		break;
+	}
+	return -1;
+}
+
+/**
+ * A callback to parse Netlink reply on TC flower query.
+ *
+ * @param nlh
+ *   Message received from Netlink.
+ * @param[out] data
+ *   Pointer to data area to be filled by the parsing routine.
+ *   assumed to be a pinter to struct flow_tcf_stats_basic.
+ *
+ * @return
+ *   MNL_CB_OK value.
+ */
+static int
+flow_tcf_nl_message_get_stats_basic(const struct nlmsghdr *nlh, void *data)
+{
+	/*
+	 * The backward sequence of rta_types to pass in order to get
+	 *  to the counters.
+	 */
+	uint16_t rta_type[] = { TCA_STATS_BASIC, TCA_ACT_STATS,
+				TCA_FLOWER_ACT, TCA_OPTIONS };
+	struct flow_tcf_stats_basic *sb_data = data;
+
+	if (!flow_tcf_nl_filter_parse_and_get(nlh, rta_type,
+					      ARRAY_SIZE(rta_type) - 1,
+					      (void *)&sb_data->counters))
+		sb_data->valid = true;
+	return MNL_CB_OK;
+}
+
+/**
+ * Query a TC flower rule for its statistics via netlink.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in] flow
+ *   Pointer to the sub flow.
+ * @param[out] data
+ *   data retrieved by the query.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_tcf_query_count(struct rte_eth_dev *dev,
+			  struct rte_flow *flow,
+			  void *data,
+			  struct rte_flow_error *error)
+{
+	struct flow_tcf_stats_basic sb_data = { 0 };
+	struct rte_flow_query_count *qc = data;
+	struct priv *priv = dev->data->dev_private;
+	struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
+	struct mnl_socket *nl = ctx->nl;
+	struct mlx5_flow *dev_flow;
+	struct nlmsghdr *nlh;
+	uint32_t seq = priv->tcf_context->seq++;
+	ssize_t ret;
+	assert(qc);
+
+	dev_flow = LIST_FIRST(&flow->dev_flows);
+	/* E-Switch flow can't be expanded. */
+	assert(!LIST_NEXT(dev_flow, next));
+	if (!dev_flow->flow->counter)
+		goto notsup_exit;
+	nlh = dev_flow->tcf.nlh;
+	nlh->nlmsg_type = RTM_GETTFILTER;
+	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ECHO;
+	nlh->nlmsg_seq = seq;
+	if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) == -1)
+		goto error_exit;
+	do {
+		ret = mnl_socket_recvfrom(nl, ctx->buf, ctx->buf_size);
+		if (ret <= 0)
+			break;
+		ret = mnl_cb_run(ctx->buf, ret, seq,
+				 mnl_socket_get_portid(nl),
+				 flow_tcf_nl_message_get_stats_basic,
+				 (void *)&sb_data);
+	} while (ret > 0);
+	/* Return the delta from last reset. */
+	if (sb_data.valid) {
+		/* Return the delta from last reset. */
+		qc->hits_set = 1;
+		qc->bytes_set = 1;
+		qc->hits = sb_data.counters.packets;
+		qc->hits -= flow->counter->hits;
+		qc->bytes = sb_data.counters.bytes - flow->counter->bytes;
+		if (qc->reset) {
+			flow->counter->hits = sb_data.counters.packets;
+			flow->counter->bytes = sb_data.counters.bytes;
+		}
+		return 0;
+	}
+	return -1;
+error_exit:
+	return rte_flow_error_set
+			(error, errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+			 NULL, "netlink: failed to read flow rule statistics");
+notsup_exit:
+	return rte_flow_error_set
+			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+			 NULL, "counters are not available.");
+}
+
+/**
+ * Query a flows.
+ *
+ * @see rte_flow_query()
+ * @see rte_flow_ops
+ */
+static int
+flow_tcf_query(struct rte_eth_dev *dev,
+	       struct rte_flow *flow,
+	       const struct rte_flow_action *actions,
+	       void *data,
+	       struct rte_flow_error *error)
+{
+	int ret = -EINVAL;
+
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+		switch (actions->type) {
+		case RTE_FLOW_ACTION_TYPE_VOID:
+			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			ret = flow_tcf_query_count(dev, flow, data, error);
+			break;
+		default:
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  actions,
+						  "action not supported");
+		}
+	}
+	return ret;
+}
+
 const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
 	.validate = flow_tcf_validate,
 	.prepare = flow_tcf_prepare,
@@ -2098,6 +2602,7 @@ struct pedit_parser {
 	.apply = flow_tcf_apply,
 	.remove = flow_tcf_remove,
 	.destroy = flow_tcf_destroy,
+	.query = flow_tcf_query,
 };
 
 /**
@@ -2108,7 +2613,7 @@ struct pedit_parser {
  *   rte_errno is set.
  */
 static struct mnl_socket *
-mlx5_flow_mnl_socket_create(void)
+flow_tcf_mnl_socket_create(void)
 {
 	struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
 
@@ -2131,7 +2636,7 @@ struct pedit_parser {
  *   Libmnl socket of the @p NETLINK_ROUTE kind.
  */
 static void
-mlx5_flow_mnl_socket_destroy(struct mnl_socket *nl)
+flow_tcf_mnl_socket_destroy(struct mnl_socket *nl)
 {
 	if (nl)
 		mnl_socket_close(nl);
@@ -2208,7 +2713,7 @@ struct mlx5_flow_tcf_context *
 							sizeof(uint32_t));
 	if (!ctx)
 		goto error;
-	ctx->nl = mlx5_flow_mnl_socket_create();
+	ctx->nl = flow_tcf_mnl_socket_create();
 	if (!ctx->nl)
 		goto error;
 	ctx->buf_size = MNL_SOCKET_BUFFER_SIZE;
@@ -2234,7 +2739,7 @@ struct mlx5_flow_tcf_context *
 {
 	if (!ctx)
 		return;
-	mlx5_flow_mnl_socket_destroy(ctx->nl);
+	flow_tcf_mnl_socket_destroy(ctx->nl);
 	rte_free(ctx->buf);
 	rte_free(ctx);
 }
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
index 65c849c..d0cfcda 100644
--- a/drivers/net/mlx5/mlx5_flow_verbs.c
+++ b/drivers/net/mlx5/mlx5_flow_verbs.c
@@ -1651,6 +1651,92 @@
 	return -rte_errno;
 }
 
+/**
+ * Query a flows.
+ *
+ * @see rte_flow_query()
+ * @see rte_flow_ops
+ */
+static int
+flow_verbs_query_count(struct rte_eth_dev *dev __rte_unused,
+		       struct rte_flow *flow __rte_unused,
+		       void *data __rte_unused,
+		       struct rte_flow_error *error)
+{
+#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+	if (flow->actions & MLX5_FLOW_ACTION_COUNT) {
+		struct rte_flow_query_count *qc = data;
+		uint64_t counters[2] = {0, 0};
+		struct ibv_query_counter_set_attr query_cs_attr = {
+			.cs = flow->counter->cs,
+			.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
+		};
+		struct ibv_counter_set_data query_out = {
+			.out = counters,
+			.outlen = 2 * sizeof(uint64_t),
+		};
+		int err = mlx5_glue->query_counter_set(&query_cs_attr,
+						       &query_out);
+
+		if (err)
+			return rte_flow_error_set
+				(error, err,
+				 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				 NULL,
+				 "cannot read counter");
+		qc->hits_set = 1;
+		qc->bytes_set = 1;
+		qc->hits = counters[0] - flow->counter->hits;
+		qc->bytes = counters[1] - flow->counter->bytes;
+		if (qc->reset) {
+			flow->counter->hits = counters[0];
+			flow->counter->bytes = counters[1];
+		}
+		return 0;
+	}
+	return rte_flow_error_set(error, EINVAL,
+				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				  NULL,
+				  "flow does not have counter");
+#endif
+	return rte_flow_error_set(error, ENOTSUP,
+				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				  NULL,
+				  "counters are not available");
+}
+
+/**
+ * Query a flows.
+ *
+ * @see rte_flow_query()
+ * @see rte_flow_ops
+ */
+static int
+flow_verbs_query(struct rte_eth_dev *dev,
+		 struct rte_flow *flow,
+		 const struct rte_flow_action *actions,
+		 void *data,
+		 struct rte_flow_error *error)
+{
+	int ret = -EINVAL;
+
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+		switch (actions->type) {
+		case RTE_FLOW_ACTION_TYPE_VOID:
+			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			ret = flow_verbs_query_count(dev, flow, data, error);
+			break;
+		default:
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  actions,
+						  "action not supported");
+		}
+	}
+	return ret;
+}
+
 const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops = {
 	.validate = flow_verbs_validate,
 	.prepare = flow_verbs_prepare,
@@ -1658,4 +1744,5 @@
 	.apply = flow_verbs_apply,
 	.remove = flow_verbs_remove,
 	.destroy = flow_verbs_destroy,
+	.query = flow_verbs_query,
 };
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 21+ messages in thread
* [dpdk-dev] [PATCH v4 0/3] support e-switch flow count action
  2018-10-16 23:50   ` [dpdk-dev] [PATCH v3 2/2] net/mlx5: support e-switch flow count action Mordechay Haimovsky
@ 2018-10-17 17:24     ` Moti Haimovsky
  2018-10-17 17:24     ` [dpdk-dev] [PATCH v4 1/3] net/mlx5: refactor TC-flow infrastructure Moti Haimovsky
                       ` (2 subsequent siblings)
  3 siblings, 0 replies; 21+ messages in thread
From: Moti Haimovsky @ 2018-10-17 17:24 UTC (permalink / raw)
  To: shahafs; +Cc: dev, Moti Haimovsky
The following patches add support in mlx5 PMD for configuring and
reading flow counters from the device e-switch.
Moti Haimovsky (3):
  net/mlx5: refactor TC-flow infrastructure
  net/mlx5: add flow query abstraction interface
  net/mlx5: support e-switch flow count action
 drivers/net/mlx5/mlx5.c            |  18 +-
 drivers/net/mlx5/mlx5.h            |   4 +-
 drivers/net/mlx5/mlx5_flow.c       |  99 ++----
 drivers/net/mlx5/mlx5_flow.h       |  15 +-
 drivers/net/mlx5/mlx5_flow_dv.c    |  19 ++
 drivers/net/mlx5/mlx5_flow_tcf.c   | 620 +++++++++++++++++++++++++++++++++++--
 drivers/net/mlx5/mlx5_flow_verbs.c |  87 ++++++
 7 files changed, 753 insertions(+), 109 deletions(-)
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 21+ messages in thread
* [dpdk-dev] [PATCH v4 1/3] net/mlx5: refactor TC-flow infrastructure
  2018-10-16 23:50   ` [dpdk-dev] [PATCH v3 2/2] net/mlx5: support e-switch flow count action Mordechay Haimovsky
  2018-10-17 17:24     ` [dpdk-dev] [PATCH v4 0/3] " Moti Haimovsky
@ 2018-10-17 17:24     ` Moti Haimovsky
  2018-10-17 17:24     ` [dpdk-dev] [PATCH v4 2/3] net/mlx5: add flow query abstraction interface Moti Haimovsky
  2018-10-17 17:24     ` [dpdk-dev] [PATCH v4 3/3] net/mlx5: support e-switch flow count action Moti Haimovsky
  3 siblings, 0 replies; 21+ messages in thread
From: Moti Haimovsky @ 2018-10-17 17:24 UTC (permalink / raw)
  To: shahafs; +Cc: dev, Moti Haimovsky
This commit refactors tc_flow as a preparation to coming commits
that sends different type of messages and expect differ type of replies
while still using the same underlying routines.
Signed-off-by: Moti Haimovsky <motih@mellanox.com>
---
v3:
 * Rebase on top of
   d80c8167c4fe ("net/mlx5: fix compilation issue on ARM SOC")
v2:
 * Rebase on top of 3f4722ee01e7 ("net/mlx5: refactor TC-flow infrastructure")
---
 drivers/net/mlx5/mlx5.c          |  18 +++----
 drivers/net/mlx5/mlx5.h          |   4 +-
 drivers/net/mlx5/mlx5_flow.h     |   9 ++--
 drivers/net/mlx5/mlx5_flow_tcf.c | 113 ++++++++++++++++++++++++++++++---------
 4 files changed, 102 insertions(+), 42 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 795a219..13f2fd4 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -286,8 +286,8 @@
 		close(priv->nl_socket_route);
 	if (priv->nl_socket_rdma >= 0)
 		close(priv->nl_socket_rdma);
-	if (priv->mnl_socket)
-		mlx5_flow_tcf_socket_destroy(priv->mnl_socket);
+	if (priv->tcf_context)
+		mlx5_flow_tcf_context_destroy(priv->tcf_context);
 	ret = mlx5_hrxq_ibv_verify(dev);
 	if (ret)
 		DRV_LOG(WARNING, "port %u some hash Rx queue still remain",
@@ -1139,8 +1139,8 @@
 	claim_zero(mlx5_mac_addr_add(eth_dev, &mac, 0, 0));
 	if (vf && config.vf_nl_en)
 		mlx5_nl_mac_addr_sync(eth_dev);
-	priv->mnl_socket = mlx5_flow_tcf_socket_create();
-	if (!priv->mnl_socket) {
+	priv->tcf_context = mlx5_flow_tcf_context_create();
+	if (!priv->tcf_context) {
 		err = -rte_errno;
 		DRV_LOG(WARNING,
 			"flow rules relying on switch offloads will not be"
@@ -1155,7 +1155,7 @@
 			error.message =
 				"cannot retrieve network interface index";
 		} else {
-			err = mlx5_flow_tcf_init(priv->mnl_socket, ifindex,
+			err = mlx5_flow_tcf_init(priv->tcf_context, ifindex,
 						&error);
 		}
 		if (err) {
@@ -1163,8 +1163,8 @@
 				"flow rules relying on switch offloads will"
 				" not be supported: %s: %s",
 				error.message, strerror(rte_errno));
-			mlx5_flow_tcf_socket_destroy(priv->mnl_socket);
-			priv->mnl_socket = NULL;
+			mlx5_flow_tcf_context_destroy(priv->tcf_context);
+			priv->tcf_context = NULL;
 		}
 	}
 	TAILQ_INIT(&priv->flows);
@@ -1219,8 +1219,8 @@
 			close(priv->nl_socket_route);
 		if (priv->nl_socket_rdma >= 0)
 			close(priv->nl_socket_rdma);
-		if (priv->mnl_socket)
-			mlx5_flow_tcf_socket_destroy(priv->mnl_socket);
+		if (priv->tcf_context)
+			mlx5_flow_tcf_context_destroy(priv->tcf_context);
 		if (own_domain_id)
 			claim_zero(rte_eth_switch_domain_free(priv->domain_id));
 		rte_free(priv);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 2dec88a..d14239c 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -169,7 +169,7 @@ struct mlx5_drop {
 	struct mlx5_rxq_ibv *rxq; /* Verbs Rx queue. */
 };
 
-struct mnl_socket;
+struct mlx5_flow_tcf_context;
 
 struct priv {
 	LIST_ENTRY(priv) mem_event_cb; /* Called by memory event callback. */
@@ -236,7 +236,7 @@ struct priv {
 	rte_spinlock_t uar_lock[MLX5_UAR_PAGE_NUM_MAX];
 	/* UAR same-page access control required in 32bit implementations. */
 #endif
-	struct mnl_socket *mnl_socket; /* Libmnl socket. */
+	struct mlx5_flow_tcf_context *tcf_context; /* TC flower context. */
 };
 
 #define PORT_ID(priv) ((priv)->dev_data->port_id)
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 094f666..bb5b5cc 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -239,7 +239,6 @@ struct rte_flow {
 	struct rte_flow_action_rss rss;/**< RSS context. */
 	uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */
 	uint16_t (*queue)[]; /**< Destination queues to redirect traffic to. */
-	void *nl_flow; /**< Netlink flow buffer if relevant. */
 	LIST_HEAD(dev_flows, mlx5_flow) dev_flows;
 	/**< Device flows that are part of the flow. */
 	uint32_t actions; /**< Bit-fields which mark all detected actions. */
@@ -346,9 +345,9 @@ int mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
 
 /* mlx5_flow_tcf.c */
 
-int mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
-		       struct rte_flow_error *error);
-struct mnl_socket *mlx5_flow_tcf_socket_create(void);
-void mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl);
+int mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *ctx,
+		       unsigned int ifindex, struct rte_flow_error *error);
+struct mlx5_flow_tcf_context *mlx5_flow_tcf_context_create(void);
+void mlx5_flow_tcf_context_destroy(struct mlx5_flow_tcf_context *ctx);
 
 #endif /* RTE_PMD_MLX5_FLOW_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
index 4b51a85..c9dbbc3 100644
--- a/drivers/net/mlx5/mlx5_flow_tcf.c
+++ b/drivers/net/mlx5/mlx5_flow_tcf.c
@@ -231,6 +231,19 @@ struct tc_pedit_sel {
 #define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
 #endif
 
+/**
+ * Structure for holding netlink context.
+ * Note the size of the message buffer which is MNL_SOCKET_BUFFER_SIZE.
+ * Using this (8KB) buffer size ensures that netlink messages will never be
+ * truncated.
+ */
+struct mlx5_flow_tcf_context {
+	struct mnl_socket *nl; /* NETLINK_ROUTE libmnl socket. */
+	uint32_t seq; /* Message sequence number. */
+	uint32_t buf_size; /* Message buffer size. */
+	uint8_t *buf; /* Message buffer. */
+};
+
 /** Empty masks for known item types. */
 static const union {
 	struct rte_flow_item_port_id port_id;
@@ -1956,8 +1969,8 @@ struct pedit_parser {
 /**
  * Send Netlink message with acknowledgment.
  *
- * @param nl
- *   Libmnl socket to use.
+ * @param ctx
+ *   Flow context to use.
  * @param nlh
  *   Message to send. This function always raises the NLM_F_ACK flag before
  *   sending.
@@ -1966,12 +1979,13 @@ struct pedit_parser {
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-flow_tcf_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
+flow_tcf_nl_ack(struct mlx5_flow_tcf_context *ctx, struct nlmsghdr *nlh)
 {
 	alignas(struct nlmsghdr)
 	uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
 		    nlh->nlmsg_len - sizeof(*nlh)];
-	uint32_t seq = random();
+	uint32_t seq = ctx->seq++;
+	struct mnl_socket *nl = ctx->nl;
 	int ret;
 
 	nlh->nlmsg_flags |= NLM_F_ACK;
@@ -2006,7 +2020,7 @@ struct pedit_parser {
 	       struct rte_flow_error *error)
 {
 	struct priv *priv = dev->data->dev_private;
-	struct mnl_socket *nl = priv->mnl_socket;
+	struct mlx5_flow_tcf_context *nl = priv->tcf_context;
 	struct mlx5_flow *dev_flow;
 	struct nlmsghdr *nlh;
 
@@ -2035,7 +2049,7 @@ struct pedit_parser {
 flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
 {
 	struct priv *priv = dev->data->dev_private;
-	struct mnl_socket *nl = priv->mnl_socket;
+	struct mlx5_flow_tcf_context *nl = priv->tcf_context;
 	struct mlx5_flow *dev_flow;
 	struct nlmsghdr *nlh;
 
@@ -2087,10 +2101,47 @@ struct pedit_parser {
 };
 
 /**
- * Initialize ingress qdisc of a given network interface.
+ * Create and configure a libmnl socket for Netlink flow rules.
+ *
+ * @return
+ *   A valid libmnl socket object pointer on success, NULL otherwise and
+ *   rte_errno is set.
+ */
+static struct mnl_socket *
+flow_tcf_mnl_socket_create(void)
+{
+	struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
+
+	if (nl) {
+		mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
+				      sizeof(int));
+		if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
+			return nl;
+	}
+	rte_errno = errno;
+	if (nl)
+		mnl_socket_close(nl);
+	return NULL;
+}
+
+/**
+ * Destroy a libmnl socket.
  *
  * @param nl
  *   Libmnl socket of the @p NETLINK_ROUTE kind.
+ */
+static void
+flow_tcf_mnl_socket_destroy(struct mnl_socket *nl)
+{
+	if (nl)
+		mnl_socket_close(nl);
+}
+
+/**
+ * Initialize ingress qdisc of a given network interface.
+ *
+ * @param nl
+ *   Pointer to tc-flower context to use.
  * @param ifindex
  *   Index of network interface to initialize.
  * @param[out] error
@@ -2100,8 +2151,8 @@ struct pedit_parser {
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
-		   struct rte_flow_error *error)
+mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *nl,
+		   unsigned int ifindex, struct rte_flow_error *error)
 {
 	struct nlmsghdr *nlh;
 	struct tcmsg *tcm;
@@ -2143,37 +2194,47 @@ struct pedit_parser {
 }
 
 /**
- * Create and configure a libmnl socket for Netlink flow rules.
+ * Create libmnl context for Netlink flow rules.
  *
  * @return
  *   A valid libmnl socket object pointer on success, NULL otherwise and
  *   rte_errno is set.
  */
-struct mnl_socket *
-mlx5_flow_tcf_socket_create(void)
+struct mlx5_flow_tcf_context *
+mlx5_flow_tcf_context_create(void)
 {
-	struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
-
-	if (nl) {
-		mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
-				      sizeof(int));
-		if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
-			return nl;
-	}
-	rte_errno = errno;
-	if (nl)
-		mnl_socket_close(nl);
+	struct mlx5_flow_tcf_context *ctx = rte_zmalloc(__func__,
+							sizeof(*ctx),
+							sizeof(uint32_t));
+	if (!ctx)
+		goto error;
+	ctx->nl = flow_tcf_mnl_socket_create();
+	if (!ctx->nl)
+		goto error;
+	ctx->buf_size = MNL_SOCKET_BUFFER_SIZE;
+	ctx->buf = rte_zmalloc(__func__,
+			       ctx->buf_size, sizeof(uint32_t));
+	if (!ctx->buf)
+		goto error;
+	ctx->seq = random();
+	return ctx;
+error:
+	mlx5_flow_tcf_context_destroy(ctx);
 	return NULL;
 }
 
 /**
- * Destroy a libmnl socket.
+ * Destroy a libmnl context.
  *
  * @param nl
  *   Libmnl socket of the @p NETLINK_ROUTE kind.
  */
 void
-mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl)
+mlx5_flow_tcf_context_destroy(struct mlx5_flow_tcf_context *ctx)
 {
-	mnl_socket_close(nl);
+	if (!ctx)
+		return;
+	flow_tcf_mnl_socket_destroy(ctx->nl);
+	rte_free(ctx->buf);
+	rte_free(ctx);
 }
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 21+ messages in thread
* [dpdk-dev] [PATCH v4 2/3] net/mlx5: add flow query abstraction interface
  2018-10-16 23:50   ` [dpdk-dev] [PATCH v3 2/2] net/mlx5: support e-switch flow count action Mordechay Haimovsky
  2018-10-17 17:24     ` [dpdk-dev] [PATCH v4 0/3] " Moti Haimovsky
  2018-10-17 17:24     ` [dpdk-dev] [PATCH v4 1/3] net/mlx5: refactor TC-flow infrastructure Moti Haimovsky
@ 2018-10-17 17:24     ` Moti Haimovsky
  2018-10-17 17:24     ` [dpdk-dev] [PATCH v4 3/3] net/mlx5: support e-switch flow count action Moti Haimovsky
  3 siblings, 0 replies; 21+ messages in thread
From: Moti Haimovsky @ 2018-10-17 17:24 UTC (permalink / raw)
  To: shahafs; +Cc: dev, Moti Haimovsky
Flow engine now supports multiple driver paths with each having
its own flow query implantation routine.
This patch adds an abstraction to the flow query routine in accordance
to commit 0c76d1c9a18d ("net/mlx5: add abstraction for multiple flow
drivers") done by Yongseok Koh.
Signed-off-by: Moti Haimovsky <motih@mellanox.com>
---
v4:
 * First rlease of this patch.
---
 drivers/net/mlx5/mlx5_flow.c       | 99 ++++++++++++--------------------------
 drivers/net/mlx5/mlx5_flow.h       |  6 +++
 drivers/net/mlx5/mlx5_flow_dv.c    | 19 ++++++++
 drivers/net/mlx5/mlx5_flow_tcf.c   | 18 +++++++
 drivers/net/mlx5/mlx5_flow_verbs.c | 87 +++++++++++++++++++++++++++++++++
 5 files changed, 162 insertions(+), 67 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index bd70fce..fcabab0 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1653,6 +1653,17 @@ uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
 {
 }
 
+static int
+flow_null_query(struct rte_eth_dev *dev __rte_unused,
+		struct rte_flow *flow __rte_unused,
+		const struct rte_flow_action *actions __rte_unused,
+		void *data __rte_unused,
+		struct rte_flow_error *error __rte_unused)
+{
+	rte_errno = ENOTSUP;
+	return -rte_errno;
+}
+
 /* Void driver to protect from null pointer reference. */
 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
 	.validate = flow_null_validate,
@@ -1661,6 +1672,7 @@ uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
 	.apply = flow_null_apply,
 	.remove = flow_null_remove,
 	.destroy = flow_null_destroy,
+	.query = flow_null_query,
 };
 
 /**
@@ -2344,92 +2356,45 @@ struct rte_flow *
 }
 
 /**
- * Query flow counter.
- *
- * @param flow
- *   Pointer to the flow.
+ * Query a flow.
  *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
+ * @see rte_flow_query()
+ * @see rte_flow_ops
  */
 static int
-mlx5_flow_query_count(struct rte_flow *flow __rte_unused,
-		      void *data __rte_unused,
-		      struct rte_flow_error *error)
+flow_drv_query(struct rte_eth_dev *dev,
+	       struct rte_flow *flow,
+	       const struct rte_flow_action *actions,
+	       void *data,
+	       struct rte_flow_error *error)
 {
-#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
-	if (flow->actions & MLX5_FLOW_ACTION_COUNT) {
-		struct rte_flow_query_count *qc = data;
-		uint64_t counters[2] = {0, 0};
-		struct ibv_query_counter_set_attr query_cs_attr = {
-			.cs = flow->counter->cs,
-			.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
-		};
-		struct ibv_counter_set_data query_out = {
-			.out = counters,
-			.outlen = 2 * sizeof(uint64_t),
-		};
-		int err = mlx5_glue->query_counter_set(&query_cs_attr,
-						       &query_out);
+	const struct mlx5_flow_driver_ops *fops;
+	enum mlx5_flow_drv_type ftype = flow->drv_type;
 
-		if (err)
-			return rte_flow_error_set
-				(error, err,
-				 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-				 NULL,
-				 "cannot read counter");
-		qc->hits_set = 1;
-		qc->bytes_set = 1;
-		qc->hits = counters[0] - flow->counter->hits;
-		qc->bytes = counters[1] - flow->counter->bytes;
-		if (qc->reset) {
-			flow->counter->hits = counters[0];
-			flow->counter->bytes = counters[1];
-		}
-		return 0;
-	}
-	return rte_flow_error_set(error, EINVAL,
-				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-				  NULL,
-				  "flow does not have counter");
-#endif
-	return rte_flow_error_set(error, ENOTSUP,
-				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-				  NULL,
-				  "counters are not available");
+	assert(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
+	fops = flow_get_drv_ops(ftype);
+
+	return fops->query(dev, flow, actions, data, error);
 }
 
 /**
- * Query a flows.
+ * Query a flow.
  *
  * @see rte_flow_query()
  * @see rte_flow_ops
  */
 int
-mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
+mlx5_flow_query(struct rte_eth_dev *dev,
 		struct rte_flow *flow,
 		const struct rte_flow_action *actions,
 		void *data,
 		struct rte_flow_error *error)
 {
-	int ret = 0;
+	int ret;
 
-	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
-		switch (actions->type) {
-		case RTE_FLOW_ACTION_TYPE_VOID:
-			break;
-		case RTE_FLOW_ACTION_TYPE_COUNT:
-			ret = mlx5_flow_query_count(flow, data, error);
-			break;
-		default:
-			return rte_flow_error_set(error, ENOTSUP,
-						  RTE_FLOW_ERROR_TYPE_ACTION,
-						  actions,
-						  "action not supported");
-		}
-		if (ret < 0)
-			return ret;
-	}
+	ret = flow_drv_query(dev, flow, actions, data, error);
+	if (ret < 0)
+		return ret;
 	return 0;
 }
 
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index bb5b5cc..69f55cf 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -264,6 +264,11 @@ typedef void (*mlx5_flow_remove_t)(struct rte_eth_dev *dev,
 				   struct rte_flow *flow);
 typedef void (*mlx5_flow_destroy_t)(struct rte_eth_dev *dev,
 				    struct rte_flow *flow);
+typedef int (*mlx5_flow_query_t)(struct rte_eth_dev *dev,
+				 struct rte_flow *flow,
+				 const struct rte_flow_action *actions,
+				 void *data,
+				 struct rte_flow_error *error);
 struct mlx5_flow_driver_ops {
 	mlx5_flow_validate_t validate;
 	mlx5_flow_prepare_t prepare;
@@ -271,6 +276,7 @@ struct mlx5_flow_driver_ops {
 	mlx5_flow_apply_t apply;
 	mlx5_flow_remove_t remove;
 	mlx5_flow_destroy_t destroy;
+	mlx5_flow_query_t query;
 };
 
 /* mlx5_flow.c */
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index becbc57..58e3c33 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -1363,6 +1363,24 @@
 	}
 }
 
+/**
+ * Query a flow.
+ *
+ * @see rte_flow_query()
+ * @see rte_flow_ops
+ */
+static int
+flow_dv_query(struct rte_eth_dev *dev __rte_unused,
+	      struct rte_flow *flow __rte_unused,
+	      const struct rte_flow_action *actions __rte_unused,
+	      void *data __rte_unused,
+	      struct rte_flow_error *error __rte_unused)
+{
+	rte_errno = ENOTSUP;
+	return -rte_errno;
+}
+
+
 const struct mlx5_flow_driver_ops mlx5_flow_dv_drv_ops = {
 	.validate = flow_dv_validate,
 	.prepare = flow_dv_prepare,
@@ -1370,6 +1388,7 @@
 	.apply = flow_dv_apply,
 	.remove = flow_dv_remove,
 	.destroy = flow_dv_destroy,
+	.query = flow_dv_query,
 };
 
 #endif /* HAVE_IBV_FLOW_DV_SUPPORT */
diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
index c9dbbc3..db05750 100644
--- a/drivers/net/mlx5/mlx5_flow_tcf.c
+++ b/drivers/net/mlx5/mlx5_flow_tcf.c
@@ -2091,6 +2091,23 @@ struct pedit_parser {
 	rte_free(dev_flow);
 }
 
+/**
+ * Query a flow.
+ *
+ * @see rte_flow_query()
+ * @see rte_flow_ops
+ */
+static int
+flow_tcf_query(struct rte_eth_dev *dev __rte_unused,
+	       struct rte_flow *flow __rte_unused,
+	       const struct rte_flow_action *actions __rte_unused,
+	       void *data __rte_unused,
+	       struct rte_flow_error *error __rte_unused)
+{
+	rte_errno = ENOTSUP;
+	return -rte_errno;
+}
+
 const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
 	.validate = flow_tcf_validate,
 	.prepare = flow_tcf_prepare,
@@ -2098,6 +2115,7 @@ struct pedit_parser {
 	.apply = flow_tcf_apply,
 	.remove = flow_tcf_remove,
 	.destroy = flow_tcf_destroy,
+	.query = flow_tcf_query,
 };
 
 /**
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
index 65c849c..4ae974b 100644
--- a/drivers/net/mlx5/mlx5_flow_verbs.c
+++ b/drivers/net/mlx5/mlx5_flow_verbs.c
@@ -1651,6 +1651,92 @@
 	return -rte_errno;
 }
 
+/**
+ * Query a flows.
+ *
+ * @see rte_flow_query()
+ * @see rte_flow_ops
+ */
+static int
+flow_verbs_query_count(struct rte_eth_dev *dev __rte_unused,
+		       struct rte_flow *flow __rte_unused,
+		       void *data __rte_unused,
+		       struct rte_flow_error *error)
+{
+#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+	if (flow->actions & MLX5_FLOW_ACTION_COUNT) {
+		struct rte_flow_query_count *qc = data;
+		uint64_t counters[2] = {0, 0};
+		struct ibv_query_counter_set_attr query_cs_attr = {
+			.cs = flow->counter->cs,
+			.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
+		};
+		struct ibv_counter_set_data query_out = {
+			.out = counters,
+			.outlen = 2 * sizeof(uint64_t),
+		};
+		int err = mlx5_glue->query_counter_set(&query_cs_attr,
+						       &query_out);
+
+		if (err)
+			return rte_flow_error_set
+				(error, err,
+				 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				 NULL,
+				 "cannot read counter");
+		qc->hits_set = 1;
+		qc->bytes_set = 1;
+		qc->hits = counters[0] - flow->counter->hits;
+		qc->bytes = counters[1] - flow->counter->bytes;
+		if (qc->reset) {
+			flow->counter->hits = counters[0];
+			flow->counter->bytes = counters[1];
+		}
+		return 0;
+	}
+	return rte_flow_error_set(error, EINVAL,
+				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				  NULL,
+				  "flow does not have counter");
+#endif
+	return rte_flow_error_set(error, ENOTSUP,
+				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				  NULL,
+				  "counters are not available");
+}
+
+/**
+ * Query a flow.
+ *
+ * @see rte_flow_query()
+ * @see rte_flow_ops
+ */
+static int
+flow_verbs_query(struct rte_eth_dev *dev,
+		 struct rte_flow *flow,
+		 const struct rte_flow_action *actions,
+		 void *data,
+		 struct rte_flow_error *error)
+{
+	int ret = -EINVAL;
+
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+		switch (actions->type) {
+		case RTE_FLOW_ACTION_TYPE_VOID:
+			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			ret = flow_verbs_query_count(dev, flow, data, error);
+			break;
+		default:
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  actions,
+						  "action not supported");
+		}
+	}
+	return ret;
+}
+
 const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops = {
 	.validate = flow_verbs_validate,
 	.prepare = flow_verbs_prepare,
@@ -1658,4 +1744,5 @@
 	.apply = flow_verbs_apply,
 	.remove = flow_verbs_remove,
 	.destroy = flow_verbs_destroy,
+	.query = flow_verbs_query,
 };
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 21+ messages in thread
* [dpdk-dev] [PATCH v4 3/3] net/mlx5: support e-switch flow count action
  2018-10-16 23:50   ` [dpdk-dev] [PATCH v3 2/2] net/mlx5: support e-switch flow count action Mordechay Haimovsky
                       ` (2 preceding siblings ...)
  2018-10-17 17:24     ` [dpdk-dev] [PATCH v4 2/3] net/mlx5: add flow query abstraction interface Moti Haimovsky
@ 2018-10-17 17:24     ` Moti Haimovsky
  2018-10-18 18:29       ` [dpdk-dev] [PATCH v5 0/4] " Moti Haimovsky
                         ` (4 more replies)
  3 siblings, 5 replies; 21+ messages in thread
From: Moti Haimovsky @ 2018-10-17 17:24 UTC (permalink / raw)
  To: shahafs; +Cc: dev, Moti Haimovsky
This commit adds support for configuring flows destined to the mlx5
eswitch with 'count' action and for querying these counts at runtime.
Each flow rule configured by the mlx5 driver is implicitly assigned
with flow counters. These counters can be retrieved when querying
the flow rule via Netlink, they can be found in each flow action
section of the reply. Hence, supporting the 'count' action in the
flow configuration command is straight-forward. When transposing
the command to a tc Netlink message we just ignore it instead of
rejecting it.
In the 'flow query count' side, the command now uses tc Netlink
query command in order to retrieve the values of the flow counters.
Signed-off-by: Moti Haimovsky <motih@mellanox.com>
---
v4:
 * Split the patch in two.
   This patch now contains only the tcf implementation of count action
   and query.
 * Fixed compilation error found on "old" kernels.
 * Modifications according to inputs from Shahaf S.
v3:
 * Rebase on top of
   d80c8167c4fe ("net/mlx5: fix compilation issue on ARM SOC")
 * Code modifications accordig to review by Shahaf S.
   see message ID: 1539263057-16678-3-git-send-email-motih@mellanox.com
v2:
 * Rebase on top of 3f4722ee01e7 ("net/mlx5: refactor TC-flow infrastructure")
---
 drivers/net/mlx5/mlx5_flow_tcf.c | 503 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 496 insertions(+), 7 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
index db05750..0dcea5a 100644
--- a/drivers/net/mlx5/mlx5_flow_tcf.c
+++ b/drivers/net/mlx5/mlx5_flow_tcf.c
@@ -6,6 +6,7 @@
 #include <assert.h>
 #include <errno.h>
 #include <libmnl/libmnl.h>
+#include <linux/gen_stats.h>
 #include <linux/if_ether.h>
 #include <linux/netlink.h>
 #include <linux/pkt_cls.h>
@@ -26,6 +27,7 @@
 #include <rte_ether.h>
 #include <rte_flow.h>
 #include <rte_malloc.h>
+#include <rte_common.h>
 
 #include "mlx5.h"
 #include "mlx5_flow.h"
@@ -215,6 +217,15 @@ struct tc_pedit_sel {
 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS_MASK
 #define TCA_FLOWER_KEY_TCP_FLAGS_MASK 72
 #endif
+/* TCA_FLOWER_MAX contains the value of the last element in the
+ * TC flower classifier enum list we use. Currently the last element we use
+ * is TCA_FLOWER_KEY_TCP_FLAGS_MASK.
+ * Change this definition if adding elements from that table that are beyond
+ * TCA_FLOWER_KEY_TCP_FLAGS_MASK.
+ */
+#ifndef TCA_FLOWER_MAX
+#define TCA_FLOWER_MAX TCA_FLOWER_KEY_TCP_FLAGS_MASK
+#endif
 #ifndef HAVE_TC_ACT_GOTO_CHAIN
 #define TC_ACT_GOTO_CHAIN 0x20000000
 #endif
@@ -244,6 +255,14 @@ struct mlx5_flow_tcf_context {
 	uint8_t *buf; /* Message buffer. */
 };
 
+/** Structure used when extracting the values of a flow counters
+ * from a netlink message.
+ */
+struct flow_tcf_stats_basic {
+	bool valid;
+	struct gnet_stats_basic counters;
+};
+
 /** Empty masks for known item types. */
 static const union {
 	struct rte_flow_item_port_id port_id;
@@ -356,6 +375,43 @@ struct pedit_parser {
 	struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
 };
 
+/**
+ * Create space for using the implicitly created TC flow counter.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ *
+ * @return
+ *   A pointer to the counter data structure, NULL otherwise and
+ *   rte_errno is set.
+ */
+static struct mlx5_flow_counter *
+flow_tcf_counter_new(void)
+{
+	struct mlx5_flow_counter *cnt;
+
+	/*
+	 * eswitch counter cannot be shared and its id is unknown.
+	 * currently returning all with id 0.
+	 * in the future maybe better to switch to unique numbers.
+	 */
+	struct mlx5_flow_counter tmpl = {
+		.ref_cnt = 1,
+		.shared = 0,
+		.id = 0,
+		.cs = NULL,
+		.hits = 0,
+		.bytes = 0,
+	};
+	cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
+	if (!cnt) {
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+	*cnt = tmpl;
+	/* Implicit counter, do not add to list. */
+	return cnt;
+}
 
 /**
  * Set pedit key of transport (TCP/UDP) port value
@@ -1067,6 +1123,8 @@ struct pedit_parser {
 		case RTE_FLOW_ACTION_TYPE_DROP:
 			current_action_flag = MLX5_FLOW_ACTION_DROP;
 			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			break;
 		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
 			current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN;
 			break;
@@ -1342,6 +1400,8 @@ struct pedit_parser {
 				SZ_NLATTR_TYPE_OF(struct tc_gact);
 			flags |= MLX5_FLOW_ACTION_DROP;
 			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			break;
 		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
 			flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
 			goto action_of_vlan;
@@ -1477,6 +1537,38 @@ struct pedit_parser {
 }
 
 /**
+ * Make adjustments for supporting count actions.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] dev_flow
+ *   Pointer to mlx5_flow.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 On success else a negative errno value is returned and rte_errno is set.
+ */
+static int
+flow_tcf_translate_action_count(struct rte_eth_dev *dev __rte_unused,
+				  struct mlx5_flow *dev_flow,
+				  struct rte_flow_error *error)
+{
+	struct rte_flow *flow = dev_flow->flow;
+
+	if (!flow->counter) {
+		flow->counter = flow_tcf_counter_new();
+		if (!flow->counter)
+			return rte_flow_error_set(error, rte_errno,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  NULL,
+						  "cannot get counter"
+						  " context.");
+	}
+	return 0;
+}
+
+/**
  * Translate flow for Linux TC flower and construct Netlink message.
  *
  * @param[in] priv
@@ -1533,6 +1625,7 @@ struct pedit_parser {
 	struct nlattr *na_vlan_id = NULL;
 	struct nlattr *na_vlan_priority = NULL;
 	uint64_t item_flags = 0;
+	int ret;
 
 	claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
 						PTOI_TABLE_SZ_MAX(dev)));
@@ -1875,6 +1968,16 @@ struct pedit_parser {
 			mnl_attr_nest_end(nlh, na_act);
 			mnl_attr_nest_end(nlh, na_act_index);
 			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			/*
+			 * Driver adds the count action implicitly for
+			 * each rule it creates.
+			 */
+			ret = flow_tcf_translate_action_count(dev,
+							      dev_flow, error);
+			if (ret < 0)
+				return ret;
+			break;
 		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
 			conf.of_push_vlan = NULL;
 			vlan_act = TCA_VLAN_ACT_POP;
@@ -2055,6 +2158,12 @@ struct pedit_parser {
 
 	if (!flow)
 		return;
+	if (flow->counter) {
+		if (--flow->counter->ref_cnt == 0) {
+			rte_free(flow->counter);
+			flow->counter = NULL;
+		}
+	}
 	dev_flow = LIST_FIRST(&flow->dev_flows);
 	if (!dev_flow)
 		return;
@@ -2092,20 +2201,400 @@ struct pedit_parser {
 }
 
 /**
+ * Parse rtnetlink message attributes filling the attribute table with the info
+ * retrieved.
+ *
+ * @param tb
+ *   Attribute table to be filled.
+ * @param[out] max
+ *   Maxinum entry in the attribute table.
+ * @param rte
+ *   The attributes section in the message to be parsed.
+ * @param len
+ *   The length of the attributes section in the message.
+ */
+static void
+flow_tcf_nl_parse_rtattr(struct rtattr *tb[], int max,
+			 struct rtattr *rta, int len)
+{
+	unsigned short type;
+	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
+	while (RTA_OK(rta, len)) {
+		type = rta->rta_type;
+		if (type <= max && !tb[type])
+			tb[type] = rta;
+		rta = RTA_NEXT(rta, len);
+	}
+}
+
+/**
+ * Extract flow counters from flower action.
+ *
+ * @param rta
+ *   flower action stats properties in the Netlink message received.
+ * @param rta_type
+ *   The backward sequence of rta_types, as written in the attribute table,
+ *   we need to traverse in order to get to the requested object.
+ * @param idx
+ *   Current location in rta_type table.
+ * @param[out] data
+ *   data holding the count statistics of the rte_flow retrieved from
+ *   the message.
+ *
+ * @return
+ *   0 if data was found and retrieved, -1 otherwise.
+ */
+static int
+flow_tcf_nl_action_stats_parse_and_get(struct rtattr *rta,
+				       uint16_t rta_type[], int idx,
+				       struct gnet_stats_basic *data)
+{
+	struct rtattr *tbs[TCA_STATS_MAX + 1];
+
+	if (rta == NULL || idx < 0)
+		return -1;
+	flow_tcf_nl_parse_rtattr(tbs, TCA_STATS_MAX,
+				 RTA_DATA(rta), RTA_PAYLOAD(rta));
+	switch (rta_type[idx]) {
+	case TCA_STATS_BASIC:
+		if (tbs[TCA_STATS_BASIC]) {
+			memcpy(data, RTA_DATA(tbs[TCA_STATS_BASIC]),
+			       RTE_MIN(RTA_PAYLOAD(tbs[TCA_STATS_BASIC]),
+			       sizeof(*data)));
+			return 0;
+		}
+		break;
+	default:
+		break;
+	}
+	return -1;
+}
+
+/**
+ * Parse flower single action retrieving the requested action attribute,
+ * if found.
+ *
+ * @param arg
+ *   flower action properties in the Netlink message received.
+ * @param rta_type
+ *   The backward sequence of rta_types, as written in the attribute table,
+ *   we need to traverse in order to get to the requested object.
+ * @param idx
+ *   Current location in rta_type table.
+ * @param[out] data
+ *   Count statistics retrieved from the message query.
+ *
+ * @return
+ *   0 if data was found and retrieved, -1 otherwise.
+ */
+static int
+flow_tcf_nl_parse_one_action_and_get(struct rtattr *arg,
+				     uint16_t rta_type[], int idx, void *data)
+{
+	struct rtattr *tb[TCA_ACT_MAX + 1];
+
+	if (arg == NULL || idx < 0)
+		return -1;
+	flow_tcf_nl_parse_rtattr(tb, TCA_ACT_MAX,
+				 RTA_DATA(arg), RTA_PAYLOAD(arg));
+	if (tb[TCA_ACT_KIND] == NULL)
+		return -1;
+	switch (rta_type[idx]) {
+	case TCA_ACT_STATS:
+		if (tb[TCA_ACT_STATS])
+			return flow_tcf_nl_action_stats_parse_and_get
+					(tb[TCA_ACT_STATS],
+					 rta_type, --idx,
+					 (struct gnet_stats_basic *)data);
+		break;
+	default:
+		break;
+	}
+	return -1;
+}
+
+/**
+ * Parse flower action section in the message retrieving the requested
+ * attribute from the first action that provides it.
+ *
+ * @param opt
+ *   flower section in the Netlink message received.
+ * @param rta_type
+ *   The backward sequence of rta_types, as written in the attribute table,
+ *   we need to traverse in order to get to the requested object.
+ * @param idx
+ *   Current location in rta_type table.
+ * @param[out] data
+ *   data retrieved from the message query.
+ *
+ * @return
+ *   0 if data was found and retrieved, -1 otherwise.
+ */
+static int
+flow_tcf_nl_action_parse_and_get(const struct rtattr *arg,
+				 uint16_t rta_type[], int idx, void *data)
+{
+	struct rtattr *tb[TCA_ACT_MAX_PRIO + 1];
+	int i;
+
+	if (arg == NULL || idx < 0)
+		return -1;
+	flow_tcf_nl_parse_rtattr(tb, TCA_ACT_MAX_PRIO,
+				 RTA_DATA(arg), RTA_PAYLOAD(arg));
+	switch (rta_type[idx]) {
+	/*
+	 * flow counters are stored in the actions defined by the flow
+	 * and not in the flow itself, therefore we need to traverse the
+	 * flower chain of actions in search for them.
+	 *
+	 * Note that the index is not decremented here.
+	 */
+	case TCA_ACT_STATS:
+		for (i = 0; i <= TCA_ACT_MAX_PRIO; i++) {
+			if (tb[i] &&
+			!flow_tcf_nl_parse_one_action_and_get(tb[i],
+							      rta_type,
+							      idx, data))
+				return 0;
+		}
+		break;
+	default:
+		break;
+	}
+	return -1;
+}
+
+/**
+ * Parse flower classifier options in the message, retrieving the requested
+ * attribute if found.
+ *
+ * @param opt
+ *   flower section in the Netlink message received.
+ * @param rta_type
+ *   The backward sequence of rta_types, as written in the attribute table,
+ *   we need to traverse in order to get to the requested object.
+ * @param idx
+ *   Current location in rta_type table.
+ * @param[out] data
+ *   data retrieved from the message query.
+ *
+ * @return
+ *   0 if data was found and retrieved, -1 otherwise.
+ */
+static int
+flow_tcf_nl_opts_parse_and_get(struct rtattr *opt,
+			       uint16_t rta_type[], int idx, void *data)
+{
+	struct rtattr *tb[TCA_FLOWER_MAX + 1];
+
+	if (!opt || idx < 0)
+		return -1;
+	flow_tcf_nl_parse_rtattr(tb, TCA_FLOWER_MAX,
+				 RTA_DATA(opt), RTA_PAYLOAD(opt));
+	switch (rta_type[idx]) {
+	case TCA_FLOWER_ACT:
+		if (tb[TCA_FLOWER_ACT])
+			return flow_tcf_nl_action_parse_and_get
+							(tb[TCA_FLOWER_ACT],
+							 rta_type, --idx, data);
+		break;
+	default:
+		break;
+	}
+	return -1;
+}
+
+/**
+ * Parse Netlink reply on filter query, retrieving the flow counters.
+ *
+ * @param nlh
+ *   Message received from Netlink.
+ * @param rta_type
+ *   The backward sequence of rta_types, as written in the attribute table,
+ *   we need to traverse in order to get to the requested object.
+ * @param idx
+ *   Current location in rta_type table.
+ * @param[out] data
+ *   data retrieved from the message query.
+ *
+ * @return
+ *   0 if data was found and retrieved, -1 otherwise.
+ */
+static int
+flow_tcf_nl_filter_parse_and_get(const struct nlmsghdr *nlh,
+				 uint16_t rta_type[], int idx, void *data)
+{
+	struct tcmsg *t = NLMSG_DATA(nlh);
+	int len = nlh->nlmsg_len;
+	struct rtattr *tb[TCA_MAX + 1];
+
+	if (idx < 0)
+		return -1;
+	if (nlh->nlmsg_type != RTM_NEWTFILTER &&
+	    nlh->nlmsg_type != RTM_GETTFILTER &&
+	    nlh->nlmsg_type != RTM_DELTFILTER)
+		return -1;
+	len -= NLMSG_LENGTH(sizeof(*t));
+	if (len < 0)
+		return -1;
+	flow_tcf_nl_parse_rtattr(tb, TCA_MAX, TCA_RTA(t), len);
+	/* Not a TC flower flow - bail out */
+	if (!tb[TCA_KIND] ||
+	    strcmp(RTA_DATA(tb[TCA_KIND]), "flower"))
+		return -1;
+	switch (rta_type[idx]) {
+	case TCA_OPTIONS:
+		if (tb[TCA_OPTIONS])
+			return flow_tcf_nl_opts_parse_and_get(tb[TCA_OPTIONS],
+							      rta_type,
+							      --idx, data);
+		break;
+	default:
+		break;
+	}
+	return -1;
+}
+
+/**
+ * A callback to parse Netlink reply on TC flower query.
+ *
+ * @param nlh
+ *   Message received from Netlink.
+ * @param[out] data
+ *   Pointer to data area to be filled by the parsing routine.
+ *   assumed to be a pinter to struct flow_tcf_stats_basic.
+ *
+ * @return
+ *   MNL_CB_OK value.
+ */
+static int
+flow_tcf_nl_message_get_stats_basic(const struct nlmsghdr *nlh, void *data)
+{
+	/*
+	 * The backward sequence of rta_types to pass in order to get
+	 *  to the counters.
+	 */
+	uint16_t rta_type[] = { TCA_STATS_BASIC, TCA_ACT_STATS,
+				TCA_FLOWER_ACT, TCA_OPTIONS };
+	struct flow_tcf_stats_basic *sb_data = data;
+
+	if (!flow_tcf_nl_filter_parse_and_get(nlh, rta_type,
+					      RTE_DIM(rta_type) - 1,
+					      (void *)&sb_data->counters))
+		sb_data->valid = true;
+	return MNL_CB_OK;
+}
+
+/**
+ * Query a TC flower rule for its statistics via netlink.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in] flow
+ *   Pointer to the sub flow.
+ * @param[out] data
+ *   data retrieved by the query.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_tcf_query_count(struct rte_eth_dev *dev,
+			  struct rte_flow *flow,
+			  void *data,
+			  struct rte_flow_error *error)
+{
+	struct flow_tcf_stats_basic sb_data = { 0 };
+	struct rte_flow_query_count *qc = data;
+	struct priv *priv = dev->data->dev_private;
+	struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
+	struct mnl_socket *nl = ctx->nl;
+	struct mlx5_flow *dev_flow;
+	struct nlmsghdr *nlh;
+	uint32_t seq = priv->tcf_context->seq++;
+	ssize_t ret;
+	assert(qc);
+
+	dev_flow = LIST_FIRST(&flow->dev_flows);
+	/* E-Switch flow can't be expanded. */
+	assert(!LIST_NEXT(dev_flow, next));
+	if (!dev_flow->flow->counter)
+		goto notsup_exit;
+	nlh = dev_flow->tcf.nlh;
+	nlh->nlmsg_type = RTM_GETTFILTER;
+	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ECHO;
+	nlh->nlmsg_seq = seq;
+	if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) == -1)
+		goto error_exit;
+	do {
+		ret = mnl_socket_recvfrom(nl, ctx->buf, ctx->buf_size);
+		if (ret <= 0)
+			break;
+		ret = mnl_cb_run(ctx->buf, ret, seq,
+				 mnl_socket_get_portid(nl),
+				 flow_tcf_nl_message_get_stats_basic,
+				 (void *)&sb_data);
+	} while (ret > 0);
+	/* Return the delta from last reset. */
+	if (sb_data.valid) {
+		/* Return the delta from last reset. */
+		qc->hits_set = 1;
+		qc->bytes_set = 1;
+		qc->hits = sb_data.counters.packets;
+		qc->hits -= flow->counter->hits;
+		qc->bytes = sb_data.counters.bytes - flow->counter->bytes;
+		if (qc->reset) {
+			flow->counter->hits = sb_data.counters.packets;
+			flow->counter->bytes = sb_data.counters.bytes;
+		}
+		return 0;
+	}
+	return rte_flow_error_set(error, EINVAL,
+				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				  NULL,
+				  "flow does not have counter");
+error_exit:
+	return rte_flow_error_set
+			(error, errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+			 NULL, "netlink: failed to read flow rule counters");
+notsup_exit:
+	return rte_flow_error_set
+			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+			 NULL, "counters are not available.");
+}
+
+/**
  * Query a flow.
  *
  * @see rte_flow_query()
  * @see rte_flow_ops
  */
 static int
-flow_tcf_query(struct rte_eth_dev *dev __rte_unused,
-	       struct rte_flow *flow __rte_unused,
-	       const struct rte_flow_action *actions __rte_unused,
-	       void *data __rte_unused,
-	       struct rte_flow_error *error __rte_unused)
+flow_tcf_query(struct rte_eth_dev *dev,
+	       struct rte_flow *flow,
+	       const struct rte_flow_action *actions,
+	       void *data,
+	       struct rte_flow_error *error)
 {
-	rte_errno = ENOTSUP;
-	return -rte_errno;
+	int ret = -EINVAL;
+
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+		switch (actions->type) {
+		case RTE_FLOW_ACTION_TYPE_VOID:
+			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			ret = flow_tcf_query_count(dev, flow, data, error);
+			break;
+		default:
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  actions,
+						  "action not supported");
+		}
+	}
+	return ret;
 }
 
 const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 21+ messages in thread
* [dpdk-dev] [PATCH v5 0/4] support e-switch flow count action
  2018-10-17 17:24     ` [dpdk-dev] [PATCH v4 3/3] net/mlx5: support e-switch flow count action Moti Haimovsky
@ 2018-10-18 18:29       ` Moti Haimovsky
  2018-10-21  8:23         ` Shahaf Shuler
  2018-10-18 18:29       ` [dpdk-dev] [PATCH v5 1/4] net/mlx5: refactor TC-flow infrastructure Moti Haimovsky
                         ` (3 subsequent siblings)
  4 siblings, 1 reply; 21+ messages in thread
From: Moti Haimovsky @ 2018-10-18 18:29 UTC (permalink / raw)
  To: shahafs; +Cc: dev, Moti Haimovsky
The following series of patches adds support for reading the
mlx5 e-switch flow counters.
Moti Haimovsky (4):
  net/mlx5: refactor TC-flow infrastructure
  net/mlx5: tc flow to use the new infrastructure
  net/mlx5: add flow query abstraction interface
  net/mlx5: support e-switch flow count action
 drivers/net/mlx5/mlx5.c            |  20 +-
 drivers/net/mlx5/mlx5.h            |   4 +-
 drivers/net/mlx5/mlx5_flow.c       |  99 ++----
 drivers/net/mlx5/mlx5_flow.h       |  15 +-
 drivers/net/mlx5/mlx5_flow_dv.c    |  19 ++
 drivers/net/mlx5/mlx5_flow_tcf.c   | 644 +++++++++++++++++++++++++++++++++++--
 drivers/net/mlx5/mlx5_flow_verbs.c |  87 +++++
 7 files changed, 773 insertions(+), 115 deletions(-)
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 21+ messages in thread
* [dpdk-dev] [PATCH v5 1/4] net/mlx5: refactor TC-flow infrastructure
  2018-10-17 17:24     ` [dpdk-dev] [PATCH v4 3/3] net/mlx5: support e-switch flow count action Moti Haimovsky
  2018-10-18 18:29       ` [dpdk-dev] [PATCH v5 0/4] " Moti Haimovsky
@ 2018-10-18 18:29       ` Moti Haimovsky
  2018-10-18 18:29       ` [dpdk-dev] [PATCH v5 2/4] net/mlx5: tc flow to use the new infrastructure Moti Haimovsky
                         ` (2 subsequent siblings)
  4 siblings, 0 replies; 21+ messages in thread
From: Moti Haimovsky @ 2018-10-18 18:29 UTC (permalink / raw)
  To: shahafs; +Cc: dev, Moti Haimovsky
This commit refactors tc_flow as a preparation to coming commits
that sends different type of messages and expect differ type of replies
while still using the same underlying routines.
Signed-off-by: Moti Haimovsky <motih@mellanox.com>
---
v5:
 * Split the patch, this patch introduces the new infrastructure
   and performs only basic initializations.
   data path modifications are now performed in a separate patch.
v3:
 * Rebase on top of
   d80c8167c4fe ("net/mlx5: fix compilation issue on ARM SOC")
v2:
 * Rebase on top of 3f4722ee01e7 ("net/mlx5: refactor TC-flow infrastructure")
---
 drivers/net/mlx5/mlx5.c          |  20 ++++----
 drivers/net/mlx5/mlx5.h          |   4 +-
 drivers/net/mlx5/mlx5_flow.h     |   9 ++--
 drivers/net/mlx5/mlx5_flow_tcf.c | 105 +++++++++++++++++++++++++++++++--------
 4 files changed, 99 insertions(+), 39 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 795a219..2c96080 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -286,8 +286,8 @@
 		close(priv->nl_socket_route);
 	if (priv->nl_socket_rdma >= 0)
 		close(priv->nl_socket_rdma);
-	if (priv->mnl_socket)
-		mlx5_flow_tcf_socket_destroy(priv->mnl_socket);
+	if (priv->tcf_context)
+		mlx5_flow_tcf_context_destroy(priv->tcf_context);
 	ret = mlx5_hrxq_ibv_verify(dev);
 	if (ret)
 		DRV_LOG(WARNING, "port %u some hash Rx queue still remain",
@@ -1139,8 +1139,8 @@
 	claim_zero(mlx5_mac_addr_add(eth_dev, &mac, 0, 0));
 	if (vf && config.vf_nl_en)
 		mlx5_nl_mac_addr_sync(eth_dev);
-	priv->mnl_socket = mlx5_flow_tcf_socket_create();
-	if (!priv->mnl_socket) {
+	priv->tcf_context = mlx5_flow_tcf_context_create();
+	if (!priv->tcf_context) {
 		err = -rte_errno;
 		DRV_LOG(WARNING,
 			"flow rules relying on switch offloads will not be"
@@ -1155,16 +1155,16 @@
 			error.message =
 				"cannot retrieve network interface index";
 		} else {
-			err = mlx5_flow_tcf_init(priv->mnl_socket, ifindex,
-						&error);
+			err = mlx5_flow_tcf_init(priv->tcf_context,
+						 ifindex, &error);
 		}
 		if (err) {
 			DRV_LOG(WARNING,
 				"flow rules relying on switch offloads will"
 				" not be supported: %s: %s",
 				error.message, strerror(rte_errno));
-			mlx5_flow_tcf_socket_destroy(priv->mnl_socket);
-			priv->mnl_socket = NULL;
+			mlx5_flow_tcf_context_destroy(priv->tcf_context);
+			priv->tcf_context = NULL;
 		}
 	}
 	TAILQ_INIT(&priv->flows);
@@ -1219,8 +1219,8 @@
 			close(priv->nl_socket_route);
 		if (priv->nl_socket_rdma >= 0)
 			close(priv->nl_socket_rdma);
-		if (priv->mnl_socket)
-			mlx5_flow_tcf_socket_destroy(priv->mnl_socket);
+		if (priv->tcf_context)
+			mlx5_flow_tcf_context_destroy(priv->tcf_context);
 		if (own_domain_id)
 			claim_zero(rte_eth_switch_domain_free(priv->domain_id));
 		rte_free(priv);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 2dec88a..d14239c 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -169,7 +169,7 @@ struct mlx5_drop {
 	struct mlx5_rxq_ibv *rxq; /* Verbs Rx queue. */
 };
 
-struct mnl_socket;
+struct mlx5_flow_tcf_context;
 
 struct priv {
 	LIST_ENTRY(priv) mem_event_cb; /* Called by memory event callback. */
@@ -236,7 +236,7 @@ struct priv {
 	rte_spinlock_t uar_lock[MLX5_UAR_PAGE_NUM_MAX];
 	/* UAR same-page access control required in 32bit implementations. */
 #endif
-	struct mnl_socket *mnl_socket; /* Libmnl socket. */
+	struct mlx5_flow_tcf_context *tcf_context; /* TC flower context. */
 };
 
 #define PORT_ID(priv) ((priv)->dev_data->port_id)
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index d45fe8d..ee75a80 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -243,7 +243,6 @@ struct rte_flow {
 	struct rte_flow_action_rss rss;/**< RSS context. */
 	uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */
 	uint16_t (*queue)[]; /**< Destination queues to redirect traffic to. */
-	void *nl_flow; /**< Netlink flow buffer if relevant. */
 	LIST_HEAD(dev_flows, mlx5_flow) dev_flows;
 	/**< Device flows that are part of the flow. */
 	uint32_t actions; /**< Bit-fields which mark all detected actions. */
@@ -350,9 +349,9 @@ int mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
 
 /* mlx5_flow_tcf.c */
 
-int mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
-		       struct rte_flow_error *error);
-struct mnl_socket *mlx5_flow_tcf_socket_create(void);
-void mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl);
+int mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *ctx,
+		       unsigned int ifindex, struct rte_flow_error *error);
+struct mlx5_flow_tcf_context *mlx5_flow_tcf_context_create(void);
+void mlx5_flow_tcf_context_destroy(struct mlx5_flow_tcf_context *ctx);
 
 #endif /* RTE_PMD_MLX5_FLOW_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
index 131b62b..5ce2d5f 100644
--- a/drivers/net/mlx5/mlx5_flow_tcf.c
+++ b/drivers/net/mlx5/mlx5_flow_tcf.c
@@ -235,6 +235,19 @@ struct tc_pedit_sel {
 #define TTL_LEN 1
 #endif
 
+/**
+ * Structure for holding netlink context.
+ * Note the size of the message buffer which is MNL_SOCKET_BUFFER_SIZE.
+ * Using this (8KB) buffer size ensures that netlink messages will never be
+ * truncated.
+ */
+struct mlx5_flow_tcf_context {
+	struct mnl_socket *nl; /* NETLINK_ROUTE libmnl socket. */
+	uint32_t seq; /* Message sequence number. */
+	uint32_t buf_size; /* Message buffer size. */
+	uint8_t *buf; /* Message buffer. */
+};
+
 /** Empty masks for known item types. */
 static const union {
 	struct rte_flow_item_port_id port_id;
@@ -2153,7 +2166,7 @@ struct pedit_parser {
 	       struct rte_flow_error *error)
 {
 	struct priv *priv = dev->data->dev_private;
-	struct mnl_socket *nl = priv->mnl_socket;
+	struct mnl_socket *nl = priv->tcf_context->nl;
 	struct mlx5_flow *dev_flow;
 	struct nlmsghdr *nlh;
 
@@ -2182,7 +2195,7 @@ struct pedit_parser {
 flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
 {
 	struct priv *priv = dev->data->dev_private;
-	struct mnl_socket *nl = priv->mnl_socket;
+	struct mnl_socket *nl = priv->tcf_context->nl;
 	struct mlx5_flow *dev_flow;
 	struct nlmsghdr *nlh;
 
@@ -2234,10 +2247,47 @@ struct pedit_parser {
 };
 
 /**
- * Initialize ingress qdisc of a given network interface.
+ * Create and configure a libmnl socket for Netlink flow rules.
+ *
+ * @return
+ *   A valid libmnl socket object pointer on success, NULL otherwise and
+ *   rte_errno is set.
+ */
+static struct mnl_socket *
+flow_tcf_mnl_socket_create(void)
+{
+	struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
+
+	if (nl) {
+		mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
+				      sizeof(int));
+		if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
+			return nl;
+	}
+	rte_errno = errno;
+	if (nl)
+		mnl_socket_close(nl);
+	return NULL;
+}
+
+/**
+ * Destroy a libmnl socket.
  *
  * @param nl
  *   Libmnl socket of the @p NETLINK_ROUTE kind.
+ */
+static void
+flow_tcf_mnl_socket_destroy(struct mnl_socket *nl)
+{
+	if (nl)
+		mnl_socket_close(nl);
+}
+
+/**
+ * Initialize ingress qdisc of a given network interface.
+ *
+ * @param ctx
+ *   Pointer to tc-flower context to use.
  * @param ifindex
  *   Index of network interface to initialize.
  * @param[out] error
@@ -2247,11 +2297,12 @@ struct pedit_parser {
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
-		   struct rte_flow_error *error)
+mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *ctx,
+		   unsigned int ifindex, struct rte_flow_error *error)
 {
 	struct nlmsghdr *nlh;
 	struct tcmsg *tcm;
+	struct mnl_socket *nl = ctx->nl;
 	alignas(struct nlmsghdr)
 	uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
 
@@ -2290,37 +2341,47 @@ struct pedit_parser {
 }
 
 /**
- * Create and configure a libmnl socket for Netlink flow rules.
+ * Create libmnl context for Netlink flow rules.
  *
  * @return
  *   A valid libmnl socket object pointer on success, NULL otherwise and
  *   rte_errno is set.
  */
-struct mnl_socket *
-mlx5_flow_tcf_socket_create(void)
+struct mlx5_flow_tcf_context *
+mlx5_flow_tcf_context_create(void)
 {
-	struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
-
-	if (nl) {
-		mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
-				      sizeof(int));
-		if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
-			return nl;
-	}
-	rte_errno = errno;
-	if (nl)
-		mnl_socket_close(nl);
+	struct mlx5_flow_tcf_context *ctx = rte_zmalloc(__func__,
+							sizeof(*ctx),
+							sizeof(uint32_t));
+	if (!ctx)
+		goto error;
+	ctx->nl = flow_tcf_mnl_socket_create();
+	if (!ctx->nl)
+		goto error;
+	ctx->buf_size = MNL_SOCKET_BUFFER_SIZE;
+	ctx->buf = rte_zmalloc(__func__,
+			       ctx->buf_size, sizeof(uint32_t));
+	if (!ctx->buf)
+		goto error;
+	ctx->seq = random();
+	return ctx;
+error:
+	mlx5_flow_tcf_context_destroy(ctx);
 	return NULL;
 }
 
 /**
- * Destroy a libmnl socket.
+ * Destroy a libmnl context.
  *
  * @param nl
  *   Libmnl socket of the @p NETLINK_ROUTE kind.
  */
 void
-mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl)
+mlx5_flow_tcf_context_destroy(struct mlx5_flow_tcf_context *ctx)
 {
-	mnl_socket_close(nl);
+	if (!ctx)
+		return;
+	flow_tcf_mnl_socket_destroy(ctx->nl);
+	rte_free(ctx->buf);
+	rte_free(ctx);
 }
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 21+ messages in thread
* [dpdk-dev] [PATCH v5 2/4] net/mlx5: tc flow to use the new infrastructure
  2018-10-17 17:24     ` [dpdk-dev] [PATCH v4 3/3] net/mlx5: support e-switch flow count action Moti Haimovsky
  2018-10-18 18:29       ` [dpdk-dev] [PATCH v5 0/4] " Moti Haimovsky
  2018-10-18 18:29       ` [dpdk-dev] [PATCH v5 1/4] net/mlx5: refactor TC-flow infrastructure Moti Haimovsky
@ 2018-10-18 18:29       ` Moti Haimovsky
  2018-10-18 18:29       ` [dpdk-dev] [PATCH v5 3/4] net/mlx5: add flow query abstraction interface Moti Haimovsky
  2018-10-18 18:29       ` [dpdk-dev] [PATCH v5 4/4] net/mlx5: support e-switch flow count action Moti Haimovsky
  4 siblings, 0 replies; 21+ messages in thread
From: Moti Haimovsky @ 2018-10-18 18:29 UTC (permalink / raw)
  To: shahafs; +Cc: dev, Moti Haimovsky
modified TC-flow code to use the new infrastructure
introduced in "net/mlx5: refactor TC-flow infrastructure"
commit.
Signed-off-by: Moti Haimovsky <motih@mellanox.com>
---
v5:
 * Initial version of this patch.
---
 drivers/net/mlx5/mlx5_flow_tcf.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
index 5ce2d5f..b890aa2 100644
--- a/drivers/net/mlx5/mlx5_flow_tcf.c
+++ b/drivers/net/mlx5/mlx5_flow_tcf.c
@@ -2116,8 +2116,8 @@ struct pedit_parser {
 /**
  * Send Netlink message with acknowledgment.
  *
- * @param nl
- *   Libmnl socket to use.
+ * @param ctx
+ *   Flow context to use.
  * @param nlh
  *   Message to send. This function always raises the NLM_F_ACK flag before
  *   sending.
@@ -2126,12 +2126,13 @@ struct pedit_parser {
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-flow_tcf_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
+flow_tcf_nl_ack(struct mlx5_flow_tcf_context *ctx, struct nlmsghdr *nlh)
 {
 	alignas(struct nlmsghdr)
 	uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
 		    nlh->nlmsg_len - sizeof(*nlh)];
-	uint32_t seq = random();
+	uint32_t seq = ctx->seq++;
+	struct mnl_socket *nl = ctx->nl;
 	int ret;
 
 	nlh->nlmsg_flags |= NLM_F_ACK;
@@ -2166,7 +2167,7 @@ struct pedit_parser {
 	       struct rte_flow_error *error)
 {
 	struct priv *priv = dev->data->dev_private;
-	struct mnl_socket *nl = priv->tcf_context->nl;
+	struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
 	struct mlx5_flow *dev_flow;
 	struct nlmsghdr *nlh;
 
@@ -2176,7 +2177,7 @@ struct pedit_parser {
 	nlh = dev_flow->tcf.nlh;
 	nlh->nlmsg_type = RTM_NEWTFILTER;
 	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
-	if (!flow_tcf_nl_ack(nl, nlh))
+	if (!flow_tcf_nl_ack(ctx, nlh))
 		return 0;
 	return rte_flow_error_set(error, rte_errno,
 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
@@ -2195,7 +2196,7 @@ struct pedit_parser {
 flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
 {
 	struct priv *priv = dev->data->dev_private;
-	struct mnl_socket *nl = priv->tcf_context->nl;
+	struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
 	struct mlx5_flow *dev_flow;
 	struct nlmsghdr *nlh;
 
@@ -2209,7 +2210,7 @@ struct pedit_parser {
 	nlh = dev_flow->tcf.nlh;
 	nlh->nlmsg_type = RTM_DELTFILTER;
 	nlh->nlmsg_flags = NLM_F_REQUEST;
-	flow_tcf_nl_ack(nl, nlh);
+	flow_tcf_nl_ack(ctx, nlh);
 }
 
 /**
@@ -2302,7 +2303,6 @@ struct pedit_parser {
 {
 	struct nlmsghdr *nlh;
 	struct tcmsg *tcm;
-	struct mnl_socket *nl = ctx->nl;
 	alignas(struct nlmsghdr)
 	uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
 
@@ -2316,7 +2316,7 @@ struct pedit_parser {
 	tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
 	tcm->tcm_parent = TC_H_INGRESS;
 	/* Ignore errors when qdisc is already absent. */
-	if (flow_tcf_nl_ack(nl, nlh) &&
+	if (flow_tcf_nl_ack(ctx, nlh) &&
 	    rte_errno != EINVAL && rte_errno != ENOENT)
 		return rte_flow_error_set(error, rte_errno,
 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
@@ -2332,7 +2332,7 @@ struct pedit_parser {
 	tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
 	tcm->tcm_parent = TC_H_INGRESS;
 	mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
-	if (flow_tcf_nl_ack(nl, nlh))
+	if (flow_tcf_nl_ack(ctx, nlh))
 		return rte_flow_error_set(error, rte_errno,
 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
 					  "netlink: failed to create ingress"
@@ -2373,7 +2373,7 @@ struct mlx5_flow_tcf_context *
 /**
  * Destroy a libmnl context.
  *
- * @param nl
+ * @param ctx
  *   Libmnl socket of the @p NETLINK_ROUTE kind.
  */
 void
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 21+ messages in thread
* [dpdk-dev] [PATCH v5 3/4] net/mlx5: add flow query abstraction interface
  2018-10-17 17:24     ` [dpdk-dev] [PATCH v4 3/3] net/mlx5: support e-switch flow count action Moti Haimovsky
                         ` (2 preceding siblings ...)
  2018-10-18 18:29       ` [dpdk-dev] [PATCH v5 2/4] net/mlx5: tc flow to use the new infrastructure Moti Haimovsky
@ 2018-10-18 18:29       ` Moti Haimovsky
  2018-10-18 18:29       ` [dpdk-dev] [PATCH v5 4/4] net/mlx5: support e-switch flow count action Moti Haimovsky
  4 siblings, 0 replies; 21+ messages in thread
From: Moti Haimovsky @ 2018-10-18 18:29 UTC (permalink / raw)
  To: shahafs; +Cc: dev, Moti Haimovsky
Flow engine now supports multiple driver paths with each having
its own flow query implantation routine.
This patch adds an abstraction to the flow query routine in accordance
to commit 0c76d1c9a18d ("net/mlx5: add abstraction for multiple flow
drivers") done by Yongseok Koh.
Signed-off-by: Moti Haimovsky <motih@mellanox.com>
---
v4:
 * minor cosmetics.
v3:
 * Rebase on top of
   d80c8167c4fe ("net/mlx5: fix compilation issue on ARM SOC")
v2:
 * Rebase on top of 3f4722ee01e7 ("net/mlx5: refactor TC-flow infrastructure")
---
 drivers/net/mlx5/mlx5_flow.c       | 99 ++++++++++++--------------------------
 drivers/net/mlx5/mlx5_flow.h       |  6 +++
 drivers/net/mlx5/mlx5_flow_dv.c    | 19 ++++++++
 drivers/net/mlx5/mlx5_flow_tcf.c   | 18 +++++++
 drivers/net/mlx5/mlx5_flow_verbs.c | 87 +++++++++++++++++++++++++++++++++
 5 files changed, 162 insertions(+), 67 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index bd70fce..fcabab0 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1653,6 +1653,17 @@ uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
 {
 }
 
+static int
+flow_null_query(struct rte_eth_dev *dev __rte_unused,
+		struct rte_flow *flow __rte_unused,
+		const struct rte_flow_action *actions __rte_unused,
+		void *data __rte_unused,
+		struct rte_flow_error *error __rte_unused)
+{
+	rte_errno = ENOTSUP;
+	return -rte_errno;
+}
+
 /* Void driver to protect from null pointer reference. */
 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
 	.validate = flow_null_validate,
@@ -1661,6 +1672,7 @@ uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
 	.apply = flow_null_apply,
 	.remove = flow_null_remove,
 	.destroy = flow_null_destroy,
+	.query = flow_null_query,
 };
 
 /**
@@ -2344,92 +2356,45 @@ struct rte_flow *
 }
 
 /**
- * Query flow counter.
- *
- * @param flow
- *   Pointer to the flow.
+ * Query a flow.
  *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
+ * @see rte_flow_query()
+ * @see rte_flow_ops
  */
 static int
-mlx5_flow_query_count(struct rte_flow *flow __rte_unused,
-		      void *data __rte_unused,
-		      struct rte_flow_error *error)
+flow_drv_query(struct rte_eth_dev *dev,
+	       struct rte_flow *flow,
+	       const struct rte_flow_action *actions,
+	       void *data,
+	       struct rte_flow_error *error)
 {
-#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
-	if (flow->actions & MLX5_FLOW_ACTION_COUNT) {
-		struct rte_flow_query_count *qc = data;
-		uint64_t counters[2] = {0, 0};
-		struct ibv_query_counter_set_attr query_cs_attr = {
-			.cs = flow->counter->cs,
-			.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
-		};
-		struct ibv_counter_set_data query_out = {
-			.out = counters,
-			.outlen = 2 * sizeof(uint64_t),
-		};
-		int err = mlx5_glue->query_counter_set(&query_cs_attr,
-						       &query_out);
+	const struct mlx5_flow_driver_ops *fops;
+	enum mlx5_flow_drv_type ftype = flow->drv_type;
 
-		if (err)
-			return rte_flow_error_set
-				(error, err,
-				 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-				 NULL,
-				 "cannot read counter");
-		qc->hits_set = 1;
-		qc->bytes_set = 1;
-		qc->hits = counters[0] - flow->counter->hits;
-		qc->bytes = counters[1] - flow->counter->bytes;
-		if (qc->reset) {
-			flow->counter->hits = counters[0];
-			flow->counter->bytes = counters[1];
-		}
-		return 0;
-	}
-	return rte_flow_error_set(error, EINVAL,
-				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-				  NULL,
-				  "flow does not have counter");
-#endif
-	return rte_flow_error_set(error, ENOTSUP,
-				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-				  NULL,
-				  "counters are not available");
+	assert(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
+	fops = flow_get_drv_ops(ftype);
+
+	return fops->query(dev, flow, actions, data, error);
 }
 
 /**
- * Query a flows.
+ * Query a flow.
  *
  * @see rte_flow_query()
  * @see rte_flow_ops
  */
 int
-mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
+mlx5_flow_query(struct rte_eth_dev *dev,
 		struct rte_flow *flow,
 		const struct rte_flow_action *actions,
 		void *data,
 		struct rte_flow_error *error)
 {
-	int ret = 0;
+	int ret;
 
-	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
-		switch (actions->type) {
-		case RTE_FLOW_ACTION_TYPE_VOID:
-			break;
-		case RTE_FLOW_ACTION_TYPE_COUNT:
-			ret = mlx5_flow_query_count(flow, data, error);
-			break;
-		default:
-			return rte_flow_error_set(error, ENOTSUP,
-						  RTE_FLOW_ERROR_TYPE_ACTION,
-						  actions,
-						  "action not supported");
-		}
-		if (ret < 0)
-			return ret;
-	}
+	ret = flow_drv_query(dev, flow, actions, data, error);
+	if (ret < 0)
+		return ret;
 	return 0;
 }
 
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index ee75a80..af0a125 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -268,6 +268,11 @@ typedef void (*mlx5_flow_remove_t)(struct rte_eth_dev *dev,
 				   struct rte_flow *flow);
 typedef void (*mlx5_flow_destroy_t)(struct rte_eth_dev *dev,
 				    struct rte_flow *flow);
+typedef int (*mlx5_flow_query_t)(struct rte_eth_dev *dev,
+				 struct rte_flow *flow,
+				 const struct rte_flow_action *actions,
+				 void *data,
+				 struct rte_flow_error *error);
 struct mlx5_flow_driver_ops {
 	mlx5_flow_validate_t validate;
 	mlx5_flow_prepare_t prepare;
@@ -275,6 +280,7 @@ struct mlx5_flow_driver_ops {
 	mlx5_flow_apply_t apply;
 	mlx5_flow_remove_t remove;
 	mlx5_flow_destroy_t destroy;
+	mlx5_flow_query_t query;
 };
 
 /* mlx5_flow.c */
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index becbc57..58e3c33 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -1363,6 +1363,24 @@
 	}
 }
 
+/**
+ * Query a flow.
+ *
+ * @see rte_flow_query()
+ * @see rte_flow_ops
+ */
+static int
+flow_dv_query(struct rte_eth_dev *dev __rte_unused,
+	      struct rte_flow *flow __rte_unused,
+	      const struct rte_flow_action *actions __rte_unused,
+	      void *data __rte_unused,
+	      struct rte_flow_error *error __rte_unused)
+{
+	rte_errno = ENOTSUP;
+	return -rte_errno;
+}
+
+
 const struct mlx5_flow_driver_ops mlx5_flow_dv_drv_ops = {
 	.validate = flow_dv_validate,
 	.prepare = flow_dv_prepare,
@@ -1370,6 +1388,7 @@
 	.apply = flow_dv_apply,
 	.remove = flow_dv_remove,
 	.destroy = flow_dv_destroy,
+	.query = flow_dv_query,
 };
 
 #endif /* HAVE_IBV_FLOW_DV_SUPPORT */
diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
index b890aa2..6b19b2a 100644
--- a/drivers/net/mlx5/mlx5_flow_tcf.c
+++ b/drivers/net/mlx5/mlx5_flow_tcf.c
@@ -2238,6 +2238,23 @@ struct pedit_parser {
 	rte_free(dev_flow);
 }
 
+/**
+ * Query a flow.
+ *
+ * @see rte_flow_query()
+ * @see rte_flow_ops
+ */
+static int
+flow_tcf_query(struct rte_eth_dev *dev __rte_unused,
+	       struct rte_flow *flow __rte_unused,
+	       const struct rte_flow_action *actions __rte_unused,
+	       void *data __rte_unused,
+	       struct rte_flow_error *error __rte_unused)
+{
+	rte_errno = ENOTSUP;
+	return -rte_errno;
+}
+
 const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
 	.validate = flow_tcf_validate,
 	.prepare = flow_tcf_prepare,
@@ -2245,6 +2262,7 @@ struct pedit_parser {
 	.apply = flow_tcf_apply,
 	.remove = flow_tcf_remove,
 	.destroy = flow_tcf_destroy,
+	.query = flow_tcf_query,
 };
 
 /**
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
index 65c849c..4ae974b 100644
--- a/drivers/net/mlx5/mlx5_flow_verbs.c
+++ b/drivers/net/mlx5/mlx5_flow_verbs.c
@@ -1651,6 +1651,92 @@
 	return -rte_errno;
 }
 
+/**
+ * Query a flows.
+ *
+ * @see rte_flow_query()
+ * @see rte_flow_ops
+ */
+static int
+flow_verbs_query_count(struct rte_eth_dev *dev __rte_unused,
+		       struct rte_flow *flow __rte_unused,
+		       void *data __rte_unused,
+		       struct rte_flow_error *error)
+{
+#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+	if (flow->actions & MLX5_FLOW_ACTION_COUNT) {
+		struct rte_flow_query_count *qc = data;
+		uint64_t counters[2] = {0, 0};
+		struct ibv_query_counter_set_attr query_cs_attr = {
+			.cs = flow->counter->cs,
+			.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
+		};
+		struct ibv_counter_set_data query_out = {
+			.out = counters,
+			.outlen = 2 * sizeof(uint64_t),
+		};
+		int err = mlx5_glue->query_counter_set(&query_cs_attr,
+						       &query_out);
+
+		if (err)
+			return rte_flow_error_set
+				(error, err,
+				 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				 NULL,
+				 "cannot read counter");
+		qc->hits_set = 1;
+		qc->bytes_set = 1;
+		qc->hits = counters[0] - flow->counter->hits;
+		qc->bytes = counters[1] - flow->counter->bytes;
+		if (qc->reset) {
+			flow->counter->hits = counters[0];
+			flow->counter->bytes = counters[1];
+		}
+		return 0;
+	}
+	return rte_flow_error_set(error, EINVAL,
+				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				  NULL,
+				  "flow does not have counter");
+#endif
+	return rte_flow_error_set(error, ENOTSUP,
+				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				  NULL,
+				  "counters are not available");
+}
+
+/**
+ * Query a flow.
+ *
+ * @see rte_flow_query()
+ * @see rte_flow_ops
+ */
+static int
+flow_verbs_query(struct rte_eth_dev *dev,
+		 struct rte_flow *flow,
+		 const struct rte_flow_action *actions,
+		 void *data,
+		 struct rte_flow_error *error)
+{
+	int ret = -EINVAL;
+
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+		switch (actions->type) {
+		case RTE_FLOW_ACTION_TYPE_VOID:
+			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			ret = flow_verbs_query_count(dev, flow, data, error);
+			break;
+		default:
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  actions,
+						  "action not supported");
+		}
+	}
+	return ret;
+}
+
 const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops = {
 	.validate = flow_verbs_validate,
 	.prepare = flow_verbs_prepare,
@@ -1658,4 +1744,5 @@
 	.apply = flow_verbs_apply,
 	.remove = flow_verbs_remove,
 	.destroy = flow_verbs_destroy,
+	.query = flow_verbs_query,
 };
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 21+ messages in thread
* [dpdk-dev] [PATCH v5 4/4] net/mlx5: support e-switch flow count action
  2018-10-17 17:24     ` [dpdk-dev] [PATCH v4 3/3] net/mlx5: support e-switch flow count action Moti Haimovsky
                         ` (3 preceding siblings ...)
  2018-10-18 18:29       ` [dpdk-dev] [PATCH v5 3/4] net/mlx5: add flow query abstraction interface Moti Haimovsky
@ 2018-10-18 18:29       ` Moti Haimovsky
  2018-10-22  9:14         ` Ferruh Yigit
  4 siblings, 1 reply; 21+ messages in thread
From: Moti Haimovsky @ 2018-10-18 18:29 UTC (permalink / raw)
  To: shahafs; +Cc: dev, Moti Haimovsky
This commit adds support for configuring flows destined to the mlx5
eswitch with 'count' action and for querying these counts at runtime.
Each flow rule configured by the mlx5 driver is implicitly assigned
with flow counters. These counters can be retrieved when querying
the flow rule via Netlink, they can be found in each flow action
section of the reply. Hence, supporting the 'count' action in the
flow configuration command is straight-forward. When transposing
the command to a tc Netlink message we just ignore it instead of
rejecting it.
In the 'flow query count' side, the command now uses tc Netlink
query command in order to retrieve the values of the flow counters.
Signed-off-by: Moti Haimovsky <motih@mellanox.com>
---
v5:
 * Limited tc-flower message parsing to only the range of
   values requested.
 * Removed "problematic" define.
v4:
 * Split the patch in two.
   This patch now contains only the tcf implementation of count action
   and query.
 * Fixed compilation error found on "old" kernels.
 * Modifications according to inputs from Shahaf S.
v3:
 * Rebase on top of
   d80c8167c4fe ("net/mlx5: fix compilation issue on ARM SOC")
 * Code modifications accordig to review by Shahaf S.
   see message ID: 1539263057-16678-3-git-send-email-motih@mellanox.com
v2:
 * Rebase on top of 3f4722ee01e7 ("net/mlx5: refactor TC-flow infrastructure"
---
 drivers/net/mlx5/mlx5_flow_tcf.c | 517 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 510 insertions(+), 7 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
index 6b19b2a..2dc2811 100644
--- a/drivers/net/mlx5/mlx5_flow_tcf.c
+++ b/drivers/net/mlx5/mlx5_flow_tcf.c
@@ -6,6 +6,7 @@
 #include <assert.h>
 #include <errno.h>
 #include <libmnl/libmnl.h>
+#include <linux/gen_stats.h>
 #include <linux/if_ether.h>
 #include <linux/netlink.h>
 #include <linux/pkt_cls.h>
@@ -26,6 +27,7 @@
 #include <rte_ether.h>
 #include <rte_flow.h>
 #include <rte_malloc.h>
+#include <rte_common.h>
 
 #include "mlx5.h"
 #include "mlx5_flow.h"
@@ -248,6 +250,14 @@ struct mlx5_flow_tcf_context {
 	uint8_t *buf; /* Message buffer. */
 };
 
+/** Structure used when extracting the values of a flow counters
+ * from a netlink message.
+ */
+struct flow_tcf_stats_basic {
+	bool valid;
+	struct gnet_stats_basic counters;
+};
+
 /** Empty masks for known item types. */
 static const union {
 	struct rte_flow_item_port_id port_id;
@@ -363,6 +373,43 @@ struct pedit_parser {
 	struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
 };
 
+/**
+ * Create space for using the implicitly created TC flow counter.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ *
+ * @return
+ *   A pointer to the counter data structure, NULL otherwise and
+ *   rte_errno is set.
+ */
+static struct mlx5_flow_counter *
+flow_tcf_counter_new(void)
+{
+	struct mlx5_flow_counter *cnt;
+
+	/*
+	 * eswitch counter cannot be shared and its id is unknown.
+	 * currently returning all with id 0.
+	 * in the future maybe better to switch to unique numbers.
+	 */
+	struct mlx5_flow_counter tmpl = {
+		.ref_cnt = 1,
+		.shared = 0,
+		.id = 0,
+		.cs = NULL,
+		.hits = 0,
+		.bytes = 0,
+	};
+	cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
+	if (!cnt) {
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+	*cnt = tmpl;
+	/* Implicit counter, do not add to list. */
+	return cnt;
+}
 
 /**
  * Set pedit key of MAC address
@@ -1175,6 +1222,8 @@ struct pedit_parser {
 		case RTE_FLOW_ACTION_TYPE_DROP:
 			current_action_flag = MLX5_FLOW_ACTION_DROP;
 			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			break;
 		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
 			current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN;
 			break;
@@ -1481,6 +1530,8 @@ struct pedit_parser {
 				SZ_NLATTR_TYPE_OF(struct tc_gact);
 			flags |= MLX5_FLOW_ACTION_DROP;
 			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			break;
 		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
 			flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
 			goto action_of_vlan;
@@ -1620,6 +1671,38 @@ struct pedit_parser {
 }
 
 /**
+ * Make adjustments for supporting count actions.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] dev_flow
+ *   Pointer to mlx5_flow.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 On success else a negative errno value is returned and rte_errno is set.
+ */
+static int
+flow_tcf_translate_action_count(struct rte_eth_dev *dev __rte_unused,
+				  struct mlx5_flow *dev_flow,
+				  struct rte_flow_error *error)
+{
+	struct rte_flow *flow = dev_flow->flow;
+
+	if (!flow->counter) {
+		flow->counter = flow_tcf_counter_new();
+		if (!flow->counter)
+			return rte_flow_error_set(error, rte_errno,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  NULL,
+						  "cannot get counter"
+						  " context.");
+	}
+	return 0;
+}
+
+/**
  * Translate flow for Linux TC flower and construct Netlink message.
  *
  * @param[in] priv
@@ -1676,6 +1759,7 @@ struct pedit_parser {
 	struct nlattr *na_vlan_id = NULL;
 	struct nlattr *na_vlan_priority = NULL;
 	uint64_t item_flags = 0;
+	int ret;
 
 	claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
 						PTOI_TABLE_SZ_MAX(dev)));
@@ -2018,6 +2102,16 @@ struct pedit_parser {
 			mnl_attr_nest_end(nlh, na_act);
 			mnl_attr_nest_end(nlh, na_act_index);
 			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			/*
+			 * Driver adds the count action implicitly for
+			 * each rule it creates.
+			 */
+			ret = flow_tcf_translate_action_count(dev,
+							      dev_flow, error);
+			if (ret < 0)
+				return ret;
+			break;
 		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
 			conf.of_push_vlan = NULL;
 			vlan_act = TCA_VLAN_ACT_POP;
@@ -2202,6 +2296,12 @@ struct pedit_parser {
 
 	if (!flow)
 		return;
+	if (flow->counter) {
+		if (--flow->counter->ref_cnt == 0) {
+			rte_free(flow->counter);
+			flow->counter = NULL;
+		}
+	}
 	dev_flow = LIST_FIRST(&flow->dev_flows);
 	if (!dev_flow)
 		return;
@@ -2239,20 +2339,423 @@ struct pedit_parser {
 }
 
 /**
+ * Helper routine for figuring the space size required for a parse buffer.
+ *
+ * @param array
+ *   array of values to use.
+ * @param idx
+ *   Current location in array.
+ * @param value
+ *   Value to compare with.
+ *
+ * @return
+ *   MAX(array[idx], value) if idx is valid. value otherwise.
+ */
+static uint16_t
+arval_max(uint16_t array[], int idx, uint16_t value)
+{
+	return idx < 0 ? (value) : RTE_MAX((array)[idx], value);
+}
+
+/**
+ * Parse rtnetlink message attributes filling the attribute table with the info
+ * retrieved.
+ *
+ * @param tb
+ *   Attribute table to be filled.
+ * @param[out] max
+ *   Maxinum entry in the attribute table.
+ * @param rte
+ *   The attributes section in the message to be parsed.
+ * @param len
+ *   The length of the attributes section in the message.
+ */
+static void
+flow_tcf_nl_parse_rtattr(struct rtattr *tb[], int max,
+			 struct rtattr *rta, int len)
+{
+	unsigned short type;
+	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
+	while (RTA_OK(rta, len)) {
+		type = rta->rta_type;
+		if (type <= max && !tb[type])
+			tb[type] = rta;
+		rta = RTA_NEXT(rta, len);
+	}
+}
+
+/**
+ * Extract flow counters from flower action.
+ *
+ * @param rta
+ *   flower action stats properties in the Netlink message received.
+ * @param rta_type
+ *   The backward sequence of rta_types, as written in the attribute table,
+ *   we need to traverse in order to get to the requested object.
+ * @param idx
+ *   Current location in rta_type table.
+ * @param[out] data
+ *   data holding the count statistics of the rte_flow retrieved from
+ *   the message.
+ *
+ * @return
+ *   0 if data was found and retrieved, -1 otherwise.
+ */
+static int
+flow_tcf_nl_action_stats_parse_and_get(struct rtattr *rta,
+				       uint16_t rta_type[], int idx,
+				       struct gnet_stats_basic *data)
+{
+	int tca_stats_max = arval_max(rta_type, idx, 0);
+	struct rtattr *tbs[tca_stats_max + 1];
+
+	if (rta == NULL || idx < 0)
+		return -1;
+	flow_tcf_nl_parse_rtattr(tbs, tca_stats_max,
+				 RTA_DATA(rta), RTA_PAYLOAD(rta));
+	switch (rta_type[idx]) {
+	case TCA_STATS_BASIC:
+		if (tbs[TCA_STATS_BASIC]) {
+			memcpy(data, RTA_DATA(tbs[TCA_STATS_BASIC]),
+			       RTE_MIN(RTA_PAYLOAD(tbs[TCA_STATS_BASIC]),
+			       sizeof(*data)));
+			return 0;
+		}
+		break;
+	default:
+		break;
+	}
+	return -1;
+}
+
+/**
+ * Parse flower single action retrieving the requested action attribute,
+ * if found.
+ *
+ * @param arg
+ *   flower action properties in the Netlink message received.
+ * @param rta_type
+ *   The backward sequence of rta_types, as written in the attribute table,
+ *   we need to traverse in order to get to the requested object.
+ * @param idx
+ *   Current location in rta_type table.
+ * @param[out] data
+ *   Count statistics retrieved from the message query.
+ *
+ * @return
+ *   0 if data was found and retrieved, -1 otherwise.
+ */
+static int
+flow_tcf_nl_parse_one_action_and_get(struct rtattr *arg,
+				     uint16_t rta_type[], int idx, void *data)
+{
+	int tca_act_max = arval_max(rta_type, idx, TCA_ACT_KIND);
+	struct rtattr *tb[tca_act_max + 1];
+
+	if (arg == NULL || idx < 0)
+		return -1;
+	flow_tcf_nl_parse_rtattr(tb, tca_act_max,
+				 RTA_DATA(arg), RTA_PAYLOAD(arg));
+	if (tb[TCA_ACT_KIND] == NULL)
+		return -1;
+	switch (rta_type[idx]) {
+	case TCA_ACT_STATS:
+		if (tb[TCA_ACT_STATS])
+			return flow_tcf_nl_action_stats_parse_and_get
+					(tb[TCA_ACT_STATS],
+					 rta_type, --idx,
+					 (struct gnet_stats_basic *)data);
+		break;
+	default:
+		break;
+	}
+	return -1;
+}
+
+/**
+ * Parse flower action section in the message retrieving the requested
+ * attribute from the first action that provides it.
+ *
+ * @param opt
+ *   flower section in the Netlink message received.
+ * @param rta_type
+ *   The backward sequence of rta_types, as written in the attribute table,
+ *   we need to traverse in order to get to the requested object.
+ * @param idx
+ *   Current location in rta_type table.
+ * @param[out] data
+ *   data retrieved from the message query.
+ *
+ * @return
+ *   0 if data was found and retrieved, -1 otherwise.
+ */
+static int
+flow_tcf_nl_action_parse_and_get(const struct rtattr *arg,
+				 uint16_t rta_type[], int idx, void *data)
+{
+	struct rtattr *tb[TCA_ACT_MAX_PRIO + 1];
+	int i;
+
+	if (arg == NULL || idx < 0)
+		return -1;
+	flow_tcf_nl_parse_rtattr(tb, TCA_ACT_MAX_PRIO,
+				 RTA_DATA(arg), RTA_PAYLOAD(arg));
+	switch (rta_type[idx]) {
+	/*
+	 * flow counters are stored in the actions defined by the flow
+	 * and not in the flow itself, therefore we need to traverse the
+	 * flower chain of actions in search for them.
+	 *
+	 * Note that the index is not decremented here.
+	 */
+	case TCA_ACT_STATS:
+		for (i = 0; i <= TCA_ACT_MAX_PRIO; i++) {
+			if (tb[i] &&
+			!flow_tcf_nl_parse_one_action_and_get(tb[i],
+							      rta_type,
+							      idx, data))
+				return 0;
+		}
+		break;
+	default:
+		break;
+	}
+	return -1;
+}
+
+/**
+ * Parse flower classifier options in the message, retrieving the requested
+ * attribute if found.
+ *
+ * @param opt
+ *   flower section in the Netlink message received.
+ * @param rta_type
+ *   The backward sequence of rta_types, as written in the attribute table,
+ *   we need to traverse in order to get to the requested object.
+ * @param idx
+ *   Current location in rta_type table.
+ * @param[out] data
+ *   data retrieved from the message query.
+ *
+ * @return
+ *   0 if data was found and retrieved, -1 otherwise.
+ */
+static int
+flow_tcf_nl_opts_parse_and_get(struct rtattr *opt,
+			       uint16_t rta_type[], int idx, void *data)
+{
+	int tca_flower_max = arval_max(rta_type, idx, 0);
+	struct rtattr *tb[tca_flower_max + 1];
+
+	if (!opt || idx < 0)
+		return -1;
+	flow_tcf_nl_parse_rtattr(tb, tca_flower_max,
+				 RTA_DATA(opt), RTA_PAYLOAD(opt));
+	switch (rta_type[idx]) {
+	case TCA_FLOWER_ACT:
+		if (tb[TCA_FLOWER_ACT])
+			return flow_tcf_nl_action_parse_and_get
+							(tb[TCA_FLOWER_ACT],
+							 rta_type, --idx, data);
+		break;
+	default:
+		break;
+	}
+	return -1;
+}
+
+/**
+ * Parse Netlink reply on filter query, retrieving the flow counters.
+ *
+ * @param nlh
+ *   Message received from Netlink.
+ * @param rta_type
+ *   The backward sequence of rta_types, as written in the attribute table,
+ *   we need to traverse in order to get to the requested object.
+ * @param idx
+ *   Current location in rta_type table.
+ * @param[out] data
+ *   data retrieved from the message query.
+ *
+ * @return
+ *   0 if data was found and retrieved, -1 otherwise.
+ */
+static int
+flow_tcf_nl_filter_parse_and_get(const struct nlmsghdr *nlh,
+				 uint16_t rta_type[], int idx, void *data)
+{
+	struct tcmsg *t = NLMSG_DATA(nlh);
+	int len = nlh->nlmsg_len;
+	int tca_max = arval_max(rta_type, idx, TCA_KIND);
+	struct rtattr *tb[tca_max + 1];
+
+	if (idx < 0)
+		return -1;
+	if (nlh->nlmsg_type != RTM_NEWTFILTER &&
+	    nlh->nlmsg_type != RTM_GETTFILTER &&
+	    nlh->nlmsg_type != RTM_DELTFILTER)
+		return -1;
+	len -= NLMSG_LENGTH(sizeof(*t));
+	if (len < 0)
+		return -1;
+	flow_tcf_nl_parse_rtattr(tb, tca_max, TCA_RTA(t), len);
+	/* Not a TC flower flow - bail out */
+	if (!tb[TCA_KIND] ||
+	    strcmp(RTA_DATA(tb[TCA_KIND]), "flower"))
+		return -1;
+	switch (rta_type[idx]) {
+	case TCA_OPTIONS:
+		if (tb[TCA_OPTIONS])
+			return flow_tcf_nl_opts_parse_and_get(tb[TCA_OPTIONS],
+							      rta_type,
+							      --idx, data);
+		break;
+	default:
+		break;
+	}
+	return -1;
+}
+
+/**
+ * A callback to parse Netlink reply on TC flower query.
+ *
+ * @param nlh
+ *   Message received from Netlink.
+ * @param[out] data
+ *   Pointer to data area to be filled by the parsing routine.
+ *   assumed to be a pinter to struct flow_tcf_stats_basic.
+ *
+ * @return
+ *   MNL_CB_OK value.
+ */
+static int
+flow_tcf_nl_message_get_stats_basic(const struct nlmsghdr *nlh, void *data)
+{
+	/*
+	 * The backward sequence of rta_types to pass in order to get
+	 *  to the counters.
+	 */
+	uint16_t rta_type[] = { TCA_STATS_BASIC, TCA_ACT_STATS,
+				TCA_FLOWER_ACT, TCA_OPTIONS };
+	struct flow_tcf_stats_basic *sb_data = data;
+
+	if (!flow_tcf_nl_filter_parse_and_get(nlh, rta_type,
+					      RTE_DIM(rta_type) - 1,
+					      (void *)&sb_data->counters))
+		sb_data->valid = true;
+	return MNL_CB_OK;
+}
+
+/**
+ * Query a TC flower rule for its statistics via netlink.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in] flow
+ *   Pointer to the sub flow.
+ * @param[out] data
+ *   data retrieved by the query.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_tcf_query_count(struct rte_eth_dev *dev,
+			  struct rte_flow *flow,
+			  void *data,
+			  struct rte_flow_error *error)
+{
+	struct flow_tcf_stats_basic sb_data = { 0 };
+	struct rte_flow_query_count *qc = data;
+	struct priv *priv = dev->data->dev_private;
+	struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
+	struct mnl_socket *nl = ctx->nl;
+	struct mlx5_flow *dev_flow;
+	struct nlmsghdr *nlh;
+	uint32_t seq = priv->tcf_context->seq++;
+	ssize_t ret;
+	assert(qc);
+
+	dev_flow = LIST_FIRST(&flow->dev_flows);
+	/* E-Switch flow can't be expanded. */
+	assert(!LIST_NEXT(dev_flow, next));
+	if (!dev_flow->flow->counter)
+		goto notsup_exit;
+	nlh = dev_flow->tcf.nlh;
+	nlh->nlmsg_type = RTM_GETTFILTER;
+	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ECHO;
+	nlh->nlmsg_seq = seq;
+	if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) == -1)
+		goto error_exit;
+	do {
+		ret = mnl_socket_recvfrom(nl, ctx->buf, ctx->buf_size);
+		if (ret <= 0)
+			break;
+		ret = mnl_cb_run(ctx->buf, ret, seq,
+				 mnl_socket_get_portid(nl),
+				 flow_tcf_nl_message_get_stats_basic,
+				 (void *)&sb_data);
+	} while (ret > 0);
+	/* Return the delta from last reset. */
+	if (sb_data.valid) {
+		/* Return the delta from last reset. */
+		qc->hits_set = 1;
+		qc->bytes_set = 1;
+		qc->hits = sb_data.counters.packets;
+		qc->hits -= flow->counter->hits;
+		qc->bytes = sb_data.counters.bytes - flow->counter->bytes;
+		if (qc->reset) {
+			flow->counter->hits = sb_data.counters.packets;
+			flow->counter->bytes = sb_data.counters.bytes;
+		}
+		return 0;
+	}
+	return rte_flow_error_set(error, EINVAL,
+				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				  NULL,
+				  "flow does not have counter");
+error_exit:
+	return rte_flow_error_set
+			(error, errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+			 NULL, "netlink: failed to read flow rule counters");
+notsup_exit:
+	return rte_flow_error_set
+			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+			 NULL, "counters are not available.");
+}
+
+/**
  * Query a flow.
  *
  * @see rte_flow_query()
  * @see rte_flow_ops
  */
 static int
-flow_tcf_query(struct rte_eth_dev *dev __rte_unused,
-	       struct rte_flow *flow __rte_unused,
-	       const struct rte_flow_action *actions __rte_unused,
-	       void *data __rte_unused,
-	       struct rte_flow_error *error __rte_unused)
+flow_tcf_query(struct rte_eth_dev *dev,
+	       struct rte_flow *flow,
+	       const struct rte_flow_action *actions,
+	       void *data,
+	       struct rte_flow_error *error)
 {
-	rte_errno = ENOTSUP;
-	return -rte_errno;
+	int ret = -EINVAL;
+
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+		switch (actions->type) {
+		case RTE_FLOW_ACTION_TYPE_VOID:
+			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			ret = flow_tcf_query_count(dev, flow, data, error);
+			break;
+		default:
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  actions,
+						  "action not supported");
+		}
+	}
+	return ret;
 }
 
 const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 21+ messages in thread
* Re: [dpdk-dev] [PATCH v5 0/4] support e-switch flow count action
  2018-10-18 18:29       ` [dpdk-dev] [PATCH v5 0/4] " Moti Haimovsky
@ 2018-10-21  8:23         ` Shahaf Shuler
  0 siblings, 0 replies; 21+ messages in thread
From: Shahaf Shuler @ 2018-10-21  8:23 UTC (permalink / raw)
  To: Mordechay Haimovsky; +Cc: dev
Thursday, October 18, 2018 9:29 PM, Mordechay Haimovsky:
> Subject: [PATCH v5 0/4] support e-switch flow count action
> 
> The following series of patches adds support for reading the
> mlx5 e-switch flow counters.
> 
> Moti Haimovsky (4):
>   net/mlx5: refactor TC-flow infrastructure
>   net/mlx5: tc flow to use the new infrastructure
>   net/mlx5: add flow query abstraction interface
>   net/mlx5: support e-switch flow count action
> 
>  drivers/net/mlx5/mlx5.c            |  20 +-
>  drivers/net/mlx5/mlx5.h            |   4 +-
>  drivers/net/mlx5/mlx5_flow.c       |  99 ++----
>  drivers/net/mlx5/mlx5_flow.h       |  15 +-
>  drivers/net/mlx5/mlx5_flow_dv.c    |  19 ++
>  drivers/net/mlx5/mlx5_flow_tcf.c   | 644
> +++++++++++++++++++++++++++++++++++--
>  drivers/net/mlx5/mlx5_flow_verbs.c |  87 +++++
>  7 files changed, 773 insertions(+), 115 deletions(-)
Applied to next-net-mlx, thanks. 
> 
> --
> 1.8.3.1
^ permalink raw reply	[flat|nested] 21+ messages in thread
* Re: [dpdk-dev] [PATCH v5 4/4] net/mlx5: support e-switch flow count action
  2018-10-18 18:29       ` [dpdk-dev] [PATCH v5 4/4] net/mlx5: support e-switch flow count action Moti Haimovsky
@ 2018-10-22  9:14         ` Ferruh Yigit
  2018-10-22 20:10           ` Shahaf Shuler
  0 siblings, 1 reply; 21+ messages in thread
From: Ferruh Yigit @ 2018-10-22  9:14 UTC (permalink / raw)
  To: Moti Haimovsky, shahafs; +Cc: dev
On 10/18/2018 7:29 PM, Moti Haimovsky wrote:
> +/**
> + * Parse flower action section in the message retrieving the requested
> + * attribute from the first action that provides it.
> + *
> + * @param opt
> + *   flower section in the Netlink message received.
> + * @param rta_type
> + *   The backward sequence of rta_types, as written in the attribute table,
> + *   we need to traverse in order to get to the requested object.
> + * @param idx
> + *   Current location in rta_type table.
> + * @param[out] data
> + *   data retrieved from the message query.
> + *
> + * @return
> + *   0 if data was found and retrieved, -1 otherwise.
> + */
> +static int
> +flow_tcf_nl_action_parse_and_get(const struct rtattr *arg,
> +				 uint16_t rta_type[], int idx, void *data)
> +{
> +	struct rtattr *tb[TCA_ACT_MAX_PRIO + 1];
> +	int i;
> +
> +	if (arg == NULL || idx < 0)
> +		return -1;
> +	flow_tcf_nl_parse_rtattr(tb, TCA_ACT_MAX_PRIO,
> +				 RTA_DATA(arg), RTA_PAYLOAD(arg));
> +	switch (rta_type[idx]) {
> +	/*
> +	 * flow counters are stored in the actions defined by the flow
> +	 * and not in the flow itself, therefore we need to traverse the
> +	 * flower chain of actions in search for them.
> +	 *
> +	 * Note that the index is not decremented here.
> +	 */
> +	case TCA_ACT_STATS:
> +		for (i = 0; i <= TCA_ACT_MAX_PRIO; i++) {
> +			if (tb[i] &&
> +			!flow_tcf_nl_parse_one_action_and_get(tb[i],
> +							      rta_type,
> +							      idx, data))
> +				return 0;
> +		}
> +		break;
> +	default:
> +		break;
> +	}
> +	return -1;
> +}
Getting following build error with clang, will not pull from mlx tree until
issue resolved, thanks.
.../drivers/net/mlx5/mlx5_flow_tcf.c:2507:6: warning: cast from 'const struct
rtattr *' to 'char *' drops const qualifier [-Wcast-qual]
                                 RTA_DATA(arg), RTA_PAYLOAD(arg));
                                 ^
/usr/include/linux/rtnetlink.h:183:42: note: expanded from macro 'RTA_DATA'
#define RTA_DATA(rta)   ((void*)(((char*)(rta)) + RTA_LENGTH(0)))
                                         ^
.../drivers/net/mlx5/mlx5_flow_tcf.c:2593:31: warning: cast from 'const struct
nlmsghdr *' to 'char *' drops const qualifier [-Wcast-qual]
        struct tcmsg *t = NLMSG_DATA(nlh);
^ permalink raw reply	[flat|nested] 21+ messages in thread
* Re: [dpdk-dev] [PATCH v5 4/4] net/mlx5: support e-switch flow count action
  2018-10-22  9:14         ` Ferruh Yigit
@ 2018-10-22 20:10           ` Shahaf Shuler
  2018-10-23  8:40             ` Ferruh Yigit
  0 siblings, 1 reply; 21+ messages in thread
From: Shahaf Shuler @ 2018-10-22 20:10 UTC (permalink / raw)
  To: Ferruh Yigit, Mordechay Haimovsky; +Cc: dev
Ferruh,
Monday, October 22, 2018 12:15 PM, Ferruh Yigit:
> Subject: Re: [dpdk-dev] [PATCH v5 4/4] net/mlx5: support e-switch flow
> count action
> 
> Getting following build error with clang, will not pull from mlx tree until issue
> resolved, thanks.
> 
> .../drivers/net/mlx5/mlx5_flow_tcf.c:2507:6: warning: cast from 'const struct
> rtattr *' to 'char *' drops const qualifier [-Wcast-qual]
>                                  RTA_DATA(arg), RTA_PAYLOAD(arg));
>                                  ^
> /usr/include/linux/rtnetlink.h:183:42: note: expanded from macro
> 'RTA_DATA'
> #define RTA_DATA(rta)   ((void*)(((char*)(rta)) + RTA_LENGTH(0)))
>                                          ^
> .../drivers/net/mlx5/mlx5_flow_tcf.c:2593:31: warning: cast from 'const
> struct nlmsghdr *' to 'char *' drops const qualifier [-Wcast-qual]
>         struct tcmsg *t = NLMSG_DATA(nlh);
I updated next-net-mlx with the fix for this issue. Can you confirm it is fixed? 
^ permalink raw reply	[flat|nested] 21+ messages in thread
* Re: [dpdk-dev] [PATCH v5 4/4] net/mlx5: support e-switch flow count action
  2018-10-22 20:10           ` Shahaf Shuler
@ 2018-10-23  8:40             ` Ferruh Yigit
  0 siblings, 0 replies; 21+ messages in thread
From: Ferruh Yigit @ 2018-10-23  8:40 UTC (permalink / raw)
  To: Shahaf Shuler, Mordechay Haimovsky; +Cc: dev
On 10/22/2018 9:10 PM, Shahaf Shuler wrote:
> Ferruh,
> 
> Monday, October 22, 2018 12:15 PM, Ferruh Yigit:
>> Subject: Re: [dpdk-dev] [PATCH v5 4/4] net/mlx5: support e-switch flow
>> count action
>>
>> Getting following build error with clang, will not pull from mlx tree until issue
>> resolved, thanks.
>>
>> .../drivers/net/mlx5/mlx5_flow_tcf.c:2507:6: warning: cast from 'const struct
>> rtattr *' to 'char *' drops const qualifier [-Wcast-qual]
>>                                  RTA_DATA(arg), RTA_PAYLOAD(arg));
>>                                  ^
>> /usr/include/linux/rtnetlink.h:183:42: note: expanded from macro
>> 'RTA_DATA'
>> #define RTA_DATA(rta)   ((void*)(((char*)(rta)) + RTA_LENGTH(0)))
>>                                          ^
>> .../drivers/net/mlx5/mlx5_flow_tcf.c:2593:31: warning: cast from 'const
>> struct nlmsghdr *' to 'char *' drops const qualifier [-Wcast-qual]
>>         struct tcmsg *t = NLMSG_DATA(nlh);
> 
> I updated next-net-mlx with the fix for this issue. Can you confirm it is fixed? 
I confirm it is good, pulled from next-net-mlx.
^ permalink raw reply	[flat|nested] 21+ messages in thread
end of thread, other threads:[~2018-10-23  8:40 UTC | newest]
Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-10-11 13:04 [dpdk-dev] [PATCH v2 0/2] support e-switch flow count action Mordechay Haimovsky
2018-10-11 13:04 ` [dpdk-dev] [PATCH v2 1/2] net/mlx5: refactor TC-flow infrastructure Mordechay Haimovsky
2018-10-14 11:07   ` Shahaf Shuler
2018-10-11 13:04 ` [dpdk-dev] [PATCH v2 2/2] net/mlx5: support e-switch flow count action Mordechay Haimovsky
2018-10-14 11:07   ` Shahaf Shuler
2018-10-16 23:50   ` [dpdk-dev] [PATCH v3 0/2] " Mordechay Haimovsky
2018-10-16 23:50   ` [dpdk-dev] [PATCH v3 1/2] net/mlx5: refactor TC-flow infrastructure Mordechay Haimovsky
2018-10-16 23:50   ` [dpdk-dev] [PATCH v3 2/2] net/mlx5: support e-switch flow count action Mordechay Haimovsky
2018-10-17 17:24     ` [dpdk-dev] [PATCH v4 0/3] " Moti Haimovsky
2018-10-17 17:24     ` [dpdk-dev] [PATCH v4 1/3] net/mlx5: refactor TC-flow infrastructure Moti Haimovsky
2018-10-17 17:24     ` [dpdk-dev] [PATCH v4 2/3] net/mlx5: add flow query abstraction interface Moti Haimovsky
2018-10-17 17:24     ` [dpdk-dev] [PATCH v4 3/3] net/mlx5: support e-switch flow count action Moti Haimovsky
2018-10-18 18:29       ` [dpdk-dev] [PATCH v5 0/4] " Moti Haimovsky
2018-10-21  8:23         ` Shahaf Shuler
2018-10-18 18:29       ` [dpdk-dev] [PATCH v5 1/4] net/mlx5: refactor TC-flow infrastructure Moti Haimovsky
2018-10-18 18:29       ` [dpdk-dev] [PATCH v5 2/4] net/mlx5: tc flow to use the new infrastructure Moti Haimovsky
2018-10-18 18:29       ` [dpdk-dev] [PATCH v5 3/4] net/mlx5: add flow query abstraction interface Moti Haimovsky
2018-10-18 18:29       ` [dpdk-dev] [PATCH v5 4/4] net/mlx5: support e-switch flow count action Moti Haimovsky
2018-10-22  9:14         ` Ferruh Yigit
2018-10-22 20:10           ` Shahaf Shuler
2018-10-23  8:40             ` Ferruh Yigit
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).