DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH] net/mlx5: support flow counters using devx
@ 2018-11-04 12:29 Mordechay Haimovsky
  2018-12-06 13:40 ` Shahaf Shuler
  0 siblings, 1 reply; 2+ messages in thread
From: Mordechay Haimovsky @ 2018-11-04 12:29 UTC (permalink / raw)
  To: dev; +Cc: Mordechay Haimovsky

This commit adds counters support when creating flows via direct
verbs. The implementation uses devx interface in order to create
query and delete the counters.
This support requires MLNX_OFED_LINUX-4.5-0.1.0.1 installation.
devx support in the firmware is enabled via the
"mcra /dev/mst/<device> 0x3ce4.7:1 1" command.

Signed-off-by: Moti Haimovsky <motih@mellanox.com>
---
 drivers/net/mlx5/Makefile          |  16 ++++
 drivers/net/mlx5/meson.build       |   7 ++
 drivers/net/mlx5/mlx5.c            |  22 ++++-
 drivers/net/mlx5/mlx5.h            |   1 +
 drivers/net/mlx5/mlx5_devx_cmds.c  | 134 ++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_devx_cmds.h  | 103 ++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow.h       |  14 ++-
 drivers/net/mlx5/mlx5_flow_dv.c    | 189 +++++++++++++++++++++++++++++++++++--
 drivers/net/mlx5/mlx5_flow_verbs.c |  14 +--
 drivers/net/mlx5/mlx5_prm.h        |   8 ++
 10 files changed, 486 insertions(+), 22 deletions(-)
 create mode 100644 drivers/net/mlx5/mlx5_devx_cmds.c
 create mode 100644 drivers/net/mlx5/mlx5_devx_cmds.h

diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index 7a50bcc..f8cef0a 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -36,6 +36,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow_tcf.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow_verbs.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_socket.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_nl.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_devx_cmds.c
 
 ifeq ($(CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS),y)
 INSTALL-$(CONFIG_RTE_LIBRTE_MLX5_PMD)-lib += $(LIB_GLUE)
@@ -148,6 +149,21 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
 		func mlx5dv_create_flow_action_packet_reformat \
 		$(AUTOCONF_OUTPUT)
 	$Q sh -- '$<' '$@' \
+		HAVE_IBV_FLOW_DEVX_COUNTERS \
+		infiniband/mlx5dv.h \
+		enum MLX5DV_FLOW_ACTION_COUNTER_DEVX \
+		$(AUTOCONF_OUTPUT)
+	$Q sh -- '$<' '$@' \
+		HAVE_IBV_DEVX_CONTEXT \
+		infiniband/mlx5dv.h \
+		enum MLX5DV_CONTEXT_FLAGS_DEVX \
+		$(AUTOCONF_OUTPUT)
+	$Q sh -- '$<' '$@' \
+		HAVE_IBV_DEVX_OBJ \
+		infiniband/mlx5dv.h \
+		func mlx5dv_devx_obj_create \
+		$(AUTOCONF_OUTPUT)
+	$Q sh -- '$<' '$@' \
 		HAVE_ETHTOOL_LINK_MODE_25G \
 		/usr/include/linux/ethtool.h \
 		enum ETHTOOL_LINK_MODE_25000baseCR_Full_BIT \
diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
index 28938db..2bc2acb 100644
--- a/drivers/net/mlx5/meson.build
+++ b/drivers/net/mlx5/meson.build
@@ -46,6 +46,7 @@ if build
 		'mlx5_trigger.c',
 		'mlx5_txq.c',
 		'mlx5_vlan.c',
+		'mlx5_devx_cmds.c',
 	)
 	if dpdk_conf.has('RTE_ARCH_X86_64') or dpdk_conf.has('RTE_ARCH_ARM64')
 		sources += files('mlx5_rxtx_vec.c')
@@ -100,6 +101,12 @@ if build
 		'MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD' ],
 		[ 'HAVE_IBV_FLOW_DV_SUPPORT', 'infiniband/mlx5dv.h',
 		'mlx5dv_create_flow_action_packet_reformat' ],
+		[ 'HAVE_IBV_FLOW_DEVX_COUNTERS', 'infiniband/mlx5dv.h',
+		'MLX5DV_FLOW_ACTION_COUNTER_DEVX' ],
+		[ 'HAVE_IBV_FLOW_DEVX_CONTEXT', 'infiniband/mlx5dv.h',
+		'MLX5DV_CONTEXT_FLAGS_DEVX' ],
+		[ 'HAVE_IBV_FLOW_DEVX_OBJ', 'infiniband/mlx5dv.h',
+		'mlx5dv_devx_obj_create' ],
 		[ 'HAVE_IBV_DEVICE_MPLS_SUPPORT', 'infiniband/verbs.h',
 		'IBV_FLOW_SPEC_MPLS' ],
 		[ 'HAVE_IBV_WQ_FLAG_RX_END_PADDING', 'infiniband/verbs.h',
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 62ac54f..50e46bc 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -731,7 +731,7 @@
 	       struct mlx5_dev_config config,
 	       const struct mlx5_switch_info *switch_info)
 {
-	struct ibv_context *ctx;
+	struct ibv_context *ctx = NULL;
 	struct ibv_device_attr_ex attr;
 	struct ibv_port_attr port_attr;
 	struct ibv_pd *pd = NULL;
@@ -790,10 +790,22 @@
 	/* Prepare shared data between primary and secondary process. */
 	mlx5_prepare_shared_data();
 	errno = 0;
-	ctx = mlx5_glue->open_device(ibv_dev);
-	if (!ctx) {
-		rte_errno = errno ? errno : ENODEV;
-		return NULL;
+#ifdef HAVE_IBV_DEVX_CONTEXT
+	ctx = mlx5dv_open_device(ibv_dev,
+				 &(struct mlx5dv_context_attr){
+					.flags = MLX5DV_CONTEXT_FLAGS_DEVX,
+				  });
+#endif
+	if (ctx) {
+		config.devx = 1;
+		DRV_LOG(DEBUG, "DEVX is %ssupported",
+			config.devx ? "" : "not ");
+	} else {
+		ctx = mlx5_glue->open_device(ibv_dev);
+		if (!ctx) {
+			rte_errno = errno ? errno : ENODEV;
+			return NULL;
+		}
 	}
 #ifdef HAVE_IBV_MLX5_MOD_SWP
 	dv_attr.comp_mask |= MLX5DV_CONTEXT_MASK_SWP;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index bc500b2..39dc421 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -124,6 +124,7 @@ struct mlx5_dev_config {
 	unsigned int vf_nl_en:1; /* Enable Netlink requests in VF mode. */
 	unsigned int dv_flow_en:1; /* Enable DV flow. */
 	unsigned int swp:1; /* Tx generic tunnel checksum and TSO offload. */
+	unsigned int devx:1; /* Whether devx interface is available or not. */
 	struct {
 		unsigned int enabled:1; /* Whether MPRQ is enabled. */
 		unsigned int stride_num_n; /* Number of strides. */
diff --git a/drivers/net/mlx5/mlx5_devx_cmds.c b/drivers/net/mlx5/mlx5_devx_cmds.c
new file mode 100644
index 0000000..94255e8
--- /dev/null
+++ b/drivers/net/mlx5/mlx5_devx_cmds.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Copyright 2018 Mellanox Technologies, Ltd */
+
+#include <rte_flow_driver.h>
+
+#include "mlx5.h"
+#include "mlx5_devx_cmds.h"
+#include "mlx5_prm.h"
+
+/*
+ * Dummy struct to prevent compilation errors when
+ * mlx5dv_devx_obj is not defined in mlx5dv.h
+ */
+#ifndef HAVE_IBV_DEVX_OBJ
+struct mlx5dv_devx_obj {
+	void *ctx;
+};
+#endif /* HAVE_IBV_DEVX_OBJ */
+
+/**
+ * Allocate flow counters via devx interface.
+ *
+ * @param[in] ctx
+ *   ibv contexts returned from mlx5dv_open_device.
+ * @param dcs
+ *   Pointer to counters properties structure to be filled by the routine.
+ *
+ * @return
+ *   0 on success, a negative value otherwise.
+ */
+int mlx5_devx_cmd_fc_alloc(struct ibv_context *ctx,
+			   struct mlx5_devx_counter_set *dcs)
+{
+#if defined(HAVE_IBV_FLOW_DEVX_COUNTERS) && defined(HAVE_IBV_DEVX_OBJ)
+	uint32_t in[MLX5_ST_SZ_DW(alloc_flow_counter_in)]   = {0};
+	uint32_t out[MLX5_ST_SZ_DW(alloc_flow_counter_out)] = {0};
+	int status, syndrome;
+
+	MLX5_SET(alloc_flow_counter_in, in, opcode,
+		 MLX5_CMD_OP_ALLOC_FLOW_COUNTER);
+	dcs->obj = mlx5dv_devx_obj_create(ctx,
+					  in, sizeof(in), out, sizeof(out));
+	if (!dcs->obj)
+		return -errno;
+	status = MLX5_GET(query_flow_counter_out, out, status);
+	syndrome = MLX5_GET(query_flow_counter_out, out, syndrome);
+	if (status) {
+		DRV_LOG(DEBUG, "Failed to create devx counters, "
+			"status %x, syndrome %x", status, syndrome);
+		return -1;
+	}
+	dcs->id = MLX5_GET(alloc_flow_counter_out,
+			   out, flow_counter_id);
+	return 0;
+#else
+	(void)ctx;
+	(void)dcs;
+	return -ENOTSUP;
+#endif /* HAVE_IBV_FLOW_DEVX_COUNTERS && HAVE_IBV_DEVX_OBJ */
+}
+
+/**
+ * Free flow counters obtained via devx interface.
+ *
+ * @param[in] obj
+ *   devx object that was obtained from mlx5_devx_cmd_fc_alloc.
+ *
+ * @return
+ *   0 on success, a negative value otherwise.
+ */
+int mlx5_devx_cmd_fc_free(struct mlx5dv_devx_obj *obj)
+{
+#if defined(HAVE_IBV_FLOW_DEVX_COUNTERS) && defined(HAVE_IBV_DEVX_OBJ)
+	return mlx5dv_devx_obj_destroy(obj);
+#else
+	(void)obj;
+	return -ENOTSUP;
+#endif /* HAVE_IBV_FLOW_DEVX_COUNTERS && HAVE_IBV_DEVX_OBJ */
+}
+
+/**
+ * Query flow counters values.
+ *
+ * @param[in] dcs
+ *   devx object that was obtained from mlx5_devx_cmd_fc_alloc.
+ * @param[in] clear
+ *   Whether hardware should clear the counters after the query or not.
+ *  @param pkts
+ *   The number of packets that matched the flow.
+ *  @param bytes
+ *    The number of bytes that matched the flow.
+ *
+ * @return
+ *   0 on success, a negative value otherwise.
+ */
+int
+mlx5_devx_cmd_fc_query(struct mlx5_devx_counter_set *dcs,
+		       int clear __rte_unused,
+		       uint64_t *pkts, uint64_t *bytes)
+{
+#if defined(HAVE_IBV_FLOW_DEVX_COUNTERS) && defined(HAVE_IBV_DEVX_OBJ)
+	uint32_t out[MLX5_ST_SZ_BYTES(query_flow_counter_out) +
+		MLX5_ST_SZ_BYTES(traffic_counter)]   = {0};
+	uint32_t in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {0};
+	void *stats;
+	int status, syndrome, rc;
+
+	MLX5_SET(query_flow_counter_in, in, opcode,
+		 MLX5_CMD_OP_QUERY_FLOW_COUNTER);
+	MLX5_SET(query_flow_counter_in, in, op_mod, 0);
+	MLX5_SET(query_flow_counter_in, in, flow_counter_id, dcs->id);
+	rc = mlx5dv_devx_obj_query(dcs->obj, in, sizeof(in), out, sizeof(out));
+	if (rc)
+		return rc;
+	status = MLX5_GET(query_flow_counter_out, out, status);
+	syndrome = MLX5_GET(query_flow_counter_out, out, syndrome);
+	if (status) {
+		DRV_LOG(DEBUG, "Failed to query devx counters, "
+			"id %d, status %x, syndrome = %x",
+			status, syndrome, dcs->id);
+		return -1;
+	}
+	stats = MLX5_ADDR_OF(query_flow_counter_out,
+			     out, flow_statistics);
+	*pkts = MLX5_GET64(traffic_counter, stats, packets);
+	*bytes = MLX5_GET64(traffic_counter, stats, octets);
+	return 0;
+#else
+	(void)dcs;
+	(void)pkts;
+	(void)bytes;
+	return -ENOTSUP;
+#endif /* HAVE_IBV_FLOW_DEVX_COUNTERS && HAVE_IBV_DEVX_OBJ */
+}
diff --git a/drivers/net/mlx5/mlx5_devx_cmds.h b/drivers/net/mlx5/mlx5_devx_cmds.h
new file mode 100644
index 0000000..2f42e9d
--- /dev/null
+++ b/drivers/net/mlx5/mlx5_devx_cmds.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Mellanox Technologies, Ltd
+ */
+
+#ifndef RTE_PMD_MLX5_DEVX_CMDS_H_
+#define RTE_PMD_MLX5_DEVX_CMDS_H_
+
+#include <rte_common.h>
+
+/* Verbs header. */
+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/mlx5dv.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+#include "mlx5_autoconf.h"
+
+#ifndef HAVE_IBV_DEVX_OBJ
+struct mlx5dv_devx_obj;
+#endif
+
+struct mlx5_devx_counter_set {
+	struct mlx5dv_devx_obj *obj;
+	int id; /* Flow counter ID */
+};
+
+enum {
+	MLX5_CMD_OP_ALLOC_FLOW_COUNTER = 0x939,
+	MLX5_CMD_OP_QUERY_FLOW_COUNTER = 0x93b,
+};
+
+/* Flow counters. */
+struct mlx5_ifc_alloc_flow_counter_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+	u8         syndrome[0x20];
+	u8         flow_counter_id[0x20];
+	u8         reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_alloc_flow_counter_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+	u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_dealloc_flow_counter_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+	u8         syndrome[0x20];
+	u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_dealloc_flow_counter_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+	u8         flow_counter_id[0x20];
+	u8         reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_traffic_counter_bits {
+	u8         packets[0x40];
+	u8         octets[0x40];
+};
+
+struct mlx5_ifc_query_flow_counter_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+	u8         syndrome[0x20];
+	u8         reserved_at_40[0x40];
+	struct mlx5_ifc_traffic_counter_bits flow_statistics[];
+};
+
+struct mlx5_ifc_query_flow_counter_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+	u8         reserved_at_40[0x80];
+	u8         clear[0x1];
+	u8         reserved_at_c1[0xf];
+	u8         num_of_counters[0x10];
+	u8         flow_counter_id[0x20];
+};
+
+/* mlx5_devx_cmds.c */
+
+int mlx5_devx_cmd_fc_alloc(struct ibv_context *ctx,
+			   struct mlx5_devx_counter_set *dcx);
+int mlx5_devx_cmd_fc_free(struct mlx5dv_devx_obj *obj);
+int mlx5_devx_cmd_fc_query(struct mlx5_devx_counter_set *dcx,
+			   int clear,
+			   uint64_t *pkts, uint64_t *bytes);
+
+#endif /* RTE_PMD_MLX5_DEVX_CMDS_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 2a3ce44..c083e80 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -21,6 +21,10 @@
 #pragma GCC diagnostic error "-Wpedantic"
 #endif
 
+#include "mlx5.h"
+#include "mlx5_devx_cmds.h"
+#include "mlx5_prm.h"
+
 /* Pattern outer Layer bits. */
 #define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0)
 #define MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1)
@@ -269,13 +273,17 @@ struct mlx5_flow {
 struct mlx5_flow_counter {
 	LIST_ENTRY(mlx5_flow_counter) next; /**< Pointer to the next counter. */
 	uint32_t shared:1; /**< Share counter ID with other flow rules. */
-	uint32_t ref_cnt:31; /**< Reference counter. */
+	uint32_t ref_cnt:30; /**< Reference counter. */
+	uint32_t devx_cnt:1; /**< Devx counter */
 	uint32_t id; /**< Counter ID. */
+	union {  /**< Holds the counters for the rule. */
 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
-	struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
+		struct ibv_counter_set *cs;
 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
-	struct ibv_counters *cs; /**< Holds the counters for the rule. */
+		struct ibv_counters *cs;
 #endif
+		struct mlx5_devx_counter_set *dcs;
+	};
 	uint64_t hits; /**< Number of packets matched by the rule. */
 	uint64_t bytes; /**< Number of bytes matched by the rule. */
 };
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index c11ecd4..8fa5de8 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -32,6 +32,7 @@
 #include "mlx5_prm.h"
 #include "mlx5_glue.h"
 #include "mlx5_flow.h"
+#include "mlx5_devx_cmds.h"
 
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
 
@@ -704,6 +705,88 @@
 }
 
 /**
+ * Get or create a flow counter.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] shared
+ *   Indicate if this counter is shared with other flows.
+ * @param[in] id
+ *   Counter identifier.
+ *
+ * @return
+ *   A pointer to the counter, NULL otherwise and rte_errno is set.
+ */
+static struct mlx5_flow_counter *
+flow_dv_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
+{
+	struct priv *priv = dev->data->dev_private;
+	struct mlx5_flow_counter *cnt = NULL;
+	struct mlx5_devx_counter_set *dcs = NULL;
+	int ret;
+
+	if (!priv->config.devx) {
+		ret = -EINVAL;
+		goto error_exit;
+	}
+	if (shared) {
+		LIST_FOREACH(cnt, &priv->flow_counters, next) {
+			if (cnt->shared && cnt->id == id) {
+				cnt->ref_cnt++;
+				return cnt;
+			}
+		}
+	}
+	cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
+	dcs = rte_calloc(__func__, 1, sizeof(*dcs), 0);
+	if (!dcs || !cnt) {
+		ret = -ENOMEM;
+		goto error_exit;
+	}
+	ret = mlx5_devx_cmd_fc_alloc(priv->ctx, dcs);
+	if (ret)
+		goto error_exit;
+	struct mlx5_flow_counter tmpl = {
+		.shared = shared,
+		.ref_cnt = 1,
+		.devx_cnt = 1,
+		.id = id,
+		.dcs = dcs,
+	};
+	*cnt = tmpl;
+	LIST_INSERT_HEAD(&priv->flow_counters, cnt, next);
+	return cnt;
+error_exit:
+	rte_free(cnt);
+	rte_free(dcs);
+	rte_errno = -ret;
+	return NULL;
+}
+
+/**
+ * Release a flow counter.
+ *
+ * @param[in] counter
+ *   Pointer to the counter handler.
+ */
+static void
+flow_dv_counter_release(struct mlx5_flow_counter *counter)
+{
+	int ret;
+
+	if (!counter)
+		return;
+	if (--counter->ref_cnt == 0) {
+		ret = mlx5_devx_cmd_fc_free(counter->dcs->obj);
+		if (ret)
+			DRV_LOG(ERR, "Failed to free devx counters, %d", ret);
+		LIST_REMOVE(counter, next);
+		rte_free(counter->dcs);
+		rte_free(counter);
+	}
+}
+
+/**
  * Verify the @p attributes will be correctly understood by the NIC and store
  * them in the @p flow if everything is correct.
  *
@@ -778,6 +861,9 @@
 	int tunnel = 0;
 	uint8_t next_protocol = 0xff;
 	int actions_n = 0;
+#ifdef HAVE_IBV_FLOW_DEVX_COUNTERS
+	struct priv *priv = dev->data->dev_private;
+#endif
 
 	if (items == NULL)
 		return -1;
@@ -941,9 +1027,14 @@
 			++actions_n;
 			break;
 		case RTE_FLOW_ACTION_TYPE_COUNT:
-			ret = mlx5_flow_validate_action_count(dev, attr, error);
-			if (ret < 0)
-				return ret;
+#ifdef HAVE_IBV_FLOW_DEVX_COUNTERS
+			if (!priv->config.devx)
+#endif
+				return rte_flow_error_set
+					      (error, ENOTSUP,
+					       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					       NULL,
+					       "count action not supported");
 			action_flags |= MLX5_FLOW_ACTION_COUNT;
 			++actions_n;
 			break;
@@ -1693,6 +1784,7 @@
 	}
 }
 
+
 /**
  * Store the requested actions in an array.
  *
@@ -1719,6 +1811,7 @@
 {
 	const struct rte_flow_action_queue *queue;
 	const struct rte_flow_action_rss *rss;
+	const struct rte_flow_action_count *count;
 	int actions_n = dev_flow->dv.actions_n;
 	struct rte_flow *flow = dev_flow->flow;
 	const struct rte_flow_action *action_ptr = action;
@@ -1837,6 +1930,22 @@
 		/* If decap is followed by encap, handle it at encap case. */
 		flow->actions |= MLX5_FLOW_ACTION_RAW_DECAP;
 		break;
+	case RTE_FLOW_ACTION_TYPE_COUNT:
+		count = action->conf;
+		flow->counter = flow_dv_counter_new(dev,
+						    count->shared, count->id);
+		if (!flow->counter)
+			return rte_flow_error_set(error, rte_errno,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  action,
+						  "cannot create counter"
+						  " object.");
+		dev_flow->dv.actions[actions_n].type =
+					MLX5DV_FLOW_ACTION_COUNTER_DEVX;
+		dev_flow->dv.actions[actions_n].obj = flow->counter->dcs->obj;
+		flow->actions |= MLX5_FLOW_ACTION_COUNT;
+		actions_n++;
+		break;
 	default:
 		break;
 	}
@@ -2203,8 +2312,6 @@
 			dv->hrxq = NULL;
 		}
 	}
-	if (flow->counter)
-		flow->counter = NULL;
 }
 
 /**
@@ -2223,6 +2330,10 @@
 	if (!flow)
 		return;
 	flow_dv_remove(dev, flow);
+	if (flow->counter) {
+		flow_dv_counter_release(flow->counter);
+		flow->counter = NULL;
+	}
 	while (!LIST_EMPTY(&flow->dev_flows)) {
 		dev_flow = LIST_FIRST(&flow->dev_flows);
 		LIST_REMOVE(dev_flow, next);
@@ -2235,6 +2346,55 @@
 }
 
 /**
+ * Query a dv flow  rule for its statistics via devx.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in] flow
+ *   Pointer to the sub flow.
+ * @param[out] data
+ *   data retrieved by the query.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_query_count(struct rte_flow *flow, void *data,
+		    struct rte_flow_error *error)
+{
+	struct rte_flow_query_count *qc = data;
+	uint64_t pkts = 0;
+	uint64_t bytes = 0;
+	int err;
+
+	if (flow->counter) {
+		err = mlx5_devx_cmd_fc_query(flow->counter->dcs,
+					     qc->reset, &pkts, &bytes);
+		if (err)
+			return rte_flow_error_set
+				(error, err,
+				 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				 NULL,
+				 "cannot read counter");
+		qc->hits_set = 1;
+		qc->bytes_set = 1;
+		qc->hits = pkts - flow->counter->hits;
+		qc->bytes = bytes - flow->counter->bytes;
+		if (qc->reset) {
+			flow->counter->hits = pkts;
+			flow->counter->bytes = bytes;
+		}
+		return 0;
+	}
+	return rte_flow_error_set(error, ENOTSUP,
+				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				  NULL,
+				  "counters are not available");
+}
+
+/**
  * Query a flow.
  *
  * @see rte_flow_query()
@@ -2247,8 +2407,23 @@
 	      void *data __rte_unused,
 	      struct rte_flow_error *error __rte_unused)
 {
-	rte_errno = ENOTSUP;
-	return -rte_errno;
+	int ret = -EINVAL;
+
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+		switch (actions->type) {
+		case RTE_FLOW_ACTION_TYPE_VOID:
+			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			ret = flow_dv_query_count(flow, data, error);
+			break;
+		default:
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  actions,
+						  "action not supported");
+		}
+	}
+	return ret;
 }
 
 
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
index 2e506b9..ba63945 100644
--- a/drivers/net/mlx5/mlx5_flow_verbs.c
+++ b/drivers/net/mlx5/mlx5_flow_verbs.c
@@ -117,13 +117,13 @@
 	struct mlx5_flow_counter *cnt;
 	int ret;
 
-	LIST_FOREACH(cnt, &priv->flow_counters, next) {
-		if (!cnt->shared || cnt->shared != shared)
-			continue;
-		if (cnt->id != id)
-			continue;
-		cnt->ref_cnt++;
-		return cnt;
+	if (shared) {
+		LIST_FOREACH(cnt, &priv->flow_counters, next) {
+			if (cnt->shared && cnt->id == id) {
+				cnt->ref_cnt++;
+				return cnt;
+			}
+		}
 	}
 	cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
 	if (!cnt) {
diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
index 29742b1..5ff2dd0 100644
--- a/drivers/net/mlx5/mlx5_prm.h
+++ b/drivers/net/mlx5/mlx5_prm.h
@@ -368,6 +368,7 @@ struct mlx5_modification_cmd {
 #define __mlx5_dw_bit_off(typ, fld) (32 - __mlx5_bit_sz(typ, fld) - \
 				    (__mlx5_bit_off(typ, fld) & 0x1f))
 #define __mlx5_dw_off(typ, fld) (__mlx5_bit_off(typ, fld) / 32)
+#define __mlx5_64_off(typ, fld) (__mlx5_bit_off(typ, fld) / 64)
 #define __mlx5_dw_mask(typ, fld) (__mlx5_mask(typ, fld) << \
 				  __mlx5_dw_bit_off(typ, fld))
 #define __mlx5_mask(typ, fld) ((u32)((1ull << __mlx5_bit_sz(typ, fld)) - 1))
@@ -375,6 +376,7 @@ struct mlx5_modification_cmd {
 #define __mlx5_16_bit_off(typ, fld) (16 - __mlx5_bit_sz(typ, fld) - \
 				    (__mlx5_bit_off(typ, fld) & 0xf))
 #define __mlx5_mask16(typ, fld) ((u16)((1ull << __mlx5_bit_sz(typ, fld)) - 1))
+#define MLX5_ST_SZ_BYTES(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 8)
 #define MLX5_ST_SZ_DW(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 32)
 #define MLX5_ST_SZ_DB(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 8)
 #define MLX5_BYTE_OFF(typ, fld) (__mlx5_bit_off(typ, fld) / 8)
@@ -391,10 +393,16 @@ struct mlx5_modification_cmd {
 				 (((_v) & __mlx5_mask(typ, fld)) << \
 				   __mlx5_dw_bit_off(typ, fld))); \
 	} while (0)
+#define MLX5_GET(typ, p, fld) \
+	((rte_be_to_cpu_32(*((__be32 *)(p) +\
+	__mlx5_dw_off(typ, fld))) >> __mlx5_dw_bit_off(typ, fld)) & \
+	__mlx5_mask(typ, fld))
 #define MLX5_GET16(typ, p, fld) \
 	((rte_be_to_cpu_16(*((__be16 *)(p) + \
 	  __mlx5_16_off(typ, fld))) >> __mlx5_16_bit_off(typ, fld)) & \
 	 __mlx5_mask16(typ, fld))
+#define MLX5_GET64(typ, p, fld) rte_be_to_cpu_64(*((__be64 *)(p) + \
+						   __mlx5_64_off(typ, fld)))
 #define MLX5_FLD_SZ_BYTES(typ, fld) (__mlx5_bit_sz(typ, fld) / 8)
 
 struct mlx5_ifc_fte_match_set_misc_bits {
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [dpdk-dev] [PATCH] net/mlx5: support flow counters using devx
  2018-11-04 12:29 [dpdk-dev] [PATCH] net/mlx5: support flow counters using devx Mordechay Haimovsky
@ 2018-12-06 13:40 ` Shahaf Shuler
  0 siblings, 0 replies; 2+ messages in thread
From: Shahaf Shuler @ 2018-12-06 13:40 UTC (permalink / raw)
  To: Mordechay Haimovsky, dev; +Cc: Mordechay Haimovsky

Hi Moti,

See some comments. 

Sunday, November 4, 2018 2:30 PM, Mordechay Haimovsky:
> Subject: [dpdk-dev] [PATCH] net/mlx5: support flow counters using devx
> 
> This commit adds counters support when creating flows via direct verbs. The
> implementation uses devx interface in order to create query and delete the
> counters.
> This support requires MLNX_OFED_LINUX-4.5-0.1.0.1 installation.
> devx support in the firmware is enabled via the "mcra /dev/mst/<device>
> 0x3ce4.7:1 1" command.

We shouldn't encourage the user to do mcra commands. The support should exists on the FW w/o the need to enable it from the user. 
I think it is already in FW. 


> 
> Signed-off-by: Moti Haimovsky <motih@mellanox.com>
> ---
>  drivers/net/mlx5/Makefile          |  16 ++++
>  drivers/net/mlx5/meson.build       |   7 ++
>  drivers/net/mlx5/mlx5.c            |  22 ++++-
>  drivers/net/mlx5/mlx5.h            |   1 +
>  drivers/net/mlx5/mlx5_devx_cmds.c  | 134
> ++++++++++++++++++++++++++  drivers/net/mlx5/mlx5_devx_cmds.h  |
> 103 ++++++++++++++++++++
>  drivers/net/mlx5/mlx5_flow.h       |  14 ++-
>  drivers/net/mlx5/mlx5_flow_dv.c    | 189
> +++++++++++++++++++++++++++++++++++--
>  drivers/net/mlx5/mlx5_flow_verbs.c |  14 +--
>  drivers/net/mlx5/mlx5_prm.h        |   8 ++
>  10 files changed, 486 insertions(+), 22 deletions(-)  create mode 100644
> drivers/net/mlx5/mlx5_devx_cmds.c  create mode 100644
> drivers/net/mlx5/mlx5_devx_cmds.h
> 
> diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile index
> 7a50bcc..f8cef0a 100644
> --- a/drivers/net/mlx5/Makefile
> +++ b/drivers/net/mlx5/Makefile
> @@ -36,6 +36,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) +=
> mlx5_flow_tcf.c
>  SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow_verbs.c
>  SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_socket.c
>  SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_nl.c
> +SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_devx_cmds.c
> 
>  ifeq ($(CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS),y)
>  INSTALL-$(CONFIG_RTE_LIBRTE_MLX5_PMD)-lib += $(LIB_GLUE) @@ -148,6
> +149,21 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
>  		func mlx5dv_create_flow_action_packet_reformat \
>  		$(AUTOCONF_OUTPUT)
>  	$Q sh -- '$<' '$@' \
> +		HAVE_IBV_FLOW_DEVX_COUNTERS \
> +		infiniband/mlx5dv.h \
> +		enum MLX5DV_FLOW_ACTION_COUNTER_DEVX \
> +		$(AUTOCONF_OUTPUT)

This is a validator for a new action for dv, ok

> +	$Q sh -- '$<' '$@' \
> +		HAVE_IBV_DEVX_CONTEXT \
> +		infiniband/mlx5dv.h \
> +		enum MLX5DV_CONTEXT_FLAGS_DEVX \
> +		$(AUTOCONF_OUTPUT)
> +	$Q sh -- '$<' '$@' \
> +		HAVE_IBV_DEVX_OBJ \
> +		infiniband/mlx5dv.h \
> +		func mlx5dv_devx_obj_create \
> +		$(AUTOCONF_OUTPUT)
> +	$Q sh -- '$<' '$@' \

Those two looks the same, and should be merge into a single case called DEVX_SUPPORT or similar. 

>  		HAVE_ETHTOOL_LINK_MODE_25G \
>  		/usr/include/linux/ethtool.h \
>  		enum ETHTOOL_LINK_MODE_25000baseCR_Full_BIT \ diff --
> git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build index
> 28938db..2bc2acb 100644
> --- a/drivers/net/mlx5/meson.build
> +++ b/drivers/net/mlx5/meson.build
> @@ -46,6 +46,7 @@ if build
>  		'mlx5_trigger.c',
>  		'mlx5_txq.c',
>  		'mlx5_vlan.c',
> +		'mlx5_devx_cmds.c',
>  	)
>  	if dpdk_conf.has('RTE_ARCH_X86_64') or
> dpdk_conf.has('RTE_ARCH_ARM64')
>  		sources += files('mlx5_rxtx_vec.c')
> @@ -100,6 +101,12 @@ if build
>  		'MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD' ],
>  		[ 'HAVE_IBV_FLOW_DV_SUPPORT', 'infiniband/mlx5dv.h',
>  		'mlx5dv_create_flow_action_packet_reformat' ],
> +		[ 'HAVE_IBV_FLOW_DEVX_COUNTERS',
> 'infiniband/mlx5dv.h',
> +		'MLX5DV_FLOW_ACTION_COUNTER_DEVX' ],
> +		[ 'HAVE_IBV_FLOW_DEVX_CONTEXT', 'infiniband/mlx5dv.h',
> +		'MLX5DV_CONTEXT_FLAGS_DEVX' ],
> +		[ 'HAVE_IBV_FLOW_DEVX_OBJ', 'infiniband/mlx5dv.h',
> +		'mlx5dv_devx_obj_create' ],
>  		[ 'HAVE_IBV_DEVICE_MPLS_SUPPORT', 'infiniband/verbs.h',
>  		'IBV_FLOW_SPEC_MPLS' ],
>  		[ 'HAVE_IBV_WQ_FLAG_RX_END_PADDING',
> 'infiniband/verbs.h', diff --git a/drivers/net/mlx5/mlx5.c
> b/drivers/net/mlx5/mlx5.c index 62ac54f..50e46bc 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -731,7 +731,7 @@
>  	       struct mlx5_dev_config config,
>  	       const struct mlx5_switch_info *switch_info)  {
> -	struct ibv_context *ctx;
> +	struct ibv_context *ctx = NULL;
>  	struct ibv_device_attr_ex attr;
>  	struct ibv_port_attr port_attr;
>  	struct ibv_pd *pd = NULL;
> @@ -790,10 +790,22 @@
>  	/* Prepare shared data between primary and secondary process. */
>  	mlx5_prepare_shared_data();
>  	errno = 0;
> -	ctx = mlx5_glue->open_device(ibv_dev);
> -	if (!ctx) {
> -		rte_errno = errno ? errno : ENODEV;
> -		return NULL;
> +#ifdef HAVE_IBV_DEVX_CONTEXT
> +	ctx = mlx5dv_open_device(ibv_dev,
> +				 &(struct mlx5dv_context_attr){
> +					.flags =
> MLX5DV_CONTEXT_FLAGS_DEVX,
> +				  });
> +#endif
> +	if (ctx) {
> +		config.devx = 1;
> +		DRV_LOG(DEBUG, "DEVX is %ssupported",
> +			config.devx ? "" : "not ");
> +	} else {
> +		ctx = mlx5_glue->open_device(ibv_dev);
> +		if (!ctx) {
> +			rte_errno = errno ? errno : ENODEV;
> +			return NULL;
> +		}
>  	}
>  #ifdef HAVE_IBV_MLX5_MOD_SWP
>  	dv_attr.comp_mask |= MLX5DV_CONTEXT_MASK_SWP; diff --git
> a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> bc500b2..39dc421 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -124,6 +124,7 @@ struct mlx5_dev_config {
>  	unsigned int vf_nl_en:1; /* Enable Netlink requests in VF mode. */
>  	unsigned int dv_flow_en:1; /* Enable DV flow. */
>  	unsigned int swp:1; /* Tx generic tunnel checksum and TSO offload.
> */
> +	unsigned int devx:1; /* Whether devx interface is available or not. */
>  	struct {
>  		unsigned int enabled:1; /* Whether MPRQ is enabled. */
>  		unsigned int stride_num_n; /* Number of strides. */ diff --git
> a/drivers/net/mlx5/mlx5_devx_cmds.c
> b/drivers/net/mlx5/mlx5_devx_cmds.c
> new file mode 100644
> index 0000000..94255e8
> --- /dev/null
> +++ b/drivers/net/mlx5/mlx5_devx_cmds.c
> @@ -0,0 +1,134 @@
> +// SPDX-License-Identifier: BSD-3-Clause
> +/* Copyright 2018 Mellanox Technologies, Ltd */
> +
> +#include <rte_flow_driver.h>
> +
> +#include "mlx5.h"
> +#include "mlx5_devx_cmds.h"
> +#include "mlx5_prm.h"
> +
> +/*
> + * Dummy struct to prevent compilation errors when
> + * mlx5dv_devx_obj is not defined in mlx5dv.h  */ #ifndef
> +HAVE_IBV_DEVX_OBJ struct mlx5dv_devx_obj {
> +	void *ctx;
> +};
> +#endif /* HAVE_IBV_DEVX_OBJ */
> +
> +/**
> + * Allocate flow counters via devx interface.
> + *
> + * @param[in] ctx
> + *   ibv contexts returned from mlx5dv_open_device.
> + * @param dcs
> + *   Pointer to counters properties structure to be filled by the routine.
> + *
> + * @return
> + *   0 on success, a negative value otherwise.
> + */
> +int mlx5_devx_cmd_fc_alloc(struct ibv_context *ctx,
> +			   struct mlx5_devx_counter_set *dcs) { #if
> +defined(HAVE_IBV_FLOW_DEVX_COUNTERS) &&
> defined(HAVE_IBV_DEVX_OBJ)
> +	uint32_t in[MLX5_ST_SZ_DW(alloc_flow_counter_in)]   = {0};
> +	uint32_t out[MLX5_ST_SZ_DW(alloc_flow_counter_out)] = {0};
> +	int status, syndrome;
> +
> +	MLX5_SET(alloc_flow_counter_in, in, opcode,
> +		 MLX5_CMD_OP_ALLOC_FLOW_COUNTER);
> +	dcs->obj = mlx5dv_devx_obj_create(ctx,
> +					  in, sizeof(in), out, sizeof(out));

The mlx5dv_devx_obj_create should be part of mlx5_glue. You can handle the ifdef inside the function. 
Same for other mlx5dv_devx_* functions. 

> +	if (!dcs->obj)
> +		return -errno;
> +	status = MLX5_GET(query_flow_counter_out, out, status);
> +	syndrome = MLX5_GET(query_flow_counter_out, out, syndrome);
> +	if (status) {
> +		DRV_LOG(DEBUG, "Failed to create devx counters, "
> +			"status %x, syndrome %x", status, syndrome);
> +		return -1;
> +	}
> +	dcs->id = MLX5_GET(alloc_flow_counter_out,
> +			   out, flow_counter_id);
> +	return 0;
> +#else
> +	(void)ctx;
> +	(void)dcs;
> +	return -ENOTSUP;
> +#endif /* HAVE_IBV_FLOW_DEVX_COUNTERS && HAVE_IBV_DEVX_OBJ */
> }
> +
> +/**
> + * Free flow counters obtained via devx interface.
> + *
> + * @param[in] obj
> + *   devx object that was obtained from mlx5_devx_cmd_fc_alloc.
> + *
> + * @return
> + *   0 on success, a negative value otherwise.
> + */
> +int mlx5_devx_cmd_fc_free(struct mlx5dv_devx_obj *obj) { #if
> +defined(HAVE_IBV_FLOW_DEVX_COUNTERS) &&
> defined(HAVE_IBV_DEVX_OBJ)
> +	return mlx5dv_devx_obj_destroy(obj);
> +#else
> +	(void)obj;
> +	return -ENOTSUP;
> +#endif /* HAVE_IBV_FLOW_DEVX_COUNTERS && HAVE_IBV_DEVX_OBJ */
> }
> +
> +/**
> + * Query flow counters values.
> + *
> + * @param[in] dcs
> + *   devx object that was obtained from mlx5_devx_cmd_fc_alloc.
> + * @param[in] clear
> + *   Whether hardware should clear the counters after the query or not.
> + *  @param pkts
> + *   The number of packets that matched the flow.
> + *  @param bytes
> + *    The number of bytes that matched the flow.
> + *
> + * @return
> + *   0 on success, a negative value otherwise.
> + */
> +int
> +mlx5_devx_cmd_fc_query(struct mlx5_devx_counter_set *dcs,
> +		       int clear __rte_unused,
> +		       uint64_t *pkts, uint64_t *bytes) { #if
> +defined(HAVE_IBV_FLOW_DEVX_COUNTERS) &&
> defined(HAVE_IBV_DEVX_OBJ)
> +	uint32_t out[MLX5_ST_SZ_BYTES(query_flow_counter_out) +
> +		MLX5_ST_SZ_BYTES(traffic_counter)]   = {0};
> +	uint32_t in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {0};
> +	void *stats;
> +	int status, syndrome, rc;
> +
> +	MLX5_SET(query_flow_counter_in, in, opcode,
> +		 MLX5_CMD_OP_QUERY_FLOW_COUNTER);
> +	MLX5_SET(query_flow_counter_in, in, op_mod, 0);
> +	MLX5_SET(query_flow_counter_in, in, flow_counter_id, dcs->id);
> +	rc = mlx5dv_devx_obj_query(dcs->obj, in, sizeof(in), out,
> sizeof(out));
> +	if (rc)
> +		return rc;
> +	status = MLX5_GET(query_flow_counter_out, out, status);
> +	syndrome = MLX5_GET(query_flow_counter_out, out, syndrome);
> +	if (status) {
> +		DRV_LOG(DEBUG, "Failed to query devx counters, "
> +			"id %d, status %x, syndrome = %x",
> +			status, syndrome, dcs->id);
> +		return -1;
> +	}
> +	stats = MLX5_ADDR_OF(query_flow_counter_out,
> +			     out, flow_statistics);
> +	*pkts = MLX5_GET64(traffic_counter, stats, packets);
> +	*bytes = MLX5_GET64(traffic_counter, stats, octets);
> +	return 0;
> +#else
> +	(void)dcs;
> +	(void)pkts;
> +	(void)bytes;
> +	return -ENOTSUP;
> +#endif /* HAVE_IBV_FLOW_DEVX_COUNTERS && HAVE_IBV_DEVX_OBJ */
> }
> diff --git a/drivers/net/mlx5/mlx5_devx_cmds.h
> b/drivers/net/mlx5/mlx5_devx_cmds.h
> new file mode 100644
> index 0000000..2f42e9d
> --- /dev/null
> +++ b/drivers/net/mlx5/mlx5_devx_cmds.h

Why the input/output parameters are not part of mlx5_prm.h? 

> @@ -0,0 +1,103 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2018 Mellanox Technologies, Ltd  */
> +
> +#ifndef RTE_PMD_MLX5_DEVX_CMDS_H_
> +#define RTE_PMD_MLX5_DEVX_CMDS_H_
> +
> +#include <rte_common.h>
> +
> +/* Verbs header. */
> +/* ISO C doesn't support unnamed structs/unions, disabling -pedantic.
> +*/ #ifdef PEDANTIC #pragma GCC diagnostic ignored "-Wpedantic"
> +#endif
> +#include <infiniband/mlx5dv.h>
> +#ifdef PEDANTIC
> +#pragma GCC diagnostic error "-Wpedantic"
> +#endif
> +
> +#include "mlx5_autoconf.h"
> +
> +#ifndef HAVE_IBV_DEVX_OBJ
> +struct mlx5dv_devx_obj;
> +#endif
> +
> +struct mlx5_devx_counter_set {
> +	struct mlx5dv_devx_obj *obj;
> +	int id; /* Flow counter ID */
> +};
> +
> +enum {
> +	MLX5_CMD_OP_ALLOC_FLOW_COUNTER = 0x939,
> +	MLX5_CMD_OP_QUERY_FLOW_COUNTER = 0x93b, };
> +
> +/* Flow counters. */
> +struct mlx5_ifc_alloc_flow_counter_out_bits {
> +	u8         status[0x8];
> +	u8         reserved_at_8[0x18];
> +	u8         syndrome[0x20];
> +	u8         flow_counter_id[0x20];
> +	u8         reserved_at_60[0x20];
> +};
> +
> +struct mlx5_ifc_alloc_flow_counter_in_bits {
> +	u8         opcode[0x10];
> +	u8         reserved_at_10[0x10];
> +	u8         reserved_at_20[0x10];
> +	u8         op_mod[0x10];
> +	u8         reserved_at_40[0x40];
> +};
> +
> +struct mlx5_ifc_dealloc_flow_counter_out_bits {
> +	u8         status[0x8];
> +	u8         reserved_at_8[0x18];
> +	u8         syndrome[0x20];
> +	u8         reserved_at_40[0x40];
> +};
> +
> +struct mlx5_ifc_dealloc_flow_counter_in_bits {
> +	u8         opcode[0x10];
> +	u8         reserved_at_10[0x10];
> +	u8         reserved_at_20[0x10];
> +	u8         op_mod[0x10];
> +	u8         flow_counter_id[0x20];
> +	u8         reserved_at_60[0x20];
> +};
> +
> +struct mlx5_ifc_traffic_counter_bits {
> +	u8         packets[0x40];
> +	u8         octets[0x40];
> +};
> +
> +struct mlx5_ifc_query_flow_counter_out_bits {
> +	u8         status[0x8];
> +	u8         reserved_at_8[0x18];
> +	u8         syndrome[0x20];
> +	u8         reserved_at_40[0x40];
> +	struct mlx5_ifc_traffic_counter_bits flow_statistics[]; };
> +
> +struct mlx5_ifc_query_flow_counter_in_bits {
> +	u8         opcode[0x10];
> +	u8         reserved_at_10[0x10];
> +	u8         reserved_at_20[0x10];
> +	u8         op_mod[0x10];
> +	u8         reserved_at_40[0x80];
> +	u8         clear[0x1];
> +	u8         reserved_at_c1[0xf];
> +	u8         num_of_counters[0x10];
> +	u8         flow_counter_id[0x20];
> +};
> +
> +/* mlx5_devx_cmds.c */
> +
> +int mlx5_devx_cmd_fc_alloc(struct ibv_context *ctx,
> +			   struct mlx5_devx_counter_set *dcx); int
> +mlx5_devx_cmd_fc_free(struct mlx5dv_devx_obj *obj); int
> +mlx5_devx_cmd_fc_query(struct mlx5_devx_counter_set *dcx,
> +			   int clear,
> +			   uint64_t *pkts, uint64_t *bytes);
> +
> +#endif /* RTE_PMD_MLX5_DEVX_CMDS_H_ */
> diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
> index 2a3ce44..c083e80 100644
> --- a/drivers/net/mlx5/mlx5_flow.h
> +++ b/drivers/net/mlx5/mlx5_flow.h
> @@ -21,6 +21,10 @@
>  #pragma GCC diagnostic error "-Wpedantic"
>  #endif
> 
> +#include "mlx5.h"
> +#include "mlx5_devx_cmds.h"
> +#include "mlx5_prm.h"
> +
>  /* Pattern outer Layer bits. */
>  #define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0)  #define
> MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1) @@ -269,13 +273,17 @@
> struct mlx5_flow {  struct mlx5_flow_counter {
>  	LIST_ENTRY(mlx5_flow_counter) next; /**< Pointer to the next
> counter. */
>  	uint32_t shared:1; /**< Share counter ID with other flow rules. */
> -	uint32_t ref_cnt:31; /**< Reference counter. */
> +	uint32_t ref_cnt:30; /**< Reference counter. */
> +	uint32_t devx_cnt:1; /**< Devx counter */

I don't understand why this flag is needed.
The flow engine is fixed on the command line paramters. 
If dv was chosen then it will be only the dcs counter. Otherwise (verbs) it will the relevant verbs counter according to ifdef. 

>  	uint32_t id; /**< Counter ID. */
> +	union {  /**< Holds the counters for the rule. */
>  #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
> -	struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
> +		struct ibv_counter_set *cs;
>  #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
> -	struct ibv_counters *cs; /**< Holds the counters for the rule. */
> +		struct ibv_counters *cs;
>  #endif
> +		struct mlx5_devx_counter_set *dcs;
> +	};
>  	uint64_t hits; /**< Number of packets matched by the rule. */
>  	uint64_t bytes; /**< Number of bytes matched by the rule. */  }; diff
> --git a/drivers/net/mlx5/mlx5_flow_dv.c
> b/drivers/net/mlx5/mlx5_flow_dv.c index c11ecd4..8fa5de8 100644
> --- a/drivers/net/mlx5/mlx5_flow_dv.c
> +++ b/drivers/net/mlx5/mlx5_flow_dv.c
> @@ -32,6 +32,7 @@
>  #include "mlx5_prm.h"
>  #include "mlx5_glue.h"
>  #include "mlx5_flow.h"
> +#include "mlx5_devx_cmds.h"
> 
>  #ifdef HAVE_IBV_FLOW_DV_SUPPORT
> 
> @@ -704,6 +705,88 @@
>  }
> 
>  /**
> + * Get or create a flow counter.
> + *
> + * @param[in] dev
> + *   Pointer to the Ethernet device structure.
> + * @param[in] shared
> + *   Indicate if this counter is shared with other flows.
> + * @param[in] id
> + *   Counter identifier.
> + *
> + * @return
> + *   A pointer to the counter, NULL otherwise and rte_errno is set.
> + */
> +static struct mlx5_flow_counter *
> +flow_dv_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t
> +id) {
> +	struct priv *priv = dev->data->dev_private;
> +	struct mlx5_flow_counter *cnt = NULL;
> +	struct mlx5_devx_counter_set *dcs = NULL;
> +	int ret;
> +
> +	if (!priv->config.devx) {
> +		ret = -EINVAL;
> +		goto error_exit;
> +	}
> +	if (shared) {
> +		LIST_FOREACH(cnt, &priv->flow_counters, next) {
> +			if (cnt->shared && cnt->id == id) {
> +				cnt->ref_cnt++;
> +				return cnt;
> +			}
> +		}
> +	}
> +	cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
> +	dcs = rte_calloc(__func__, 1, sizeof(*dcs), 0);
> +	if (!dcs || !cnt) {
> +		ret = -ENOMEM;
> +		goto error_exit;
> +	}
> +	ret = mlx5_devx_cmd_fc_alloc(priv->ctx, dcs);
> +	if (ret)
> +		goto error_exit;
> +	struct mlx5_flow_counter tmpl = {
> +		.shared = shared,
> +		.ref_cnt = 1,
> +		.devx_cnt = 1,
> +		.id = id,
> +		.dcs = dcs,
> +	};
> +	*cnt = tmpl;
> +	LIST_INSERT_HEAD(&priv->flow_counters, cnt, next);
> +	return cnt;
> +error_exit:
> +	rte_free(cnt);
> +	rte_free(dcs);
> +	rte_errno = -ret;
> +	return NULL;
> +}
> +
> +/**
> + * Release a flow counter.
> + *
> + * @param[in] counter
> + *   Pointer to the counter handler.
> + */
> +static void
> +flow_dv_counter_release(struct mlx5_flow_counter *counter) {
> +	int ret;
> +
> +	if (!counter)
> +		return;
> +	if (--counter->ref_cnt == 0) {
> +		ret = mlx5_devx_cmd_fc_free(counter->dcs->obj);
> +		if (ret)
> +			DRV_LOG(ERR, "Failed to free devx counters, %d",
> ret);
> +		LIST_REMOVE(counter, next);
> +		rte_free(counter->dcs);
> +		rte_free(counter);
> +	}
> +}
> +
> +/**
>   * Verify the @p attributes will be correctly understood by the NIC and store
>   * them in the @p flow if everything is correct.
>   *
> @@ -778,6 +861,9 @@
>  	int tunnel = 0;
>  	uint8_t next_protocol = 0xff;
>  	int actions_n = 0;
> +#ifdef HAVE_IBV_FLOW_DEVX_COUNTERS

Why this ifdef is needed ?

> +	struct priv *priv = dev->data->dev_private; #endif
> 
>  	if (items == NULL)
>  		return -1;
> @@ -941,9 +1027,14 @@
>  			++actions_n;
>  			break;
>  		case RTE_FLOW_ACTION_TYPE_COUNT:
> -			ret = mlx5_flow_validate_action_count(dev, attr,
> error);
> -			if (ret < 0)
> -				return ret;
> +#ifdef HAVE_IBV_FLOW_DEVX_COUNTERS

Same question.

> +			if (!priv->config.devx)
> +#endif
> +				return rte_flow_error_set
> +					      (error, ENOTSUP,
> +
> RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
> +					       NULL,
> +					       "count action not supported");
>  			action_flags |= MLX5_FLOW_ACTION_COUNT;
>  			++actions_n;
>  			break;
> @@ -1693,6 +1784,7 @@
>  	}
>  }
> 
> +

Extra space. 

>  /**
>   * Store the requested actions in an array.
>   *
> @@ -1719,6 +1811,7 @@
>  {
>  	const struct rte_flow_action_queue *queue;
>  	const struct rte_flow_action_rss *rss;
> +	const struct rte_flow_action_count *count;
>  	int actions_n = dev_flow->dv.actions_n;
>  	struct rte_flow *flow = dev_flow->flow;
>  	const struct rte_flow_action *action_ptr = action; @@ -1837,6
> +1930,22 @@
>  		/* If decap is followed by encap, handle it at encap case. */
>  		flow->actions |= MLX5_FLOW_ACTION_RAW_DECAP;
>  		break;
> +	case RTE_FLOW_ACTION_TYPE_COUNT:
> +		count = action->conf;
> +		flow->counter = flow_dv_counter_new(dev,
> +						    count->shared, count->id);
> +		if (!flow->counter)
> +			return rte_flow_error_set(error, rte_errno,
> +
> RTE_FLOW_ERROR_TYPE_ACTION,
> +						  action,
> +						  "cannot create counter"
> +						  " object.");
> +		dev_flow->dv.actions[actions_n].type =
> +
> 	MLX5DV_FLOW_ACTION_COUNTER_DEVX;
> +		dev_flow->dv.actions[actions_n].obj = flow->counter->dcs-
> >obj;
> +		flow->actions |= MLX5_FLOW_ACTION_COUNT;
> +		actions_n++;
> +		break;
>  	default:
>  		break;
>  	}
> @@ -2203,8 +2312,6 @@
>  			dv->hrxq = NULL;
>  		}
>  	}
> -	if (flow->counter)
> -		flow->counter = NULL;

This is a bug fix right? Should be on different commit. 

>  }
> 
>  /**
> @@ -2223,6 +2330,10 @@
>  	if (!flow)
>  		return;
>  	flow_dv_remove(dev, flow);
> +	if (flow->counter) {
> +		flow_dv_counter_release(flow->counter);
> +		flow->counter = NULL;
> +	}

Same. 

>  	while (!LIST_EMPTY(&flow->dev_flows)) {
>  		dev_flow = LIST_FIRST(&flow->dev_flows);
>  		LIST_REMOVE(dev_flow, next);
> @@ -2235,6 +2346,55 @@
>  }
> 
>  /**
> + * Query a dv flow  rule for its statistics via devx.
> + *
> + * @param[in] dev
> + *   Pointer to Ethernet device.
> + * @param[in] flow
> + *   Pointer to the sub flow.
> + * @param[out] data
> + *   data retrieved by the query.
> + * @param[out] error
> + *   Perform verbose error reporting if not NULL.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +static int
> +flow_dv_query_count(struct rte_flow *flow, void *data,
> +		    struct rte_flow_error *error)
> +{
> +	struct rte_flow_query_count *qc = data;
> +	uint64_t pkts = 0;
> +	uint64_t bytes = 0;
> +	int err;
> +
> +	if (flow->counter) {
> +		err = mlx5_devx_cmd_fc_query(flow->counter->dcs,
> +					     qc->reset, &pkts, &bytes);
> +		if (err)
> +			return rte_flow_error_set
> +				(error, err,
> +				 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
> +				 NULL,
> +				 "cannot read counter");
> +		qc->hits_set = 1;
> +		qc->bytes_set = 1;
> +		qc->hits = pkts - flow->counter->hits;
> +		qc->bytes = bytes - flow->counter->bytes;
> +		if (qc->reset) {
> +			flow->counter->hits = pkts;
> +			flow->counter->bytes = bytes;
> +		}
> +		return 0;
> +	}
> +	return rte_flow_error_set(error, ENOTSUP,
> +				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
> +				  NULL,
> +				  "counters are not available");
> +}
> +
> +/**
>   * Query a flow.
>   *
>   * @see rte_flow_query()
> @@ -2247,8 +2407,23 @@
>  	      void *data __rte_unused,
>  	      struct rte_flow_error *error __rte_unused)  {
> -	rte_errno = ENOTSUP;
> -	return -rte_errno;
> +	int ret = -EINVAL;
> +
> +	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
> +		switch (actions->type) {
> +		case RTE_FLOW_ACTION_TYPE_VOID:
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_COUNT:
> +			ret = flow_dv_query_count(flow, data, error);
> +			break;
> +		default:
> +			return rte_flow_error_set(error, ENOTSUP,
> +
> RTE_FLOW_ERROR_TYPE_ACTION,
> +						  actions,
> +						  "action not supported");
> +		}
> +	}
> +	return ret;
>  }
> 
> 
> diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c
> b/drivers/net/mlx5/mlx5_flow_verbs.c
> index 2e506b9..ba63945 100644
> --- a/drivers/net/mlx5/mlx5_flow_verbs.c
> +++ b/drivers/net/mlx5/mlx5_flow_verbs.c
> @@ -117,13 +117,13 @@
>  	struct mlx5_flow_counter *cnt;
>  	int ret;
> 
> -	LIST_FOREACH(cnt, &priv->flow_counters, next) {
> -		if (!cnt->shared || cnt->shared != shared)
> -			continue;
> -		if (cnt->id != id)
> -			continue;
> -		cnt->ref_cnt++;
> -		return cnt;
> +	if (shared) {
> +		LIST_FOREACH(cnt, &priv->flow_counters, next) {
> +			if (cnt->shared && cnt->id == id) {
> +				cnt->ref_cnt++;
> +				return cnt;
> +			}
> +		}

This logic change, while correct, needs to be performed on a different patch. 

>  	}
>  	cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
>  	if (!cnt) {
> diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
> index 29742b1..5ff2dd0 100644
> --- a/drivers/net/mlx5/mlx5_prm.h
> +++ b/drivers/net/mlx5/mlx5_prm.h
> @@ -368,6 +368,7 @@ struct mlx5_modification_cmd {  #define
> __mlx5_dw_bit_off(typ, fld) (32 - __mlx5_bit_sz(typ, fld) - \
>  				    (__mlx5_bit_off(typ, fld) & 0x1f))  #define
> __mlx5_dw_off(typ, fld) (__mlx5_bit_off(typ, fld) / 32)
> +#define __mlx5_64_off(typ, fld) (__mlx5_bit_off(typ, fld) / 64)
>  #define __mlx5_dw_mask(typ, fld) (__mlx5_mask(typ, fld) << \
>  				  __mlx5_dw_bit_off(typ, fld))
>  #define __mlx5_mask(typ, fld) ((u32)((1ull << __mlx5_bit_sz(typ, fld)) - 1))
> @@ -375,6 +376,7 @@ struct mlx5_modification_cmd {  #define
> __mlx5_16_bit_off(typ, fld) (16 - __mlx5_bit_sz(typ, fld) - \
>  				    (__mlx5_bit_off(typ, fld) & 0xf))  #define
> __mlx5_mask16(typ, fld) ((u16)((1ull << __mlx5_bit_sz(typ, fld)) - 1))
> +#define MLX5_ST_SZ_BYTES(typ) (sizeof(struct mlx5_ifc_##typ##_bits) /
> +8)
>  #define MLX5_ST_SZ_DW(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 32)
> #define MLX5_ST_SZ_DB(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 8)
> #define MLX5_BYTE_OFF(typ, fld) (__mlx5_bit_off(typ, fld) / 8) @@ -391,10
> +393,16 @@ struct mlx5_modification_cmd {
>  				 (((_v) & __mlx5_mask(typ, fld)) << \
>  				   __mlx5_dw_bit_off(typ, fld))); \
>  	} while (0)
> +#define MLX5_GET(typ, p, fld) \
> +	((rte_be_to_cpu_32(*((__be32 *)(p) +\
> +	__mlx5_dw_off(typ, fld))) >> __mlx5_dw_bit_off(typ, fld)) & \
> +	__mlx5_mask(typ, fld))
>  #define MLX5_GET16(typ, p, fld) \
>  	((rte_be_to_cpu_16(*((__be16 *)(p) + \
>  	  __mlx5_16_off(typ, fld))) >> __mlx5_16_bit_off(typ, fld)) & \
>  	 __mlx5_mask16(typ, fld))
> +#define MLX5_GET64(typ, p, fld) rte_be_to_cpu_64(*((__be64 *)(p) + \
> +						   __mlx5_64_off(typ, fld)))
>  #define MLX5_FLD_SZ_BYTES(typ, fld) (__mlx5_bit_sz(typ, fld) / 8)
> 
>  struct mlx5_ifc_fte_match_set_misc_bits {
> --
> 1.8.3.1

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2018-12-06 13:40 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-11-04 12:29 [dpdk-dev] [PATCH] net/mlx5: support flow counters using devx Mordechay Haimovsky
2018-12-06 13:40 ` Shahaf Shuler

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).