DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH 00/17] conntrack support in mlx5 PMD
@ 2021-04-27 15:37 Bing Zhao
  2021-04-27 15:37 ` [dpdk-dev] [PATCH 01/17] common/mlx5: add connection tracking object definition Bing Zhao
                   ` (23 more replies)
  0 siblings, 24 replies; 147+ messages in thread
From: Bing Zhao @ 2021-04-27 15:37 UTC (permalink / raw)
  To: viacheslavo, matan; +Cc: dev, orika, rasland

This patch set adds the connection tracking offload support in the
mlx5 driver.

Bing Zhao (17):
  common/mlx5: add connection tracking object definition
  common/mlx5: add CT offload capability checking
  net/mlx5: use meter color reg for CT
  net/mlx5: initialization of CT management
  common/mlx5: add DevX CT objects creation
  net/mlx5: add modify support for CT
  net/mlx5: add actions creating for CT
  net/mlx5: close CT management structure
  net/mlx5: add ASO CT query implementation
  net/mlx5: add ASO CT destroy handling
  net/mlx5: add translation for CT action
  net/mlx5: add translation of CT item
  net/mlx5: add CT context update
  net/mlx5: validation of CT action
  net/mlx5: validation of CT item
  net/mlx5: reduce the reference count of CT
  net/mlx5: add support of CT between two ports

 drivers/common/mlx5/linux/meson.build |   2 +
 drivers/common/mlx5/mlx5_devx_cmds.c  |  53 +++
 drivers/common/mlx5/mlx5_devx_cmds.h  |   5 +
 drivers/common/mlx5/mlx5_prm.h        |  88 +++++
 drivers/common/mlx5/version.map       |   1 +
 drivers/net/mlx5/linux/mlx5_os.c      |  13 +
 drivers/net/mlx5/mlx5.c               |  92 ++++++
 drivers/net/mlx5/mlx5.h               |  72 +++++
 drivers/net/mlx5/mlx5_flow.c          |  44 ++-
 drivers/net/mlx5/mlx5_flow.h          |  99 +++++-
 drivers/net/mlx5/mlx5_flow_aso.c      | 586 ++++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_dv.c       | 571 ++++++++++++++++++++++++++++++++-
 12 files changed, 1623 insertions(+), 3 deletions(-)

-- 
2.5.5


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH 01/17] common/mlx5: add connection tracking object definition
  2021-04-27 15:37 [dpdk-dev] [PATCH 00/17] conntrack support in mlx5 PMD Bing Zhao
@ 2021-04-27 15:37 ` Bing Zhao
  2021-04-27 15:37 ` [dpdk-dev] [PATCH 02/17] common/mlx5: add CT offload capability checking Bing Zhao
                   ` (22 subsequent siblings)
  23 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-04-27 15:37 UTC (permalink / raw)
  To: viacheslavo, matan; +Cc: dev, orika, rasland

The structures of ASO connection tracking offload object are added
based on the definitions in the PRM. One CT object context will be
loaded into the cache completely in a reversed order of dwords. The
valid bit should be the MSB of the last dword. This is used for the
conntrack context creation and update, as well as the query.

The capabilities 2 (HCA_CAP_2) layout is also added. The connection
tracking related capabilities could be queried via the HCA_CAP_2.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/common/mlx5/mlx5_prm.h | 85 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)

diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index c6d8060..853eb58 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -1119,6 +1119,7 @@ enum {
 	MLX5_GET_HCA_CAP_OP_MOD_ROCE = 0x4 << 1,
 	MLX5_GET_HCA_CAP_OP_MOD_NIC_FLOW_TABLE = 0x7 << 1,
 	MLX5_GET_HCA_CAP_OP_MOD_VDPA_EMULATION = 0x13 << 1,
+	MLX5_GET_HCA_CAP_OP_MOD_GENERAL_DEVICE_2 = 0x20 << 1,
 };
 
 #define MLX5_GENERAL_OBJ_TYPES_CAP_VIRTQ_NET_Q \
@@ -1661,6 +1662,29 @@ struct mlx5_ifc_flow_table_nic_cap_bits {
 	struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties;
 };
 
+struct mlx5_ifc_cmd_hca_cap_2_bits {
+	u8 reserved_at_0[0x80]; /* End of DW3. */
+	u8 reserved_at_80[0xb];
+	u8 log_max_num_reserved_qpn[0x5];
+	u8 reserved_at_90[0x3];
+	u8 log_reserved_qpn_granularity[0x5];
+	u8 reserved_at_98[0x3];
+	u8 log_reserved_qpn_max_alloc[0x5]; /* End of DW4. */
+	u8 max_reformat_insert_size[0x8];
+	u8 max_reformat_insert_offset[0x8];
+	u8 max_reformat_remove_size[0x8];
+	u8 max_reformat_remove_offset[0x8]; /* End of DW5. */
+	u8 aso_conntrack_reg_id[0x8];
+	u8 reserved_at_c8[0x3];
+	u8 log_conn_track_granularity[0x5];
+	u8 reserved_at_d0[0x3];
+	u8 log_conn_track_max_alloc[0x5];
+	u8 reserved_at_d8[0x3];
+	u8 log_max_conn_track_offload[0x5]; /* End of DW6. */
+	u8 reserved_at_e0[0x20]; /* End of DW7. */
+	u8 reserved_at_100[0x700];
+};
+
 union mlx5_ifc_hca_cap_union_bits {
 	struct mlx5_ifc_cmd_hca_cap_bits cmd_hca_cap;
 	struct mlx5_ifc_per_protocol_networking_offload_caps_bits
@@ -2592,6 +2616,67 @@ struct mlx5_ifc_create_flow_meter_aso_in_bits {
 	struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr;
 	struct mlx5_ifc_flow_meter_aso_bits flow_meter_aso;
 };
+
+struct mlx5_ifc_tcp_window_params_bits {
+	u8 max_ack[0x20];
+	u8 max_win[0x20];
+	u8 reply_end[0x20];
+	u8 sent_end[0x20];
+};
+
+struct mlx5_ifc_conn_track_aso_bits {
+	struct mlx5_ifc_tcp_window_params_bits reply_dir; /* End of DW3. */
+	struct mlx5_ifc_tcp_window_params_bits original_dir; /* End of DW7. */
+	u8 last_end[0x20]; /* End of DW8. */
+	u8 last_ack[0x20]; /* End of DW9. */
+	u8 last_seq[0x20]; /* End of DW10. */
+	u8 last_win[0x10];
+	u8 reserved_at_170[0xa];
+	u8 last_dir[0x1];
+	u8 last_index[0x5]; /* End of DW11. */
+	u8 reserved_at_180[0x40]; /* End of DW13. */
+	u8 reply_dircetion_tcp_scale[0x4];
+	u8 reply_dircetion_tcp_close_initiated[0x1];
+	u8 reply_dircetion_tcp_liberal_enabled[0x1];
+	u8 reply_dircetion_tcp_data_unacked[0x1];
+	u8 reply_dircetion_tcp_max_ack[0x1];
+	u8 reserved_at_1c8[0x8];
+	u8 original_dircetion_tcp_scale[0x4];
+	u8 original_dircetion_tcp_close_initiated[0x1];
+	u8 original_dircetion_tcp_liberal_enabled[0x1];
+	u8 original_dircetion_tcp_data_unacked[0x1];
+	u8 original_dircetion_tcp_max_ack[0x1];
+	u8 reserved_at_1d8[0x8]; /* End of DW14. */
+	u8 valid[0x1]; /* MSB of the last dword. */
+	u8 state[0x3];
+	u8 freeze_track[0x1];
+	u8 reserved_at_1e5[0xb];
+	u8 reserved_at_1f0[0x1];
+	u8 connection_assured[0x1];
+	u8 sack_permitted[0x1];
+	u8 challenged_acked[0x1];
+	u8 heartbeat[0x1];
+	u8 max_ack_window[0x3];
+	u8 reserved_at_1f8[0x1];
+	u8 retransmission_counter[0x3];
+	u8 retranmission_limit_exceeded[0x1];
+	u8 retranmission_limit[0x3]; /* End of DW15. */
+};
+
+struct mlx5_ifc_conn_track_offload_bits {
+	u8 modify_field_select[0x40];
+	u8 reserved_at_40[0x40];
+	u8 reserved_at_80[0x8];
+	u8 conn_track_aso_access_pd[0x18];
+	u8 reserved_at_a0[0x160];
+	struct mlx5_ifc_conn_track_aso_bits conn_track_aso;
+};
+
+struct mlx5_ifc_create_conn_track_aso_in_bits {
+	struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr;
+	struct mlx5_ifc_conn_track_offload_bits conn_track_offload;
+};
+
 enum mlx5_access_aso_opc_mod {
 	ASO_OPC_MOD_IPSEC = 0x0,
 	ASO_OPC_MOD_CONNECTION_TRACKING = 0x1,
-- 
2.5.5


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH 02/17] common/mlx5: add CT offload capability checking
  2021-04-27 15:37 [dpdk-dev] [PATCH 00/17] conntrack support in mlx5 PMD Bing Zhao
  2021-04-27 15:37 ` [dpdk-dev] [PATCH 01/17] common/mlx5: add connection tracking object definition Bing Zhao
@ 2021-04-27 15:37 ` Bing Zhao
  2021-04-27 15:37 ` [dpdk-dev] [PATCH 03/17] net/mlx5: use meter color reg for CT Bing Zhao
                   ` (21 subsequent siblings)
  23 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-04-27 15:37 UTC (permalink / raw)
  To: viacheslavo, matan; +Cc: dev, orika, rasland

During startup, the ASO connection tracking offload capability could
be queried via HCA_CAP_QUERY command. If the HW doesn't support ASO
CT, the value would be 0 by default. The following initialization
should be skipped and the creation of the CT object should return
a failure directly.

Subsequent CT object creation should also check this capability. When
building against an older driver package, a preprocessor macro is used
so that the compilation still passes.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/common/mlx5/linux/meson.build | 2 ++
 drivers/common/mlx5/mlx5_devx_cmds.c  | 3 +++
 drivers/common/mlx5/mlx5_devx_cmds.h  | 1 +
 drivers/common/mlx5/mlx5_prm.h        | 3 +++
 4 files changed, 9 insertions(+)

diff --git a/drivers/common/mlx5/linux/meson.build b/drivers/common/mlx5/linux/meson.build
index 3334bd5..007834a 100644
--- a/drivers/common/mlx5/linux/meson.build
+++ b/drivers/common/mlx5/linux/meson.build
@@ -189,6 +189,8 @@ has_sym_args = [
             'MLX5_WQE_UMR_CTRL_FLAG_INLINE' ],
         [ 'HAVE_MLX5_DR_FLOW_DUMP_RULE', 'infiniband/mlx5dv.h',
             'mlx5dv_dump_dr_rule' ],
+        [ 'HAVE_MLX5_DR_ACTION_ASO_CT', 'infiniband/mlx5dv.h',
+            'MLX5DV_DR_ACTION_FLAGS_ASO_CT_DIRECTION_INITIATOR' ],
 ]
 config = configuration_data()
 foreach arg:has_sym_args
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index 6c6f439..4300536 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -760,6 +760,9 @@ mlx5_devx_cmd_query_hca_attr(void *ctx,
 		MLX5_GET(cmd_hca_cap, hcattr, umr_indirect_mkey_disabled);
 	attr->umr_modify_entity_size_disabled =
 		MLX5_GET(cmd_hca_cap, hcattr, umr_modify_entity_size_disabled);
+	attr->ct_offload = !!(MLX5_GET64(cmd_hca_cap, hcattr,
+					 general_obj_types) &
+			      MLX5_GENERAL_OBJ_TYPES_CAP_CONN_TRACK_OFFLOAD);
 	if (attr->qos.sup) {
 		MLX5_SET(query_hca_cap_in, in, op_mod,
 			 MLX5_GET_HCA_CAP_OP_MOD_QOS_CAP |
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h
index eee8fee..956b0b1 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -136,6 +136,7 @@ struct mlx5_hca_attr {
 	uint32_t qp_ts_format:2;
 	uint32_t regex:1;
 	uint32_t reg_c_preserve:1;
+	uint32_t ct_offload:1; /* General obj type ASO CT offload supported. */
 	uint32_t regexp_num_of_engines;
 	uint32_t log_max_ft_sampler_num:8;
 	uint32_t geneve_tlv_opt;
diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 853eb58..d9987e1 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -1134,6 +1134,8 @@ enum {
 			(1ULL << MLX5_GENERAL_OBJ_TYPE_FLOW_METER_ASO)
 #define MLX5_GENERAL_OBJ_TYPES_CAP_GENEVE_TLV_OPT \
 			(1ULL << MLX5_OBJ_TYPE_GENEVE_TLV_OPT)
+#define MLX5_GENERAL_OBJ_TYPES_CAP_CONN_TRACK_OFFLOAD \
+			(1ULL << MLX5_GENERAL_OBJ_TYPE_CONN_TRACK_OFFLOAD)
 
 enum {
 	MLX5_HCA_CAP_OPMOD_GET_MAX   = 0,
@@ -2449,6 +2451,7 @@ enum {
 	MLX5_GENERAL_OBJ_TYPE_FLEX_PARSE_GRAPH = 0x0022,
 	MLX5_GENERAL_OBJ_TYPE_FLOW_METER_ASO = 0x0024,
 	MLX5_GENERAL_OBJ_TYPE_FLOW_HIT_ASO = 0x0025,
+	MLX5_GENERAL_OBJ_TYPE_CONN_TRACK_OFFLOAD = 0x0031,
 };
 
 struct mlx5_ifc_general_obj_in_cmd_hdr_bits {
-- 
2.5.5


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH 03/17] net/mlx5: use meter color reg for CT
  2021-04-27 15:37 [dpdk-dev] [PATCH 00/17] conntrack support in mlx5 PMD Bing Zhao
  2021-04-27 15:37 ` [dpdk-dev] [PATCH 01/17] common/mlx5: add connection tracking object definition Bing Zhao
  2021-04-27 15:37 ` [dpdk-dev] [PATCH 02/17] common/mlx5: add CT offload capability checking Bing Zhao
@ 2021-04-27 15:37 ` Bing Zhao
  2021-04-27 15:37 ` [dpdk-dev] [PATCH 04/17] net/mlx5: initialization of CT management Bing Zhao
                   ` (20 subsequent siblings)
  23 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-04-27 15:37 UTC (permalink / raw)
  To: viacheslavo, matan; +Cc: dev, orika, rasland

Based on the capacity, 3 registers could be used. Due to the register
allocation, only the one REG_C_3 for meter color could be reused
right now.

Then in the same flow, no more than one ASO action can be supported.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow.c | 4 +++-
 drivers/net/mlx5/mlx5_flow.h | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 15ed5ec..1c28b63 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -759,7 +759,9 @@ mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
 			return priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
 			       REG_C_3;
 	case MLX5_MTR_COLOR:
-	case MLX5_ASO_FLOW_HIT: /* Both features use the same REG_C. */
+	case MLX5_ASO_FLOW_HIT:
+	case MLX5_ASO_CONNTRACK:
+		/* All features use the same REG_C. */
 		MLX5_ASSERT(priv->mtr_color_reg != REG_NON);
 		return priv->mtr_color_reg;
 	case MLX5_COPY_MARK:
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 56908ae..59769e9 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -84,6 +84,7 @@ enum mlx5_feature_name {
 	MLX5_MTR_COLOR,
 	MLX5_MTR_ID,
 	MLX5_ASO_FLOW_HIT,
+	MLX5_ASO_CONNTRACK,
 };
 
 /* Default queue number. */
-- 
2.5.5


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH 04/17] net/mlx5: initialization of CT management
  2021-04-27 15:37 [dpdk-dev] [PATCH 00/17] conntrack support in mlx5 PMD Bing Zhao
                   ` (2 preceding siblings ...)
  2021-04-27 15:37 ` [dpdk-dev] [PATCH 03/17] net/mlx5: use meter color reg for CT Bing Zhao
@ 2021-04-27 15:37 ` Bing Zhao
  2021-04-27 15:37 ` [dpdk-dev] [PATCH 05/17] common/mlx5: add DevX CT objects creation Bing Zhao
                   ` (19 subsequent siblings)
  23 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-04-27 15:37 UTC (permalink / raw)
  To: viacheslavo, matan; +Cc: dev, orika, rasland

The definitions of ASO connection tracking objects management
structures are added.

Considering performance, the bulk allocation of ASO CT objects
should be used. The maximal value per bulk and the granularity could
be fetched from HCA capabilities 2. Right now, a fixed number of 64
is used for each bulk for a better management purpose.

The ASO QP for CT is initialized, the SQ will be used for both
modify and query command.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/linux/mlx5_os.c | 13 +++++++++++
 drivers/net/mlx5/mlx5.c          | 36 +++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5.h          | 50 ++++++++++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_aso.c | 49 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 148 insertions(+)

diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 17d0533..e3a40ac 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1322,6 +1322,19 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 			DRV_LOG(DEBUG, "Flow Hit ASO is supported.");
 		}
 #endif /* HAVE_MLX5_DR_CREATE_ACTION_ASO */
+#if defined(HAVE_MLX5_DR_CREATE_ACTION_ASO) && \
+	defined(HAVE_MLX5_DR_ACTION_ASO_CT)
+		if (config->hca_attr.ct_offload &&
+		    priv->mtr_color_reg == REG_C_3) {
+			err = mlx5_flow_aso_ct_mng_init(sh);
+			if (err) {
+				err = -err;
+				goto error;
+			}
+			DRV_LOG(DEBUG, "CT ASO is supported.");
+			sh->ct_aso_en = 1;
+		}
+#endif /* HAVE_MLX5_DR_CREATE_ACTION_ASO && HAVE_MLX5_DR_ACTION_ASO_CT */
 #if defined(HAVE_MLX5DV_DR) && defined(HAVE_MLX5_DR_CREATE_ACTION_FLOW_SAMPLE)
 		if (config->hca_attr.log_max_ft_sampler_num > 0  &&
 		    config->dv_flow_en) {
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 19ffa16..a884234 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -670,6 +670,42 @@ mlx5_age_event_prepare(struct mlx5_dev_ctx_shared *sh)
 	}
 }
 
+/**
+ * Initialize the ASO connection tracking structure.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_aso_ct_mng_init(struct mlx5_dev_ctx_shared *sh)
+{
+	/* The management structure is created once per shared context. */
+	if (sh->ct_mng)
+		return 0;
+	sh->ct_mng = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*sh->ct_mng),
+				 RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+	if (!sh->ct_mng) {
+		DRV_LOG(ERR, "ASO CT management allocation failed.");
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+	if (mlx5_aso_queue_init(sh, ASO_OPC_MOD_CONNECTION_TRACKING)) {
+		mlx5_free(sh->ct_mng);
+		/* Clear the pointer, or a retry would reuse freed memory. */
+		sh->ct_mng = NULL;
+		/* rte_errno should be extracted from the failure. */
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+	rte_spinlock_init(&sh->ct_mng->ct_sl);
+	rte_rwlock_init(&sh->ct_mng->resize_rwl);
+	LIST_INIT(&sh->ct_mng->free_cts);
+	return 0;
+}
+
 /**
  * Initialize the flow resources' indexed mempool.
  *
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 378b68e..0a7e03e 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -991,6 +991,52 @@ struct mlx5_bond_info {
 	} ports[MLX5_BOND_MAX_PORTS];
 };
 
+/* Number of connection tracking objects per pool: must be a power of 2. */
+#define MLX5_ASO_CT_ACTIONS_PER_POOL 64
+
+/* ASO Conntrack state. */
+enum mlx5_aso_ct_state {
+	ASO_CONNTRACK_FREE, /* Inactive, in the free list. */
+	ASO_CONNTRACK_WAIT, /* WQE sent in the SQ. */
+	ASO_CONNTRACK_READY, /* CQE received w/o error. */
+	ASO_CONNTRACK_QUERY, /* WQE for query sent. */
+	ASO_CONNTRACK_MAX, /* Guard. */
+};
+
+/* Generic ASO connection tracking structure. */
+struct mlx5_aso_ct_action {
+	LIST_ENTRY(mlx5_aso_ct_action) next; /* Pointer to the next ASO CT. */
+	void *dr_action_orig; /* General action object for original dir. */
+	void *dr_action_rply; /* General action object for reply dir. */
+	uint32_t refcnt; /* Action used count in device flows. */
+	uint16_t offset; /* Offset of ASO CT in DevX objects bulk. */
+	uint16_t peer; /* The only peer port index could also use this CT. */
+	uint8_t state; /* ASO CT state. */
+	bool is_original; /* The direction of the DR action to be used. */
+};
+
+/* ASO connection tracking software pool definition. */
+struct mlx5_aso_ct_pool {
+	uint16_t index; /* Pool index in pools array. */
+	struct mlx5_devx_obj *devx_obj;
+	/* The first devx object in the bulk, used for freeing (not yet). */
+	struct mlx5_aso_ct_action actions[MLX5_ASO_CT_ACTIONS_PER_POOL];
+	/* CT action structures bulk. */
+};
+
+LIST_HEAD(aso_ct_list, mlx5_aso_ct_action);
+
+/* Pools management structure for ASO connection tracking pools. */
+struct mlx5_aso_ct_pools_mng {
+	struct mlx5_aso_ct_pool **pools;
+	uint16_t n; /* Total number of pools. */
+	uint16_t next; /* Number of pools in use, index of next free pool. */
+	rte_spinlock_t ct_sl; /* The ASO CT free list lock. */
+	rte_rwlock_t resize_rwl; /* The ASO CT pool resize lock. */
+	struct aso_ct_list free_cts; /* Free ASO CT objects list. */
+	struct mlx5_aso_sq aso_sq; /* ASO queue objects. */
+};
+
 /*
  * Shared Infiniband device context for Master/Representors
  * which belong to same IB device with multiple IB ports.
@@ -1004,6 +1050,7 @@ struct mlx5_dev_ctx_shared {
 	uint32_t sq_ts_format:2; /* SQ timestamp formats supported. */
 	uint32_t qp_ts_format:2; /* QP timestamp formats supported. */
 	uint32_t meter_aso_en:1; /* Flow Meter ASO is supported. */
+	uint32_t ct_aso_en:1; /* Connection Tracking ASO is supported. */
 	uint32_t max_port; /* Maximal IB device port index. */
 	struct mlx5_bond_info bond; /* Bonding information. */
 	void *ctx; /* Verbs/DV/DevX context. */
@@ -1066,6 +1113,8 @@ struct mlx5_dev_ctx_shared {
 	rte_spinlock_t geneve_tlv_opt_sl; /* Lock for geneve tlv resource */
 	struct mlx5_flow_mtr_mng *mtrmng;
 	/* Meter management structure. */
+	struct mlx5_aso_ct_pools_mng *ct_mng;
+	/* Management data for ASO connection tracking. */
 	struct mlx5_dev_shared_port port[]; /* per device port data array. */
 };
 
@@ -1363,6 +1412,7 @@ bool mlx5_flex_parser_ecpri_exist(struct rte_eth_dev *dev);
 int mlx5_flex_parser_ecpri_alloc(struct rte_eth_dev *dev);
 int mlx5_flow_aso_age_mng_init(struct mlx5_dev_ctx_shared *sh);
 int mlx5_aso_flow_mtrs_mng_init(struct mlx5_dev_ctx_shared *sh);
+int mlx5_flow_aso_ct_mng_init(struct mlx5_dev_ctx_shared *sh);
 
 /* mlx5_ethdev.c */
 
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index 20cd4fe..d0aa09f 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -212,6 +212,44 @@ mlx5_aso_mtr_init_sq(struct mlx5_aso_sq *sq)
 	}
 }
 
+/**
+ * Initialize Send Queue used for ASO connection tracking.
+ *
+ * @param[in] sq
+ *   ASO SQ to initialize.
+ */
+static void
+mlx5_aso_ct_init_sq(struct mlx5_aso_sq *sq)
+{
+	volatile struct mlx5_aso_wqe *restrict wqe;
+	int i;
+	int size = 1 << sq->log_desc_n;
+	uint64_t addr;
+
+	/* Pre-fill, in every WQE, the fields that should stay constant. */
+	for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
+		wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
+							  (sizeof(*wqe) >> 4));
+		/* One unique MR for the query data. */
+		wqe->aso_cseg.lkey = rte_cpu_to_be_32(sq->mr.mkey->id);
+		/* WQE i uses its own 64B slot: the length of an ASO CT obj. */
+		addr = (uint64_t)((uintptr_t)sq->mr.buf + i * 64);
+		wqe->aso_cseg.va_h = rte_cpu_to_be_32((uint32_t)(addr >> 32));
+		wqe->aso_cseg.va_l_r = rte_cpu_to_be_32((uint32_t)addr | 1u);
+		wqe->aso_cseg.operand_masks = rte_cpu_to_be_32
+			(0u |
+			 (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
+			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
+			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
+			 (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
+		/*
+		 * The data mask is not set here: it may be different for each
+		 * modification. In most cases, a full modification with mask
+		 * UINT64_MAX is used to update all 64 bytes.
+		 */
+	}
+}
+
 /**
  * Create Send Queue used for ASO access.
  *
@@ -317,6 +355,17 @@ mlx5_aso_queue_init(struct mlx5_dev_ctx_shared *sh,
 			return -1;
 		mlx5_aso_mtr_init_sq(&sh->mtrmng->pools_mng.sq);
 		break;
+	case ASO_OPC_MOD_CONNECTION_TRACKING:
+		/* 64B per object for query. */
+		if (mlx5_aso_devx_reg_mr(sh->ctx, 64 * sq_desc_n,
+			&sh->ct_mng->aso_sq.mr, 0, sh->pdn))
+			return -1;
+		if (mlx5_aso_sq_create(sh->ctx, &sh->ct_mng->aso_sq, 0,
+				sh->tx_uar, sh->pdn, MLX5_ASO_QUEUE_LOG_DESC,
+				sh->sq_ts_format))
+			return -1;
+		mlx5_aso_ct_init_sq(&sh->ct_mng->aso_sq);
+		break;
 	default:
 		DRV_LOG(ERR, "Unknown ASO operation mode");
 		return -1;
-- 
2.5.5


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH 05/17] common/mlx5: add DevX CT objects creation
  2021-04-27 15:37 [dpdk-dev] [PATCH 00/17] conntrack support in mlx5 PMD Bing Zhao
                   ` (3 preceding siblings ...)
  2021-04-27 15:37 ` [dpdk-dev] [PATCH 04/17] net/mlx5: initialization of CT management Bing Zhao
@ 2021-04-27 15:37 ` Bing Zhao
  2021-04-27 15:38 ` [dpdk-dev] [PATCH 06/17] net/mlx5: add modify support for CT Bing Zhao
                   ` (18 subsequent siblings)
  23 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-04-27 15:37 UTC (permalink / raw)
  To: viacheslavo, matan; +Cc: dev, orika, rasland

Adding support for connection tracking ASO creation via Devx command.
Right now only bulk creation is supported.

By default, the objects with zero contents will be created. Before
using a single object, the modification via posting a WQE to the ASO
CT SQ is needed.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/common/mlx5/mlx5_devx_cmds.c | 50 ++++++++++++++++++++++++++++++++++++
 drivers/common/mlx5/mlx5_devx_cmds.h |  4 +++
 drivers/common/mlx5/version.map      |  1 +
 3 files changed, 55 insertions(+)

diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index 4300536..f2e7ed4 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -2230,6 +2230,56 @@ mlx5_devx_cmd_create_flow_meter_aso_obj(void *ctx, uint32_t pd,
 	return flow_meter_aso_obj;
 }
 
+/**
+ * Create general object of type CONN_TRACK_OFFLOAD using DevX API.
+ *
+ * @param[in] ctx
+ *   Context returned from mlx5 open_device() glue function.
+ * @param[in] pd
+ *   PD value to associate the CONN_TRACK_OFFLOAD ASO object with.
+ * @param[in] log_obj_size
+ *   Log (base 2) of the number of objects to allocate
+ *   in one CONN_TRACK_OFFLOAD bulk allocation.
+ *
+ * @return
+ *   The DevX object created, NULL otherwise and rte_errno is set.
+ */
+struct mlx5_devx_obj *
+mlx5_devx_cmd_create_conn_track_offload_obj(void *ctx, uint32_t pd,
+					    uint32_t log_obj_size)
+{
+	uint32_t in[MLX5_ST_SZ_DW(create_conn_track_aso_in)] = {0};
+	uint32_t out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
+	struct mlx5_devx_obj *ct_aso_obj;
+	void *ptr;
+
+	ct_aso_obj = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*ct_aso_obj),
+				 0, SOCKET_ID_ANY);
+	if (!ct_aso_obj) {
+		DRV_LOG(ERR, "Failed to allocate CONN_TRACK_OFFLOAD object.");
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+	ptr = MLX5_ADDR_OF(create_conn_track_aso_in, in, hdr);
+	MLX5_SET(general_obj_in_cmd_hdr, ptr, opcode,
+		 MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, ptr, obj_type,
+		 MLX5_GENERAL_OBJ_TYPE_CONN_TRACK_OFFLOAD);
+	MLX5_SET(general_obj_in_cmd_hdr, ptr, log_obj_range, log_obj_size);
+	ptr = MLX5_ADDR_OF(create_conn_track_aso_in, in, conn_track_offload);
+	MLX5_SET(conn_track_offload, ptr, conn_track_aso_access_pd, pd);
+	ct_aso_obj->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in),
+						     out, sizeof(out));
+	if (!ct_aso_obj->obj) {
+		rte_errno = errno;
+		DRV_LOG(ERR, "Failed to create CONN_TRACK_OFFLOAD obj by using DevX.");
+		mlx5_free(ct_aso_obj);
+		return NULL;
+	}
+	ct_aso_obj->id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+	return ct_aso_obj;
+}
+
 /**
  * Create general object of type GENEVE TLV option using DevX API.
  *
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h
index 956b0b1..435f6c4 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -568,6 +568,10 @@ struct mlx5_devx_obj *mlx5_devx_cmd_queue_counter_alloc(void *ctx);
 __rte_internal
 int mlx5_devx_cmd_queue_counter_query(struct mlx5_devx_obj *dcs, int clear,
 				      uint32_t *out_of_buffers);
+__rte_internal
+struct mlx5_devx_obj *mlx5_devx_cmd_create_conn_track_offload_obj(void *ctx,
+					uint32_t pd, uint32_t log_obj_size);
+
 /**
  * Create general object of type FLOW_METER_ASO using DevX API..
  *
diff --git a/drivers/common/mlx5/version.map b/drivers/common/mlx5/version.map
index 18dc962..4bbcba5 100644
--- a/drivers/common/mlx5/version.map
+++ b/drivers/common/mlx5/version.map
@@ -13,6 +13,7 @@ INTERNAL {
 	mlx5_dev_to_pci_addr; # WINDOWS_NO_EXPORT
 
 	mlx5_devx_cmd_alloc_pd;
+	mlx5_devx_cmd_create_conn_track_offload_obj;
 	mlx5_devx_cmd_create_cq;
 	mlx5_devx_cmd_create_flex_parser;
 	mlx5_devx_cmd_create_qp;
-- 
2.5.5


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH 06/17] net/mlx5: add modify support for CT
  2021-04-27 15:37 [dpdk-dev] [PATCH 00/17] conntrack support in mlx5 PMD Bing Zhao
                   ` (4 preceding siblings ...)
  2021-04-27 15:37 ` [dpdk-dev] [PATCH 05/17] common/mlx5: add DevX CT objects creation Bing Zhao
@ 2021-04-27 15:38 ` Bing Zhao
  2021-04-27 15:38 ` [dpdk-dev] [PATCH 07/17] net/mlx5: add actions creating " Bing Zhao
                   ` (17 subsequent siblings)
  23 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-04-27 15:38 UTC (permalink / raw)
  To: viacheslavo, matan; +Cc: dev, orika, rasland

After the connection tracking object bulk is allocated, all the
objects' contents are filled with zero by default. One object must
be modified via WQE operation before using it.

In order to reduce the latency for the flow creation, an asynchronous
way is used instead of busy waiting for the CQE to be generated.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.h          |   6 +
 drivers/net/mlx5/mlx5_flow.h     |   3 +
 drivers/net/mlx5/mlx5_flow_aso.c | 288 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 297 insertions(+)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 0a7e03e..1d31813 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -498,6 +498,7 @@ struct mlx5_aso_sq_elem {
 			uint16_t burst_size;
 		};
 		struct mlx5_aso_mtr *mtr;
+		struct mlx5_aso_ct_action *ct;
 	};
 };
 
@@ -1700,5 +1701,10 @@ int mlx5_aso_meter_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
 		struct mlx5_aso_mtr *mtr);
 int mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh,
 		struct mlx5_aso_mtr *mtr);
+int mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
+			      struct mlx5_aso_ct_action *ct,
+			      const struct rte_flow_action_conntrack *profile);
+int mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh,
+			   struct mlx5_aso_ct_action *ct);
 
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 59769e9..c3e7bf8 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -45,6 +45,7 @@ enum mlx5_rte_flow_action_type {
 enum {
 	MLX5_INDIRECT_ACTION_TYPE_RSS,
 	MLX5_INDIRECT_ACTION_TYPE_AGE,
+	MLX5_INDIRECT_ACTION_TYPE_CT, /* Connection tracking. */
 };
 
 /* Matches on selected register. */
@@ -828,6 +829,8 @@ struct mlx5_flow {
 #define MLX5_ASO_WQE_CQE_RESPONSE_DELAY 10u
 #define MLX5_MTR_POLL_WQE_CQE_TIMES 100000u
 
+#define MLX5_CT_POLL_WQE_CQE_TIMES MLX5_MTR_POLL_WQE_CQE_TIMES
+
 #define MLX5_MAN_WIDTH 8
 /* Legacy Meter parameter structure. */
 struct mlx5_legacy_flow_meter {
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index d0aa09f..bb3221a 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -897,3 +897,291 @@ mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh,
 			mtr->offset);
 	return -1;
 }
+
+/*
+ * Post a WQE to the ASO CT SQ to modify the context.
+ *
+ * @param[in] mng
+ *   Pointer to the CT pools management structure.
+ * @param[in] ct
+ *   Pointer to the generic CT structure related to the context.
+ * @param[in] profile
+ *   Pointer to configuration profile.
+ *
+ * @return
+ *   1 (the number of WQEs posted) on success, 0 if the SQ is full.
+ */
+static uint16_t
+mlx5_aso_ct_sq_enqueue_single(struct mlx5_aso_ct_pools_mng *mng,
+			      struct mlx5_aso_ct_action *ct,
+			      const struct rte_flow_action_conntrack *profile)
+{
+	volatile struct mlx5_aso_wqe *wqe = NULL;
+	struct mlx5_aso_sq *sq = &mng->aso_sq;
+	uint16_t size = 1 << sq->log_desc_n;
+	uint16_t mask = size - 1;
+	uint16_t res;
+	struct mlx5_aso_ct_pool *pool;
+	void *desg;
+	void *orig_dir;
+	void *reply_dir;
+
+	rte_spinlock_lock(&sq->sqsl);
+	/* The lock keeps other threads from updating the indexes. */
+	res = size - (uint16_t)(sq->head - sq->tail);
+	if (unlikely(!res)) {
+		rte_spinlock_unlock(&sq->sqsl);
+		DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
+		return 0;
+	}
+	wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
+	rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
+	/* Fill next WQE. */
+	__atomic_store_n(&ct->state, ASO_CONNTRACK_WAIT, __ATOMIC_RELAXED);
+	sq->elts[sq->head & mask].ct = ct;
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	/* Each WQE will have a single CT object. */
+	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
+						  ct->offset);
+	wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
+			(ASO_OPC_MOD_CONNECTION_TRACKING <<
+			 WQE_CSEG_OPC_MOD_OFFSET) |
+			sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
+	wqe->aso_cseg.operand_masks = rte_cpu_to_be_32
+			(0u |
+			 (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
+			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
+			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
+			 (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
+	wqe->aso_cseg.data_mask = UINT64_MAX;
+	/* Cast away volatile so the data segment can be passed to MLX5_SET. */
+	desg = (void *)(uintptr_t)wqe->aso_dseg.data;
+	MLX5_SET(conn_track_aso, desg, valid, 1);
+	MLX5_SET(conn_track_aso, desg, state, profile->state);
+	MLX5_SET(conn_track_aso, desg, freeze_track, !profile->enable);
+	MLX5_SET(conn_track_aso, desg, connection_assured,
+		 profile->live_connection);
+	MLX5_SET(conn_track_aso, desg, sack_permitted, profile->selective_ack);
+	MLX5_SET(conn_track_aso, desg, challenged_acked,
+		 profile->challenge_ack_passed);
+	/* Heartbeat, retransmission_counter, retranmission_limit_exceeded: 0 */
+	MLX5_SET(conn_track_aso, desg, heartbeat, 0);
+	MLX5_SET(conn_track_aso, desg, max_ack_window,
+		 profile->max_ack_window);
+	MLX5_SET(conn_track_aso, desg, retransmission_counter, 0);
+	MLX5_SET(conn_track_aso, desg, retranmission_limit_exceeded, 0);
+	MLX5_SET(conn_track_aso, desg, retranmission_limit,
+		 profile->retransmission_limit);
+	MLX5_SET(conn_track_aso, desg, reply_dircetion_tcp_scale,
+		 profile->reply_dir.scale);
+	MLX5_SET(conn_track_aso, desg, reply_dircetion_tcp_close_initiated,
+		 profile->reply_dir.close_initiated);
+	/* Both directions will use the same liberal mode. */
+	MLX5_SET(conn_track_aso, desg, reply_dircetion_tcp_liberal_enabled,
+		 profile->liberal_mode);
+	MLX5_SET(conn_track_aso, desg, reply_dircetion_tcp_data_unacked,
+		 profile->reply_dir.data_unacked);
+	MLX5_SET(conn_track_aso, desg, reply_dircetion_tcp_max_ack,
+		 profile->reply_dir.last_ack_seen);
+	MLX5_SET(conn_track_aso, desg, original_dircetion_tcp_scale,
+		 profile->original_dir.scale);
+	MLX5_SET(conn_track_aso, desg, original_dircetion_tcp_close_initiated,
+		 profile->original_dir.close_initiated);
+	MLX5_SET(conn_track_aso, desg, original_dircetion_tcp_liberal_enabled,
+		 profile->liberal_mode);
+	MLX5_SET(conn_track_aso, desg, original_dircetion_tcp_data_unacked,
+		 profile->original_dir.data_unacked);
+	MLX5_SET(conn_track_aso, desg, original_dircetion_tcp_max_ack,
+		 profile->original_dir.last_ack_seen);
+	MLX5_SET(conn_track_aso, desg, last_win, profile->last_window);
+	MLX5_SET(conn_track_aso, desg, last_dir, profile->last_direction);
+	MLX5_SET(conn_track_aso, desg, last_index, profile->last_index);
+	MLX5_SET(conn_track_aso, desg, last_seq, profile->last_seq);
+	MLX5_SET(conn_track_aso, desg, last_ack, profile->last_ack);
+	MLX5_SET(conn_track_aso, desg, last_end, profile->last_end);
+	orig_dir = MLX5_ADDR_OF(conn_track_aso, desg, original_dir);
+	MLX5_SET(tcp_window_params, orig_dir, sent_end,
+		 profile->original_dir.sent_end);
+	MLX5_SET(tcp_window_params, orig_dir, reply_end,
+		 profile->original_dir.reply_end);
+	MLX5_SET(tcp_window_params, orig_dir, max_win,
+		 profile->original_dir.max_win);
+	MLX5_SET(tcp_window_params, orig_dir, max_ack,
+		 profile->original_dir.max_ack);
+	reply_dir = MLX5_ADDR_OF(conn_track_aso, desg, reply_dir);
+	MLX5_SET(tcp_window_params, reply_dir, sent_end,
+		 profile->reply_dir.sent_end);
+	MLX5_SET(tcp_window_params, reply_dir, reply_end,
+		 profile->reply_dir.reply_end);
+	MLX5_SET(tcp_window_params, reply_dir, max_win,
+		 profile->reply_dir.max_win);
+	MLX5_SET(tcp_window_params, reply_dir, max_ack,
+		 profile->reply_dir.max_ack);
+	sq->head++;
+	sq->pi += 2; /* Each WQE contains 2 WQEBB's. */
+	rte_io_wmb();
+	sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
+	rte_wmb();
+	*sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH. */
+	rte_wmb();
+	rte_spinlock_unlock(&sq->sqsl);
+	return 1;
+}
+
+/*
+ * Update the status field of CTs to indicate ready to be used by flows.
+ * Covers num consecutive SQ entries, starting from the current tail.
+ *
+ * @param[in] sq
+ *   Pointer to ASO CT SQ.
+ * @param[in] num
+ *   Number of CT structures to be updated.
+ *
+ * Nothing is returned: each CT state is set to ASO_CONNTRACK_READY,
+ * the SQ tail itself is not moved here.
+ */
+static void
+mlx5_aso_ct_status_update(struct mlx5_aso_sq *sq, uint16_t num)
+{
+	uint16_t size = 1 << sq->log_desc_n;
+	uint16_t mask = size - 1;
+	uint16_t i;
+	struct mlx5_aso_ct_action *ct = NULL;
+	uint16_t idx;
+
+	for (i = 0; i < num; i++) {
+		idx = (uint16_t)((sq->tail + i) & mask);
+		ct = sq->elts[idx].ct;
+		MLX5_ASSERT(ct);
+		__atomic_store_n(&ct->state, ASO_CONNTRACK_READY,
+				 __ATOMIC_RELAXED);
+	}
+}
+
+/*
+ * Handle completions from WQEs sent to ASO CT.
+ *
+ * @param[in] mng
+ *   Pointer to the CT pools management structure.
+ */
+static void
+mlx5_aso_ct_completion_handle(struct mlx5_aso_ct_pools_mng *mng)
+{
+	struct mlx5_aso_sq *sq = &mng->aso_sq;
+	struct mlx5_aso_cq *cq = &sq->cq;
+	volatile struct mlx5_cqe *restrict cqe;
+	const uint32_t cq_size = 1 << cq->log_desc_n;
+	const uint32_t mask = cq_size - 1;
+	uint32_t idx;
+	uint32_t next_idx;
+	uint16_t max;
+	uint16_t n = 0;
+	int ret;
+
+	rte_spinlock_lock(&sq->sqsl);
+	max = (uint16_t)(sq->head - sq->tail);
+	if (unlikely(!max)) {
+		rte_spinlock_unlock(&sq->sqsl);
+		return;
+	}
+	/* cq_ci is shared: sample it only once the SQ lock is held. */
+	next_idx = cq->cq_ci & mask;
+	do {
+		idx = next_idx;
+		next_idx = (cq->cq_ci + 1) & mask;
+		/* Need to confirm the position of the prefetch. */
+		rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
+		cqe = &cq->cq_obj.cqes[idx];
+		ret = check_cqe(cqe, cq_size, cq->cq_ci);
+		/* Read the owner before any other cookie or opaque field. */
+		rte_io_rmb();
+		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
+			if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
+				break;
+			mlx5_aso_cqe_err_handle(sq);
+		} else {
+			n++;
+		}
+		cq->cq_ci++;
+	} while (1);
+	if (likely(n)) {
+		/* Mark the completed CTs ready before releasing their slots. */
+		mlx5_aso_ct_status_update(sq, n);
+		sq->tail += n;
+		rte_io_wmb();
+		cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
+	}
+	rte_spinlock_unlock(&sq->sqsl);
+}
+
+/*
+ * Update connection tracking parameter by sending a WQE.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ * @param[in] ct
+ *   Pointer to connection tracking offload object.
+ * @param[in] profile
+ *   Pointer to connection tracking TCP parameter.
+ *
+ * @return
+ *   0 once the WQE is posted (completion is not awaited), -1 on failure.
+ */
+int
+mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
+			  struct mlx5_aso_ct_action *ct,
+			  const struct rte_flow_action_conntrack *profile)
+{
+	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
+	uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
+	struct mlx5_aso_ct_pool *pool;
+
+	/* Reap completions first, they free SQ entries for the new WQE. */
+	do {
+		mlx5_aso_ct_completion_handle(mng);
+		if (mlx5_aso_ct_sq_enqueue_single(mng, ct, profile))
+			return 0;
+		/* Waiting for wqe resource. */
+		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
+	} while (--poll_wqe_times);
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	DRV_LOG(ERR, "Fail to send WQE for ASO CT %d in pool %d",
+		ct->offset, pool->index);
+	return -1;
+}
+
+/*
+ * Wait for conntrack context in the HW to be ready to use.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ * @param[in] ct
+ *   Pointer to connection tracking offload object.
+ *
+ * @return
+ *   0 when the CT is ready, -1 if it is still not ready after polling.
+ */
+int
+mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh,
+		       struct mlx5_aso_ct_action *ct)
+{
+	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
+	uint32_t poll_cqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
+	struct mlx5_aso_ct_pool *pool;
+
+	if (__atomic_load_n(&ct->state, __ATOMIC_RELAXED) ==
+	    ASO_CONNTRACK_READY)
+		return 0;
+	do {
+		mlx5_aso_ct_completion_handle(mng);
+		if (__atomic_load_n(&ct->state, __ATOMIC_RELAXED) ==
+		    ASO_CONNTRACK_READY)
+			return 0;
+		/* CQE not ready yet: sleep (blocking instead is still open). */
+		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
+	} while (--poll_cqe_times);
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	DRV_LOG(ERR, "Fail to poll CQE for ASO CT %d in pool %d",
+		ct->offset, pool->index);
+	return -1;
+}
-- 
2.5.5


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH 07/17] net/mlx5: add actions creating for CT
  2021-04-27 15:37 [dpdk-dev] [PATCH 00/17] conntrack support in mlx5 PMD Bing Zhao
                   ` (5 preceding siblings ...)
  2021-04-27 15:38 ` [dpdk-dev] [PATCH 06/17] net/mlx5: add modify support for CT Bing Zhao
@ 2021-04-27 15:38 ` Bing Zhao
  2021-04-27 15:38 ` [dpdk-dev] [PATCH 08/17] net/mlx5: close CT management structure Bing Zhao
                   ` (16 subsequent siblings)
  23 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-04-27 15:38 UTC (permalink / raw)
  To: viacheslavo, matan; +Cc: dev, orika, rasland

A CT is allocated from the management pools and the DR actions are
created for both directions by default.

If there is no available connection tracking action, a new pool will
be created with a fixed size bulk allocation. Right now, all the
resources are controlled by the linked list.

The ASO connection tracking context associated with these actions
needs to be updated via WQE before being used for steering.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.h         |   4 +
 drivers/net/mlx5/mlx5_flow.h    |  27 ++++-
 drivers/net/mlx5/mlx5_flow_dv.c | 261 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 291 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 1d31813..982c0c2 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -995,6 +995,10 @@ struct mlx5_bond_info {
 /* Number of connection tracking objects per pool: must be a power of 2. */
 #define MLX5_ASO_CT_ACTIONS_PER_POOL 64
 
+/* Make a unique, 1-based CT index from pool and offset (never yields 0). */
+#define MLX5_MAKE_CT_IDX(pool, offset) \
+	((pool) * MLX5_ASO_CT_ACTIONS_PER_POOL + (offset) + 1)
+
 /* ASO Conntrack state. */
 enum mlx5_aso_ct_state {
 	ASO_CONNTRACK_FREE, /* Inactive, in the free list. */
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index c3e7bf8..988b171 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -45,7 +45,7 @@ enum mlx5_rte_flow_action_type {
 enum {
 	MLX5_INDIRECT_ACTION_TYPE_RSS,
 	MLX5_INDIRECT_ACTION_TYPE_AGE,
-	MLX5_INDIRECT_ACTION_TYPE_AGE,
+	MLX5_INDIRECT_ACTION_TYPE_CT,
 };
 
 /* Matches on selected register. */
@@ -1286,6 +1286,31 @@ mlx5_aso_meter_by_idx(struct mlx5_priv *priv, uint32_t idx)
 	return &pool->mtrs[idx % MLX5_ASO_MTRS_PER_POOL];
 }
 
+/*
+ * Get ASO CT action by index.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] idx
+ *   1-based index of the ASO CT action, as made by MLX5_MAKE_CT_IDX.
+ *
+ * @return
+ *   The specified ASO CT action pointer.
+ */
+static inline struct mlx5_aso_ct_action *
+flow_aso_ct_get_by_idx(struct rte_eth_dev *dev, uint32_t idx)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	struct mlx5_aso_ct_pool *pool;
+
+	idx--;
+	MLX5_ASSERT((idx / MLX5_ASO_CT_ACTIONS_PER_POOL) < mng->n);
+	/* Pool size is a power of 2: shift and AND could replace / and %. */
+	pool = mng->pools[idx / MLX5_ASO_CT_ACTIONS_PER_POOL];
+	return &pool->actions[idx % MLX5_ASO_CT_ACTIONS_PER_POOL];
+}
+
 int mlx5_flow_group_to_table(struct rte_eth_dev *dev,
 			     const struct mlx5_flow_tunnel *tunnel,
 			     uint32_t group, uint32_t *table,
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index d810466..51e6ff4 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -11119,6 +11119,260 @@ flow_dv_translate_create_aso_age(struct rte_eth_dev *dev,
 	return age_idx;
 }
 
+/*
+ * Drop one reference of an ASO CT action, freeing it with the last one.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] idx
+ *   Index of ASO CT action to release.
+ *
+ * @return
+ *   0 when the CT went back to the free list, else the references left.
+ */
+static inline int
+flow_dv_aso_ct_release(struct rte_eth_dev *dev, uint32_t idx)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	struct mlx5_aso_ct_action *ct = flow_aso_ct_get_by_idx(dev, idx);
+	uint32_t ret = __atomic_sub_fetch(&ct->refcnt, 1, __ATOMIC_RELAXED);
+
+	if (!ret) {
+		if (ct->dr_action_orig) {
+#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
+			claim_zero(mlx5_glue->destroy_flow_action
+					(ct->dr_action_orig));
+#endif
+			ct->dr_action_orig = NULL;
+		}
+		if (ct->dr_action_rply) {
+#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
+			claim_zero(mlx5_glue->destroy_flow_action
+					(ct->dr_action_rply));
+#endif
+			ct->dr_action_rply = NULL;
+		}
+		rte_spinlock_lock(&mng->ct_sl);
+		LIST_INSERT_HEAD(&mng->free_cts, ct, next);
+		rte_spinlock_unlock(&mng->ct_sl);
+	}
+	return ret;
+}
+
+/*
+ * Grow the ASO CT pools array by 64 entries, keeping the existing pools.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ *
+ * @return
+ *   0 on success, otherwise negative errno value and rte_errno is set.
+ */
+static int
+flow_dv_aso_ct_pools_resize(struct rte_eth_dev *dev)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	void *old_pools = mng->pools;
+	/* Growth step of 64 entries: magic number, a macro is still needed. */
+	uint32_t resize = mng->n + 64;
+	uint32_t mem_size = sizeof(struct mlx5_aso_ct_pool *) * resize;
+	void *pools = mlx5_malloc(MLX5_MEM_ZERO, mem_size, 0, SOCKET_ID_ANY);
+
+	if (!pools) {
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+	/* ASO SQ/QP was already initialized in the startup. */
+	if (old_pools) {
+		/* Realloc could be an alternative choice. */
+		rte_memcpy(pools, old_pools,
+			   mng->n * sizeof(struct mlx5_aso_ct_pool *));
+		mlx5_free(old_pools);
+	}
+	mng->n = resize;
+	mng->pools = pools;
+	return 0;
+}
+
+/*
+ * Create a new ASO CT pool backed by one DevX object and register it.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[out] ct_free
+ *   Where to put the pointer of a new CT action.
+ *
+ * @return
+ *   The CT actions pool pointer and @p ct_free is set on success,
+ *   NULL otherwise and rte_errno is set.
+ */
+static struct mlx5_aso_ct_pool *
+flow_dv_ct_pool_create(struct rte_eth_dev *dev,
+		       struct mlx5_aso_ct_action **ct_free)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	struct mlx5_aso_ct_pool *pool = NULL;
+	struct mlx5_devx_obj *obj = NULL;
+	uint32_t i;
+	uint32_t log_obj_size = rte_log2_u32(MLX5_ASO_CT_ACTIONS_PER_POOL);
+
+	obj = mlx5_devx_cmd_create_conn_track_offload_obj(priv->sh->ctx,
+						priv->sh->pdn, log_obj_size);
+	if (!obj) {
+		rte_errno = ENODATA;
+		DRV_LOG(ERR, "Failed to create conn_track_offload_obj using DevX.");
+		return NULL;
+	}
+	pool = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*pool), 0, SOCKET_ID_ANY);
+	if (!pool) {
+		rte_errno = ENOMEM;
+		claim_zero(mlx5_devx_cmd_destroy(obj));
+		return NULL;
+	}
+	pool->devx_obj = obj;
+	pool->index = mng->next;
+	/* Resize pools array if there is no room for the new pool in it. */
+	if (pool->index == mng->n && flow_dv_aso_ct_pools_resize(dev)) {
+		claim_zero(mlx5_devx_cmd_destroy(obj));
+		mlx5_free(pool);
+		return NULL;
+	}
+	mng->pools[pool->index] = pool;
+	mng->next++;
+	/* Hand out the first action (offset 0), the rest go to free list. */
+	*ct_free = &pool->actions[0];
+	/* The caller holds the lock, so the list operation is safe here. */
+	for (i = 1; i < MLX5_ASO_CT_ACTIONS_PER_POOL; i++) {
+		/* refcnt is 0: the pool was allocated zeroed. */
+		pool->actions[i].offset = i;
+		LIST_INSERT_HEAD(&mng->free_cts, &pool->actions[i], next);
+	}
+	return pool;
+}
+
+/*
+ * Allocate an ASO CT action from the free list or from a new pool.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   Index to ASO CT action on success, 0 otherwise and rte_errno is set.
+ */
+static uint32_t
+flow_dv_aso_ct_alloc(struct rte_eth_dev *dev, struct rte_flow_error *error)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	struct mlx5_aso_ct_action *ct = NULL;
+	struct mlx5_aso_ct_pool *pool;
+	uint8_t reg_c;
+	uint32_t ct_idx;
+
+	MLX5_ASSERT(mng);
+	if (!priv->config.devx) {
+		rte_errno = ENOTSUP;
+		return 0;
+	}
+	/* Take a free CT action, create a new pool if there is none. */
+	rte_spinlock_lock(&mng->ct_sl);
+	ct = LIST_FIRST(&mng->free_cts);
+	if (ct) {
+		LIST_REMOVE(ct, next);
+	} else if (!flow_dv_ct_pool_create(dev, &ct)) {
+		rte_spinlock_unlock(&mng->ct_sl);
+		rte_flow_error_set(error, rte_errno, RTE_FLOW_ERROR_TYPE_ACTION,
+				   NULL, "failed to create ASO CT pool");
+		return 0;
+	}
+	rte_spinlock_unlock(&mng->ct_sl);
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	ct_idx = MLX5_MAKE_CT_IDX(pool->index, ct->offset);
+	/* 0: inactive, 1: created, 2+: used by flows. */
+	__atomic_store_n(&ct->refcnt, 1, __ATOMIC_RELAXED);
+	reg_c = mlx5_flow_get_reg_id(dev, MLX5_ASO_CONNTRACK, 0, error);
+	if (!ct->dr_action_orig) {
+#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
+		ct->dr_action_orig = mlx5_glue->dv_create_flow_action_aso
+			(priv->sh->rx_domain, pool->devx_obj->obj,
+			 ct->offset,
+			 MLX5DV_DR_ACTION_FLAGS_ASO_CT_DIRECTION_INITIATOR,
+			 reg_c - REG_C_0);
+#else
+		RTE_SET_USED(reg_c);
+#endif
+		if (!ct->dr_action_orig) {
+			flow_dv_aso_ct_release(dev, ct_idx);
+			rte_flow_error_set(error, rte_errno,
+					   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					   "failed to create ASO CT action");
+			return 0;
+		}
+	}
+	if (!ct->dr_action_rply) {
+#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
+		ct->dr_action_rply = mlx5_glue->dv_create_flow_action_aso
+			(priv->sh->rx_domain, pool->devx_obj->obj,
+			 ct->offset,
+			 MLX5DV_DR_ACTION_FLAGS_ASO_CT_DIRECTION_RESPONDER,
+			 reg_c - REG_C_0);
+#endif
+		if (!ct->dr_action_rply) {
+			flow_dv_aso_ct_release(dev, ct_idx);
+			rte_flow_error_set(error, rte_errno,
+					   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					   "failed to create ASO CT action");
+			return 0;
+		}
+	}
+	return ct_idx;
+}
+
+/*
+ * Create a conntrack object with context and actions by using ASO mechanism.
+ *
+ * @param[in] dev
+ *   Pointer to rte_eth_dev structure.
+ * @param[in] pro
+ *   Pointer to conntrack information profile.
+ * @param[out] error
+ *   Pointer to the error structure.
+ * @return
+ *   Index to conntrack object on success, 0 otherwise and error is set.
+ */
+static uint32_t
+flow_dv_translate_create_conntrack(struct rte_eth_dev *dev,
+				   const struct rte_flow_action_conntrack *pro,
+				   struct rte_flow_error *error)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
+	uint32_t idx;
+
+	if (!sh->ct_aso_en) {
+		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+				   NULL, "Connection is not supported");
+		return 0; /* Not the negative errno: the result is unsigned. */
+	}
+	idx = flow_dv_aso_ct_alloc(dev, error);
+	if (!idx) {
+		rte_flow_error_set(error, rte_errno, RTE_FLOW_ERROR_TYPE_ACTION,
+				   NULL, "Failed to allocate CT object");
+	} else if (mlx5_aso_ct_update_by_wqe
+			(sh, flow_aso_ct_get_by_idx(dev, idx), pro)) {
+		flow_dv_aso_ct_release(dev, idx); /* Do not leak the CT. */
+		rte_flow_error_set(error, EBUSY, RTE_FLOW_ERROR_TYPE_ACTION,
+				   NULL, "Failed to update CT");
+		idx = 0;
+	}
+	return idx;
+}
+
 /**
  * Fill the flow with DV spec, lock free
  * (mutex should be acquired by caller).
@@ -13309,6 +13563,7 @@ flow_dv_action_create(struct rte_eth_dev *dev,
 {
 	uint32_t idx = 0;
 	uint32_t ret = 0;
+	struct mlx5_priv *priv = dev->data->dev_private;
 
 	switch (action->type) {
 	case RTE_FLOW_ACTION_TYPE_RSS:
@@ -13329,6 +13584,12 @@ flow_dv_action_create(struct rte_eth_dev *dev,
 							 (void *)(uintptr_t)idx;
 		}
 		break;
+	case RTE_FLOW_ACTION_TYPE_CONNTRACK:
+		ret = flow_dv_translate_create_conntrack(dev, action->conf,
+							 err);
+		idx = (MLX5_INDIRECT_ACTION_TYPE_CT <<
+		       MLX5_INDIRECT_ACTION_TYPE_OFFSET) | ret;
+		break;
 	default:
 		rte_flow_error_set(err, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
 				   NULL, "action type not supported");
-- 
2.5.5


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH 08/17] net/mlx5: close CT management structure
  2021-04-27 15:37 [dpdk-dev] [PATCH 00/17] conntrack support in mlx5 PMD Bing Zhao
                   ` (6 preceding siblings ...)
  2021-04-27 15:38 ` [dpdk-dev] [PATCH 07/17] net/mlx5: add actions creating " Bing Zhao
@ 2021-04-27 15:38 ` Bing Zhao
  2021-04-27 15:38 ` [dpdk-dev] [PATCH 09/17] net/mlx5: add ASO CT query implementation Bing Zhao
                   ` (15 subsequent siblings)
  23 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-04-27 15:38 UTC (permalink / raw)
  To: viacheslavo, matan; +Cc: dev, orika, rasland

When freeing the IB shared context during stopping a device, the
ASO connection tracking management structure should also be cleaned
up.

All the DR actions created should be destroyed. The structures need
to be freed and the ASO CT QP should be released. Meanwhile, the
memory region allocated and registered for query should also be
deregistered and then freed.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.c          | 56 ++++++++++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_aso.c |  4 +++
 2 files changed, 60 insertions(+)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index a884234..5717e72 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -706,6 +706,60 @@ mlx5_flow_aso_ct_mng_init(struct mlx5_dev_ctx_shared *sh)
 	return 0;
 }
 
+/*
+ * Close the ASO connection tracking management structure: destroy the
+ * DR actions and DevX object of every pool, then free all the memory.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object owning the CT management.
+ */
+static void
+mlx5_flow_aso_ct_mng_close(struct mlx5_dev_ctx_shared *sh)
+{
+	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
+	struct mlx5_aso_ct_pool *ct_pool;
+	struct mlx5_aso_ct_action *ct;
+	uint32_t idx;
+	uint32_t val;
+	uint32_t cnt;
+	int i;
+
+	mlx5_aso_queue_uninit(sh, ASO_OPC_MOD_CONNECTION_TRACKING);
+	idx = mng->next;
+	while (idx--) {
+		cnt = 0;
+		ct_pool = mng->pools[idx];
+		for (i = 0; i < MLX5_ASO_CT_ACTIONS_PER_POOL; i++) {
+			ct = &ct_pool->actions[i];
+			val = __atomic_fetch_sub(&ct->refcnt, 1,
+						 __ATOMIC_RELAXED);
+			MLX5_ASSERT(val <= 1);
+			if (val > 1)
+				cnt++; /* Old refcnt 2+: still used by flows. */
+#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
+			if (ct->dr_action_orig)
+				claim_zero(mlx5_glue->destroy_flow_action
+							(ct->dr_action_orig));
+			if (ct->dr_action_rply)
+				claim_zero(mlx5_glue->destroy_flow_action
+							(ct->dr_action_rply));
+#endif
+		}
+		claim_zero(mlx5_devx_cmd_destroy(ct_pool->devx_obj));
+		if (cnt) {
+			DRV_LOG(WARNING, "%u ASO CT objects are being used in the pool %u",
+				cnt, idx);
+		}
+		mlx5_free(ct_pool);
+		/* in case of failure. */
+		mng->next--;
+	}
+	mlx5_free(mng->pools);
+	mlx5_free(mng);
+	/* Management structure must be cleared to 0s during allocation. */
+	sh->ct_mng = NULL;
+}
+
 /**
  * Initialize the flow resources' indexed mempool.
  *
@@ -1238,6 +1292,8 @@ mlx5_free_shared_dev_ctx(struct mlx5_dev_ctx_shared *sh)
 	}
 	if (sh->mtrmng)
 		mlx5_aso_flow_mtrs_mng_close(sh);
+	if (sh->ct_mng)
+		mlx5_flow_aso_ct_mng_close(sh);
 	mlx5_flow_ipool_destroy(sh);
 	mlx5_os_dev_shared_handler_uninstall(sh);
 	if (sh->cnt_id_tbl) {
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index bb3221a..6a13b98 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -393,6 +393,10 @@ mlx5_aso_queue_uninit(struct mlx5_dev_ctx_shared *sh,
 	case ASO_OPC_MOD_POLICER:
 		sq = &sh->mtrmng->pools_mng.sq;
 		break;
+	case ASO_OPC_MOD_CONNECTION_TRACKING:
+		mlx5_aso_devx_dereg_mr(&sh->ct_mng->aso_sq.mr);
+		sq = &sh->ct_mng->aso_sq;
+		break;
 	default:
 		DRV_LOG(ERR, "Unknown ASO operation mode");
 		return;
-- 
2.5.5


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH 09/17] net/mlx5: add ASO CT query implementation
  2021-04-27 15:37 [dpdk-dev] [PATCH 00/17] conntrack support in mlx5 PMD Bing Zhao
                   ` (7 preceding siblings ...)
  2021-04-27 15:38 ` [dpdk-dev] [PATCH 08/17] net/mlx5: close CT management structure Bing Zhao
@ 2021-04-27 15:38 ` Bing Zhao
  2021-04-27 15:38 ` [dpdk-dev] [PATCH 10/17] net/mlx5: add ASO CT destroy handling Bing Zhao
                   ` (14 subsequent siblings)
  23 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-04-27 15:38 UTC (permalink / raw)
  To: viacheslavo, matan; +Cc: dev, orika, rasland

After the connection tracking context is created and used by the
flows, the context will be updated by the HW automatically after
a packet passes the CT validation. E.g., the ACK, SEQ, window and
state of CT can be updated by the traffic of both directions.

In order to query the updated contents of this context, a WQE should
be posted to the SQ with a return buffer. The data will be filled
into the buffer. And the profile will be filled with specific value.

During the execution of query command, the context may be updated.
The result of the query command may not be the latest one.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.h          |   9 +-
 drivers/net/mlx5/mlx5_flow_aso.c | 205 +++++++++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_dv.c  |  10 ++
 3 files changed, 223 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 982c0c2..f999828 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -498,7 +498,10 @@ struct mlx5_aso_sq_elem {
 			uint16_t burst_size;
 		};
 		struct mlx5_aso_mtr *mtr;
-		struct mlx5_aso_ct_action *ct;
+		struct {
+			struct mlx5_aso_ct_action *ct;
+			char *query_data;
+		};
 	};
 };
 
@@ -1710,5 +1713,9 @@ int mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
 			      const struct rte_flow_action_conntrack *profile);
 int mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh,
 			   struct mlx5_aso_ct_action *ct);
+int mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
+			     struct mlx5_aso_ct_action *ct,
+			     struct rte_flow_action_conntrack *profile);
+
 
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index 6a13b98..12e8dc7 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -943,6 +943,7 @@ mlx5_aso_ct_sq_enqueue_single(struct mlx5_aso_ct_pools_mng *mng,
 	/* Fill next WQE. */
 	__atomic_store_n(&ct->state, ASO_CONNTRACK_WAIT, __ATOMIC_RELAXED);
 	sq->elts[sq->head & mask].ct = ct;
+	sq->elts[sq->head & mask].query_data = NULL;
 	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
 	/* Each WQE will have a single CT object. */
 	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
@@ -1059,10 +1060,92 @@ mlx5_aso_ct_status_update(struct mlx5_aso_sq *sq, uint16_t num)
 		MLX5_ASSERT(ct);
 		__atomic_store_n(&ct->state, ASO_CONNTRACK_READY,
 				 __ATOMIC_RELAXED);
+		if (sq->elts[idx].query_data)
+			rte_memcpy(sq->elts[idx].query_data,
+				   (char *)((uintptr_t)sq->mr.buf + idx * 64),
+				   64);
 	}
 }
 
 /*
+ * Post a WQE to the ASO CT SQ to query the current context.
+ *
+ * @param[in] mng
+ *   Pointer to the CT pools management structure.
+ * @param[in] ct
+ *   Pointer to the generic CT structure related to the context.
+ * @param[out] data
+ *   Pointer to data area to be filled.
+ *
+ * @return
+ *   1 when posted, 0 when CT is busy (WAIT) or SQ is full, -1 when CT is free.
+ */
+static int
+mlx5_aso_ct_sq_query_single(struct mlx5_aso_ct_pools_mng *mng,
+			    struct mlx5_aso_ct_action *ct, char *data)
+{
+	volatile struct mlx5_aso_wqe *wqe = NULL;
+	struct mlx5_aso_sq *sq = &mng->aso_sq;
+	uint16_t size = 1 << sq->log_desc_n;
+	uint16_t mask = size - 1;
+	uint16_t res;
+	uint16_t wqe_idx;
+	struct mlx5_aso_ct_pool *pool;
+	uint8_t state = __atomic_load_n(&ct->state, __ATOMIC_RELAXED);
+
+	if (state == ASO_CONNTRACK_FREE) {
+		DRV_LOG(ERR, "Fail: No context to query");
+		return -1;
+	} else if (state == ASO_CONNTRACK_WAIT) {
+		return 0;
+	}
+	rte_spinlock_lock(&sq->sqsl);
+	res = size - (uint16_t)(sq->head - sq->tail);
+	if (unlikely(!res)) {
+		rte_spinlock_unlock(&sq->sqsl);
+		DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
+		return 0;
+	}
+	__atomic_store_n(&ct->state, ASO_CONNTRACK_QUERY, __ATOMIC_RELAXED);
+	wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
+	/* Confirm the location and address of the prefetch instruction. */
+	rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
+	/* Fill next WQE. */
+	wqe_idx = sq->head & mask;
+	sq->elts[wqe_idx].ct = ct;
+	sq->elts[wqe_idx].query_data = data;
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	/* Each WQE will have a single CT object. */
+	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
+						  ct->offset);
+	wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
+			(ASO_OPC_MOD_CONNECTION_TRACKING <<
+			 WQE_CSEG_OPC_MOD_OFFSET) |
+			sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
+	/*
+	 * No write request is required.
+	 * ASO_OPER_LOGICAL_AND and ASO_OP_ALWAYS_FALSE are both 0.
+	 * Set to 0 directly to reduce an endian swap. (Modify should rewrite.)
+	 * "data_mask" is ignored.
+	 * Buffer address was already filled during initialization.
+	 */
+	wqe->aso_cseg.operand_masks = 0;
+	sq->head++;
+	/*
+	 * Each WQE contains 2 WQEBB's, even though
+	 * data segment is not used in this case.
+	 */
+	sq->pi += 2;
+	rte_io_wmb();
+	sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
+	rte_wmb();
+	*sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH. */
+	rte_wmb();
+	rte_spinlock_unlock(&sq->sqsl);
+	return 1;
+}
+
+/*
  * Handle completions from WQEs sent to ASO CT.
  *
  * @param[in] mng
@@ -1189,3 +1272,125 @@ mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh,
 		ct->offset, pool->index);
 	return -1;
 }
+
+/*
+ * Convert the hardware conntrack data format into the profile.
+ *
+ * @param[out] profile
+ *   Pointer to conntrack profile to be filled after query.
+ * @param[in] wdata
+ *   Pointer to data fetched from hardware, in conn_track_aso layout.
+ */
+static inline void
+mlx5_aso_ct_obj_analyze(struct rte_flow_action_conntrack *profile,
+			char *wdata)
+{
+	void *o_dir = MLX5_ADDR_OF(conn_track_aso, wdata, original_dir);
+	void *r_dir = MLX5_ADDR_OF(conn_track_aso, wdata, reply_dir);
+
+	/* TODO: MLX5_GET16 should be taken into consideration. */
+	profile->state = (enum rte_flow_conntrack_state)
+			 MLX5_GET(conn_track_aso, wdata, state);
+	profile->enable = !MLX5_GET(conn_track_aso, wdata, freeze_track);
+	profile->selective_ack = MLX5_GET(conn_track_aso, wdata,
+					  sack_permitted);
+	profile->live_connection = MLX5_GET(conn_track_aso, wdata,
+					    connection_assured);
+	profile->challenge_ack_passed = MLX5_GET(conn_track_aso, wdata,
+						 challenged_acked);
+	profile->max_ack_window = MLX5_GET(conn_track_aso, wdata,
+					   max_ack_window);
+	profile->retransmission_limit = MLX5_GET(conn_track_aso, wdata,
+						 retranmission_limit);
+	profile->last_window = MLX5_GET(conn_track_aso, wdata, last_win);
+	profile->last_direction = MLX5_GET(conn_track_aso, wdata, last_dir);
+	profile->last_index = (enum rte_flow_conntrack_tcp_last_index)
+			      MLX5_GET(conn_track_aso, wdata, last_index);
+	profile->last_seq = MLX5_GET(conn_track_aso, wdata, last_seq);
+	profile->last_ack = MLX5_GET(conn_track_aso, wdata, last_ack);
+	profile->last_end = MLX5_GET(conn_track_aso, wdata, last_end);
+	profile->liberal_mode = MLX5_GET(conn_track_aso, wdata,
+				reply_dircetion_tcp_liberal_enabled) |
+				MLX5_GET(conn_track_aso, wdata,
+				original_dircetion_tcp_liberal_enabled);
+	/* No per-direction liberal in RTE profile, the bits are OR-ed above. */
+	profile->reply_dir.scale = MLX5_GET(conn_track_aso, wdata,
+					    reply_dircetion_tcp_scale);
+	profile->reply_dir.close_initiated = MLX5_GET(conn_track_aso, wdata,
+					reply_dircetion_tcp_close_initiated);
+	profile->reply_dir.data_unacked = MLX5_GET(conn_track_aso, wdata,
+					reply_dircetion_tcp_data_unacked);
+	profile->reply_dir.last_ack_seen = MLX5_GET(conn_track_aso, wdata,
+					reply_dircetion_tcp_max_ack);
+	profile->reply_dir.sent_end = MLX5_GET(tcp_window_params,
+					       r_dir, sent_end);
+	profile->reply_dir.reply_end = MLX5_GET(tcp_window_params,
+						r_dir, reply_end);
+	profile->reply_dir.max_win = MLX5_GET(tcp_window_params,
+					      r_dir, max_win);
+	profile->reply_dir.max_ack = MLX5_GET(tcp_window_params,
+					      r_dir, max_ack);
+	profile->original_dir.scale = MLX5_GET(conn_track_aso, wdata,
+					       original_dircetion_tcp_scale);
+	profile->original_dir.close_initiated = MLX5_GET(conn_track_aso, wdata,
+					original_dircetion_tcp_close_initiated);
+	profile->original_dir.data_unacked = MLX5_GET(conn_track_aso, wdata,
+					original_dircetion_tcp_data_unacked);
+	profile->original_dir.last_ack_seen = MLX5_GET(conn_track_aso, wdata,
+					original_dircetion_tcp_max_ack);
+	profile->original_dir.sent_end = MLX5_GET(tcp_window_params,
+						  o_dir, sent_end);
+	profile->original_dir.reply_end = MLX5_GET(tcp_window_params,
+						   o_dir, reply_end);
+	profile->original_dir.max_win = MLX5_GET(tcp_window_params,
+						 o_dir, max_win);
+	profile->original_dir.max_ack = MLX5_GET(tcp_window_params,
+						 o_dir, max_ack);
+}
+
+/*
+ * Query connection tracking information parameter by send WQE.
+ *
+ * @param[in] sh
+ *   Pointer to shared device context.
+ * @param[in] ct
+ *   Pointer to connection tracking offload object.
+ * @param[out] profile
+ *   Pointer to connection tracking TCP information, filled on success only.
+ *
+ * @return
+ *   0 on success, -1 on failure.
+ */
+int
+mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
+			 struct mlx5_aso_ct_action *ct,
+			 struct rte_flow_action_conntrack *profile)
+{
+	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
+	uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
+	struct mlx5_aso_ct_pool *pool;
+	char out_data[64 * 2];
+	int ret;
+
+	/* Assertion here. */
+	do {
+		mlx5_aso_ct_completion_handle(mng);
+		ret = mlx5_aso_ct_sq_query_single(mng, ct, out_data);
+		if (ret < 0)
+			return ret;
+		else if (ret > 0)
+			goto data_handle;
+		/* No WQE was posted (ret == 0): sleep, then poll and retry. */
+		else
+			rte_delay_us_sleep(10u);
+	} while (--poll_wqe_times);
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	DRV_LOG(ERR, "Fail to send WQE for ASO CT %d in pool %d",
+		ct->offset, pool->index);
+	return -1;
+data_handle:
+	ret = mlx5_aso_ct_wait_ready(sh, ct);
+	if (!ret)
+		mlx5_aso_ct_obj_analyze(profile, out_data);
+	return ret;
+}
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 51e6ff4..9093142 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -13765,6 +13765,8 @@ flow_dv_action_query(struct rte_eth_dev *dev,
 	uint32_t act_idx = (uint32_t)(uintptr_t)handle;
 	uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
 	uint32_t idx = act_idx & ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_action *ct;
 
 	switch (type) {
 	case MLX5_INDIRECT_ACTION_TYPE_AGE:
@@ -13778,6 +13780,14 @@ flow_dv_action_query(struct rte_eth_dev *dev,
 			resp->sec_since_last_hit = __atomic_load_n
 			     (&age_param->sec_since_last_hit, __ATOMIC_RELAXED);
 		return 0;
+	case MLX5_INDIRECT_ACTION_TYPE_CT:
+		ct = flow_aso_ct_get_by_idx(dev, idx);
+		if (!ct->refcnt)
+			return rte_flow_error_set(error, ENOMEM,
+					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					NULL,
+					"CT object is inactive");
+		return mlx5_aso_ct_query_by_wqe(priv->sh, ct, data);
 	default:
 		return rte_flow_error_set(error, ENOTSUP,
 					  RTE_FLOW_ERROR_TYPE_ACTION,
-- 
2.5.5


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH 10/17] net/mlx5: add ASO CT destroy handling
  2021-04-27 15:37 [dpdk-dev] [PATCH 00/17] conntrack support in mlx5 PMD Bing Zhao
                   ` (8 preceding siblings ...)
  2021-04-27 15:38 ` [dpdk-dev] [PATCH 09/17] net/mlx5: add ASO CT query implementation Bing Zhao
@ 2021-04-27 15:38 ` Bing Zhao
  2021-04-27 15:38 ` [dpdk-dev] [PATCH 11/17] net/mlx5: add translation for CT action Bing Zhao
                   ` (13 subsequent siblings)
  23 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-04-27 15:38 UTC (permalink / raw)
  To: viacheslavo, matan; +Cc: dev, orika, rasland

When trying to destroy an ASO connection tracking context, the DR
action created on this context should also be destroyed. Before
inserting the related software object into the management free list,
the reference count should be checked.

Right now, the context object will not be freed to the system and
will be reused directly from the free list.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow_dv.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 9093142..f4fa3a0 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -11153,6 +11153,9 @@ flow_dv_aso_ct_release(struct rte_eth_dev *dev, uint32_t idx)
 #endif
 			ct->dr_action_rply = NULL;
 		}
+		/* Clear the state to free, no need in 1st allocation. */
+		__atomic_store_n(&ct->state, ASO_CONNTRACK_FREE,
+				 __ATOMIC_RELAXED);
 		rte_spinlock_lock(&mng->ct_sl);
 		LIST_INSERT_HEAD(&mng->free_cts, ct, next);
 		rte_spinlock_unlock(&mng->ct_sl);
@@ -13638,6 +13641,12 @@ flow_dv_action_destroy(struct rte_eth_dev *dev,
 			DRV_LOG(DEBUG, "Indirect age action %" PRIu32 " was"
 				" released with references %d.", idx, ret);
 		return 0;
+	case MLX5_INDIRECT_ACTION_TYPE_CT:
+		ret = flow_dv_aso_ct_release(dev, idx);
+		if (ret)
+			DRV_LOG(DEBUG, "Connection tracking object %u still "
+				"has references %d.", idx, ret);
+		return 0;
 	default:
 		return rte_flow_error_set(error, ENOTSUP,
 					  RTE_FLOW_ERROR_TYPE_ACTION,
-- 
2.5.5


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH 11/17] net/mlx5: add translation for CT action
  2021-04-27 15:37 [dpdk-dev] [PATCH 00/17] conntrack support in mlx5 PMD Bing Zhao
                   ` (9 preceding siblings ...)
  2021-04-27 15:38 ` [dpdk-dev] [PATCH 10/17] net/mlx5: add ASO CT destroy handling Bing Zhao
@ 2021-04-27 15:38 ` Bing Zhao
  2021-04-27 15:38 ` [dpdk-dev] [PATCH 12/17] net/mlx5: add translation of CT item Bing Zhao
                   ` (12 subsequent siblings)
  23 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-04-27 15:38 UTC (permalink / raw)
  To: viacheslavo, matan; +Cc: dev, orika, rasland

When creating a flow with this action context for CT, it needs to be
translated in 2 levels.
First, retrieve from action context to RTE_FLOW action.
Second, translate it to the correct DR action with traffic direction.

Before using the DR action in a flow, the CT context should be
available to use in the hardware.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.h          |  3 ++-
 drivers/net/mlx5/mlx5_flow.c     |  9 +++++++++
 drivers/net/mlx5/mlx5_flow.h     |  1 +
 drivers/net/mlx5/mlx5_flow_aso.c | 40 ++++++++++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_dv.c  | 18 ++++++++++++++++++
 5 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index f999828..3b67706 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1716,6 +1716,7 @@ int mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh,
 int mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
 			     struct mlx5_aso_ct_action *ct,
 			     struct rte_flow_action_conntrack *profile);
-
+int mlx5_aso_ct_available(struct mlx5_dev_ctx_shared *sh,
+			  struct mlx5_aso_ct_action *ct);
 
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 1c28b63..7b9f055 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -3552,6 +3552,15 @@ flow_action_handles_translate(struct rte_eth_dev *dev,
 				break;
 			}
 			/* Fall-through */
+		case MLX5_INDIRECT_ACTION_TYPE_CT:
+			if (priv->sh->ct_aso_en) {
+				translated[handle->index].type =
+					RTE_FLOW_ACTION_TYPE_CONNTRACK;
+				translated[handle->index].conf =
+							 (void *)(uintptr_t)idx;
+				break;
+			}
+			/* Fall-through */
 		default:
 			mlx5_free(translated);
 			return rte_flow_error_set
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 988b171..ddfc517 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -226,6 +226,7 @@ enum mlx5_feature_name {
 #define MLX5_FLOW_ACTION_TUNNEL_MATCH (1ull << 38)
 #define MLX5_FLOW_ACTION_MODIFY_FIELD (1ull << 39)
 #define MLX5_FLOW_ACTION_METER_WITH_TERMINATED_POLICY (1ull << 40)
+#define MLX5_FLOW_ACTION_CT (1ull << 41)
 
 #define MLX5_FLOW_FATE_ACTIONS \
 	(MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_QUEUE | \
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index 12e8dc7..21de855 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -1394,3 +1394,43 @@ mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
 		mlx5_aso_ct_obj_analyze(profile, out_data);
 	return ret;
 }
+
+/*
+ * Make sure the conntrack context is synchronized with hardware before
+ * creating a flow rule that uses it.
+ *
+ * @param[in] sh
+ *   Pointer to shared device context.
+ * @param[in] ct
+ *   Pointer to connection tracking offload object.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_aso_ct_available(struct mlx5_dev_ctx_shared *sh,
+		      struct mlx5_aso_ct_action *ct)
+{
+	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
+	uint32_t poll_cqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
+	uint8_t state = __atomic_load_n(&ct->state, __ATOMIC_RELAXED);
+
+	if (state == ASO_CONNTRACK_FREE) {
+		rte_errno = ENXIO;
+		return -rte_errno;
+	} else if (state == ASO_CONNTRACK_READY ||
+		   state == ASO_CONNTRACK_QUERY) {
+		return 0;
+	}
+	do {
+		mlx5_aso_ct_completion_handle(mng);
+		state = __atomic_load_n(&ct->state, __ATOMIC_RELAXED);
+		if (state == ASO_CONNTRACK_READY ||
+		    state == ASO_CONNTRACK_QUERY)
+			return 0;
+		/* Wait for the CQE. TODO: consider whether to block or sleep. */
+		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
+	} while (--poll_cqe_times);
+	rte_errno = EBUSY;
+	return -rte_errno;
+}
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index f4fa3a0..3ebeb58 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -11373,6 +11373,7 @@ flow_dv_translate_create_conntrack(struct rte_eth_dev *dev,
 		return rte_flow_error_set(error, EBUSY,
 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
 					  "Failed to update CT");
+	ct->is_original = !!pro->is_original_dir;
 	return idx;
 }
 
@@ -11529,6 +11530,8 @@ flow_dv_translate(struct rte_eth_dev *dev,
 		const struct rte_flow_action *found_action = NULL;
 		uint32_t jump_group = 0;
 		struct mlx5_flow_counter *cnt;
+		uint32_t ct_idx;
+		struct mlx5_aso_ct_action *ct;
 
 		if (!mlx5_flow_os_action_supported(action_type))
 			return rte_flow_error_set(error, ENOTSUP,
@@ -12002,6 +12005,21 @@ flow_dv_translate(struct rte_eth_dev *dev,
 				return -rte_errno;
 			action_flags |= MLX5_FLOW_ACTION_MODIFY_FIELD;
 			break;
+		case RTE_FLOW_ACTION_TYPE_CONNTRACK:
+			ct_idx = (uint32_t)(uintptr_t)action->conf;
+			ct = flow_aso_ct_get_by_idx(dev, ct_idx);
+			if (mlx5_aso_ct_available(priv->sh, ct))
+				return -rte_errno;
+			if (ct->is_original)
+				dev_flow->dv.actions[actions_n] =
+							ct->dr_action_orig;
+			else
+				dev_flow->dv.actions[actions_n] =
+							ct->dr_action_rply;
+			__atomic_fetch_add(&ct->refcnt, 1, __ATOMIC_RELAXED);
+			actions_n++;
+			action_flags |= MLX5_FLOW_ACTION_CT;
+			break;
 		case RTE_FLOW_ACTION_TYPE_END:
 			actions_end = true;
 			if (mhdr_res->actions_num) {
-- 
2.5.5


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH 12/17] net/mlx5: add translation of CT item
  2021-04-27 15:37 [dpdk-dev] [PATCH 00/17] conntrack support in mlx5 PMD Bing Zhao
                   ` (10 preceding siblings ...)
  2021-04-27 15:38 ` [dpdk-dev] [PATCH 11/17] net/mlx5: add translation for CT action Bing Zhao
@ 2021-04-27 15:38 ` Bing Zhao
  2021-04-27 15:38 ` [dpdk-dev] [PATCH 13/17] net/mlx5: add CT context update Bing Zhao
                   ` (11 subsequent siblings)
  23 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-04-27 15:38 UTC (permalink / raw)
  To: viacheslavo, matan; +Cc: dev, orika, rasland

The return register of the DR action will be used for matching.
After the ASO CT checking of a TCP packet, the syndrome is filled in
the register. Only the 8 LSB should be used. A conversion from
RTE_FLOW_CONNTRACK_PKT_STATE_* to the syndrome should be done after
checking the spec and mask fields.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow.h    |  7 +++++
 drivers/net/mlx5/mlx5_flow_dv.c | 62 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index ddfc517..c52468c 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -405,6 +405,13 @@ enum mlx5_feature_name {
 /* Maximum number of fields to modify in MODIFY_FIELD */
 #define MLX5_ACT_MAX_MOD_FIELDS 5
 
+/* Syndrome bits definition for connection tracking. */
+#define MLX5_CT_SYNDROME_VALID		(0x0 << 6)
+#define MLX5_CT_SYNDROME_INVALID	(0x1 << 6)
+#define MLX5_CT_SYNDROME_TRAP		(0x2 << 6)
+#define MLX5_CT_SYNDROME_STATE_CHANGE	(0x1 << 1)
+#define MLX5_CT_SYNDROME_BAD_PACKET	(0x1 << 0)
+
 enum mlx5_flow_drv_type {
 	MLX5_FLOW_TYPE_MIN,
 	MLX5_FLOW_TYPE_DV,
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 3ebeb58..eb24d5e 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -9378,6 +9378,64 @@ flow_dv_translate_item_ecpri(struct rte_eth_dev *dev, void *matcher,
 	}
 }
 
+/*
+ * Add connection tracking status item to matcher.
+ *
+ * @param[in] dev
+ *   The device to configure through.
+ * @param[in, out] matcher
+ *   Flow matcher.
+ * @param[in, out] key
+ *   Flow matcher value.
+ * @param[in] item
+ *   Flow pattern to translate.
+ */
+static void
+flow_dv_translate_item_aso_ct(struct rte_eth_dev *dev,
+			      void *matcher, void *key,
+			      const struct rte_flow_item *item)
+{
+	uint32_t reg_value = 0;
+	int reg_id;
+	/* Mask on 8 LSB: 0b 11/0000/11, middle 4 bits are reserved. */
+	uint32_t reg_mask = 0;
+	const struct rte_flow_item_conntrack *spec = item->spec;
+	const struct rte_flow_item_conntrack *mask = item->mask;
+	uint32_t flags;
+	struct rte_flow_error error;
+
+	if (!mask)
+		mask = &rte_flow_item_conntrack_mask;
+	if (!spec || !mask->flags)
+		return;
+	flags = spec->flags & mask->flags;
+	/* The conflict should be checked in the validation. */
+	if (flags & RTE_FLOW_CONNTRACK_PKT_STATE_VALID)
+		reg_value |= MLX5_CT_SYNDROME_VALID;
+	if (flags & RTE_FLOW_CONNTRACK_PKT_STATE_CHANGED)
+		reg_value |= MLX5_CT_SYNDROME_STATE_CHANGE;
+	if (flags & RTE_FLOW_CONNTRACK_PKT_STATE_INVALID)
+		reg_value |= MLX5_CT_SYNDROME_INVALID;
+	if (flags & RTE_FLOW_CONNTRACK_PKT_STATE_DISABLED)
+		reg_value |= MLX5_CT_SYNDROME_TRAP;
+	if (flags & RTE_FLOW_CONNTRACK_PKT_STATE_BAD)
+		reg_value |= MLX5_CT_SYNDROME_BAD_PACKET;
+	if (mask->flags & (RTE_FLOW_CONNTRACK_PKT_STATE_VALID |
+			   RTE_FLOW_CONNTRACK_PKT_STATE_INVALID |
+			   RTE_FLOW_CONNTRACK_PKT_STATE_DISABLED))
+		reg_mask |= 0xc0;
+	if (mask->flags & RTE_FLOW_CONNTRACK_PKT_STATE_CHANGED)
+		reg_mask |= MLX5_CT_SYNDROME_STATE_CHANGE;
+	if (mask->flags & RTE_FLOW_CONNTRACK_PKT_STATE_BAD)
+		reg_mask |= MLX5_CT_SYNDROME_BAD_PACKET;
+	/* The REG_C_x value could be saved during startup. */
+	reg_id = mlx5_flow_get_reg_id(dev, MLX5_ASO_CONNTRACK, 0, &error);
+	if (reg_id == REG_NON)
+		return;
+	flow_dv_match_meta_reg(matcher, key, (enum modify_reg)reg_id,
+			       reg_value, reg_mask);
+}
+
 static uint32_t matcher_zero[MLX5_ST_SZ_DW(fte_match_param)] = { 0 };
 
 #define HEADER_IS_ZERO(match_criteria, headers)				     \
@@ -12302,6 +12360,10 @@ flow_dv_translate(struct rte_eth_dev *dev,
 			/* No other protocol should follow eCPRI layer. */
 			last_item = MLX5_FLOW_LAYER_ECPRI;
 			break;
+		case RTE_FLOW_ITEM_TYPE_CONNTRACK:
+			flow_dv_translate_item_aso_ct(dev, match_mask,
+						      match_value, items);
+			break;
 		default:
 			break;
 		}
-- 
2.5.5


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH 13/17] net/mlx5: add CT context update
  2021-04-27 15:37 [dpdk-dev] [PATCH 00/17] conntrack support in mlx5 PMD Bing Zhao
                   ` (11 preceding siblings ...)
  2021-04-27 15:38 ` [dpdk-dev] [PATCH 12/17] net/mlx5: add translation of CT item Bing Zhao
@ 2021-04-27 15:38 ` Bing Zhao
  2021-04-27 15:38 ` [dpdk-dev] [PATCH 14/17] net/mlx5: validation of CT action Bing Zhao
                   ` (10 subsequent siblings)
  23 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-04-27 15:38 UTC (permalink / raw)
  To: viacheslavo, matan; +Cc: dev, orika, rasland

When updating a connection tracking context, two separate parts
could be updated.
First, the direction. This will only update the traffic direction
recorded in the software for flow creation.
Second, the TCP parameters. The hardware context will be updated
via the WQE. This update will be blocked until the hardware status
is updated and ready for the next flow creation.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow_dv.c | 47 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index eb24d5e..a27aff6 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -13801,6 +13801,51 @@ __flow_dv_action_rss_update(struct rte_eth_dev *dev, uint32_t idx,
 	return ret;
 }
 
+/*
+ * Updates in place conntrack context or direction.
+ * Context update should be synchronized.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] idx
+ *   The conntrack object ID to be updated.
+ * @param[in] update
+ *   Pointer to the structure of information to update.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. Initialized in case of
+ *   error only (set in this function only for an inactive CT object).
+ *
+ * @return
+ *   0 on success, otherwise negative errno value.
+ */
+static int
+__flow_dv_action_ct_update(struct rte_eth_dev *dev, uint32_t idx,
+			   const struct rte_flow_modify_conntrack *update,
+			   struct rte_flow_error *error)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_action *ct;
+	const struct rte_flow_action_conntrack *new_prf;
+	int ret = 0;
+
+	ct = flow_aso_ct_get_by_idx(dev, idx);
+	if (!ct->refcnt)
+		return rte_flow_error_set(error, ENOMEM,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					  NULL,
+					  "CT object is inactive");
+	new_prf = &update->new_ct;
+	if (update->direction)
+		ct->is_original = !!new_prf->is_original_dir;
+	if (update->state) {
+		ret = mlx5_aso_ct_update_by_wqe(priv->sh, ct, new_prf);
+		/* Block until ready or a failure. */
+		if (!ret)
+			ret = mlx5_aso_ct_available(priv->sh, ct);
+	}
+	return ret;
+}
+
 /**
  * Updates in place shared action configuration, lock free,
  * (mutex should be acquired by caller).
@@ -13836,6 +13881,8 @@ flow_dv_action_update(struct rte_eth_dev *dev,
 	case MLX5_INDIRECT_ACTION_TYPE_RSS:
 		action_conf = ((const struct rte_flow_action *)update)->conf;
 		return __flow_dv_action_rss_update(dev, idx, action_conf, err);
+	case MLX5_INDIRECT_ACTION_TYPE_CT:
+		return __flow_dv_action_ct_update(dev, idx, update, err);
 	default:
 		return rte_flow_error_set(err, ENOTSUP,
 					  RTE_FLOW_ERROR_TYPE_ACTION,
-- 
2.5.5


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH 14/17] net/mlx5: validation of CT action
  2021-04-27 15:37 [dpdk-dev] [PATCH 00/17] conntrack support in mlx5 PMD Bing Zhao
                   ` (12 preceding siblings ...)
  2021-04-27 15:38 ` [dpdk-dev] [PATCH 13/17] net/mlx5: add CT context update Bing Zhao
@ 2021-04-27 15:38 ` Bing Zhao
  2021-04-27 15:38 ` [dpdk-dev] [PATCH 15/17] net/mlx5: validation of CT item Bing Zhao
                   ` (9 subsequent siblings)
  23 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-04-27 15:38 UTC (permalink / raw)
  To: viacheslavo, matan; +Cc: dev, orika, rasland

The validation of a CT action contains two parts. The first is the
CT action configuration parameters. When creating a CT action
context, some members need to be verified.

The second is that when creating a flow, the DR action of CT should
be validated with other actions and items as well. Currently, only
the TCP protocol supports connection tracking.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.h         |  4 +++
 drivers/net/mlx5/mlx5_flow.c    | 31 +++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_dv.c | 68 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 103 insertions(+)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 3b67706..e983897 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1617,6 +1617,10 @@ int mlx5_flow_dev_dump(struct rte_eth_dev *dev, struct rte_flow *flow,
 void mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev);
 int mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
 			uint32_t nb_contexts, struct rte_flow_error *error);
+int mlx5_validate_action_ct(struct rte_eth_dev *dev,
+			    const struct rte_flow_action_conntrack *conntrack,
+			    struct rte_flow_error *error);
+
 
 /* mlx5_mp_os.c */
 
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 7b9f055..ee82243 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1687,6 +1687,37 @@ mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
 	return 0;
 }
 
+/*
+ * Validate the ASO CT action.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure (currently unused).
+ * @param[in] conntrack
+ *   Pointer to the CT action profile.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_validate_action_ct(struct rte_eth_dev *dev,
+			const struct rte_flow_action_conntrack *conntrack,
+			struct rte_flow_error *error)
+{
+	RTE_SET_USED(dev);
+
+	if (conntrack->state > RTE_FLOW_CONNTRACK_STATE_TIME_WAIT)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "Invalid CT state");
+	if (conntrack->last_index > RTE_FLOW_CONNTRACK_FLAG_RST)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "Invalid last TCP packet flag");
+	return 0;
+}
+
 /**
  * Verify the @p attributes will be correctly understood by the NIC and store
  * them in the @p flow if everything is correct.
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index a27aff6..2bf966c 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -3400,6 +3400,57 @@ flow_dv_validate_action_raw_encap_decap
 	return 0;
 }
 
+/*
+ * Validate the ASO CT action.
+ *
+ * @param[in] dev
+ *   Pointer to the rte_eth_dev structure (currently unused).
+ * @param[in] action_flags
+ *   Holds the actions detected until now.
+ * @param[in] item_flags
+ *   The items found in this flow rule.
+ * @param[in] attr
+ *   Pointer to flow attributes.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_validate_action_aso_ct(struct rte_eth_dev *dev,
+			       uint64_t action_flags,
+			       uint64_t item_flags,
+			       const struct rte_flow_attr *attr,
+			       struct rte_flow_error *error)
+{
+	RTE_SET_USED(dev);
+
+	if (attr->group == 0 && !attr->transfer)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					  NULL,
+					  "Only support non-root table");
+	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "CT cannot follow a fate action");
+	if ((action_flags & MLX5_FLOW_ACTION_METER) ||
+	    (action_flags & MLX5_FLOW_ACTION_AGE))
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "Only one ASO action is supported");
+	if (action_flags & MLX5_FLOW_ACTION_ENCAP)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "Encap cannot exist before CT");
+	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP))
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+					  "Not a outer TCP packet");
+	return 0;
+}
+
 /**
  * Match encap_decap resource.
  *
@@ -7204,6 +7255,14 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
 			action_flags |= MLX5_FLOW_ACTION_MODIFY_FIELD;
 			rw_act_num += ret;
 			break;
+		case RTE_FLOW_ACTION_TYPE_CONNTRACK:
+			ret = flow_dv_validate_action_aso_ct(dev, action_flags,
+							     item_flags, attr,
+							     error);
+			if (ret < 0)
+				return ret;
+			action_flags |= MLX5_FLOW_ACTION_CT;
+			break;
 		default:
 			return rte_flow_error_set(error, ENOTSUP,
 						  RTE_FLOW_ERROR_TYPE_ACTION,
@@ -13838,6 +13897,9 @@ __flow_dv_action_ct_update(struct rte_eth_dev *dev, uint32_t idx,
 	if (update->direction)
 		ct->is_original = !!new_prf->is_original_dir;
 	if (update->state) {
+		ret = mlx5_validate_action_ct(dev, new_prf, error);
+		if (ret)
+			return ret;
 		ret = mlx5_aso_ct_update_by_wqe(priv->sh, ct, new_prf);
 		/* Block until ready or a failure. */
 		if (!ret)
@@ -15691,6 +15753,12 @@ flow_dv_action_validate(struct rte_eth_dev *dev,
 						NULL,
 					     "shared age action not supported");
 		return flow_dv_validate_action_age(0, action, dev, err);
+	case RTE_FLOW_ACTION_TYPE_CONNTRACK:
+		if (!priv->sh->ct_aso_en)
+			return rte_flow_error_set(err, ENOTSUP,
+					RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+					"ASO CT is not supported");
+		return mlx5_validate_action_ct(dev, action->conf, err);
 	default:
 		return rte_flow_error_set(err, ENOTSUP,
 					  RTE_FLOW_ERROR_TYPE_ACTION,
-- 
2.5.5


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH 15/17] net/mlx5: validation of CT item
  2021-04-27 15:37 [dpdk-dev] [PATCH 00/17] conntrack support in mlx5 PMD Bing Zhao
                   ` (13 preceding siblings ...)
  2021-04-27 15:38 ` [dpdk-dev] [PATCH 14/17] net/mlx5: validation of CT action Bing Zhao
@ 2021-04-27 15:38 ` Bing Zhao
  2021-04-27 15:38 ` [dpdk-dev] [PATCH 16/17] net/mlx5: reduce the reference count of CT Bing Zhao
                   ` (8 subsequent siblings)
  23 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-04-27 15:38 UTC (permalink / raw)
  To: viacheslavo, matan; +Cc: dev, orika, rasland

The item of ASO connection tracking will be translated into the
register value when matching. The validation of this item has no
dependency on other layers, since a flow including this item
should be reached by a jump from another group. All the layer checks
were already done in the previous groups. Only conflicts between the
state bits should be checked.

It is assumed that the flow with CT item will always work on the
TCP traffic.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow.h    |  3 +++
 drivers/net/mlx5/mlx5_flow_dv.c | 52 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index c52468c..3b896e3 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -147,6 +147,9 @@ enum mlx5_feature_name {
 #define MLX5_FLOW_LAYER_GENEVE_OPT (UINT64_C(1) << 32)
 #define MLX5_FLOW_LAYER_GTP_PSC (UINT64_C(1) << 33)
 
+/* Conntrack item. */
+#define MLX5_FLOW_LAYER_ASO_CT (UINT64_C(1) << 34)
+
 /* Outer Masks. */
 #define MLX5_FLOW_LAYER_OUTER_L3 \
 	(MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 2bf966c..6478625 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -2598,6 +2598,52 @@ flow_dv_validate_item_ipv6_frag_ext(const struct rte_flow_item *item,
 				  "specified range not supported");
 }
 
+/*
+ * Validate ASO CT item.
+ *
+ * @param[in] dev
+ *   Pointer to the rte_eth_dev structure (currently unused).
+ * @param[in] item
+ *   Item specification, its spec and mask may be NULL.
+ * @param[in, out] item_flags
+ *   Pointer to bit-fields that holds the items detected until now.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_validate_item_aso_ct(struct rte_eth_dev *dev,
+			     const struct rte_flow_item *item,
+			     uint64_t *item_flags,
+			     struct rte_flow_error *error)
+{
+	const struct rte_flow_item_conntrack *spec = item->spec;
+	const struct rte_flow_item_conntrack *mask = item->mask;
+	RTE_SET_USED(dev);
+	uint32_t flags;
+
+	if (*item_flags & MLX5_FLOW_LAYER_ASO_CT)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ITEM, NULL,
+					  "Only one CT is supported");
+	/* NULL spec is a wildcard: no state bits, so nothing can conflict. */
+	flags = spec ? spec->flags : 0;
+	if (mask)
+		flags &= mask->flags;
+	if ((flags & RTE_FLOW_CONNTRACK_PKT_STATE_VALID) &&
+	    ((flags & RTE_FLOW_CONNTRACK_PKT_STATE_INVALID) ||
+	     (flags & RTE_FLOW_CONNTRACK_PKT_STATE_BAD) ||
+	     (flags & RTE_FLOW_CONNTRACK_PKT_STATE_DISABLED)))
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ITEM, NULL,
+					  "Conflict status bits");
+	/* State change also needs to be considered. */
+	*item_flags |= MLX5_FLOW_LAYER_ASO_CT;
+	return 0;
+}
+
 /**
  * Validate the pop VLAN action.
  *
@@ -6695,6 +6741,12 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
 				return ret;
 			last_item = MLX5_FLOW_LAYER_ECPRI;
 			break;
+		case RTE_FLOW_ITEM_TYPE_CONNTRACK:
+			ret = flow_dv_validate_item_aso_ct(dev, items,
+							   &item_flags, error);
+			if (ret < 0)
+				return ret;
+			break;
 		default:
 			return rte_flow_error_set(error, ENOTSUP,
 						  RTE_FLOW_ERROR_TYPE_ITEM,
-- 
2.5.5


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH 16/17] net/mlx5: reduce the reference count of CT
  2021-04-27 15:37 [dpdk-dev] [PATCH 00/17] conntrack support in mlx5 PMD Bing Zhao
                   ` (14 preceding siblings ...)
  2021-04-27 15:38 ` [dpdk-dev] [PATCH 15/17] net/mlx5: validation of CT item Bing Zhao
@ 2021-04-27 15:38 ` Bing Zhao
  2021-04-27 15:38 ` [dpdk-dev] [PATCH 17/17] net/mlx5: add support of CT between two ports Bing Zhao
                   ` (7 subsequent siblings)
  23 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-04-27 15:38 UTC (permalink / raw)
  To: viacheslavo, matan; +Cc: dev, orika, rasland

In order to release the DR actions and reuse the context of a CT,
the reference count should be handled correctly, especially when
destroying a flow.

The CT index will be recorded in the rte_flow by reusing the age
index. The action context type should also be saved for CT. When
destroying a flow, if the context type is CT and the index is valid,
the release process should be handled.

By default, the handling falls back to releasing the age.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow.h    | 6 +++++-
 drivers/net/mlx5/mlx5_flow_dv.c | 7 ++++++-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 3b896e3..4ad9910 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -977,11 +977,15 @@ struct rte_flow {
 	uint32_t drv_type:2; /**< Driver type. */
 	uint32_t tunnel:1;
 	uint32_t meter:24; /**< Holds flow meter id. */
+	uint32_t indirect_type:2; /**< Indirect action type. */
 	uint32_t rix_mreg_copy;
 	/**< Index to metadata register copy table resource. */
 	uint32_t counter; /**< Holds flow counter. */
 	uint32_t tunnel_id;  /**< Tunnel id */
-	uint32_t age; /**< Holds ASO age bit index. */
+	union {
+		uint32_t age; /**< Holds ASO age bit index. */
+		uint32_t ct; /**< Holds ASO CT index. */
+	};
 	uint32_t geneve_tlv_option; /**< Holds Geneve TLV option id. > */
 } __rte_packed;
 
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 6478625..3e85d5e 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -12185,6 +12185,8 @@ flow_dv_translate(struct rte_eth_dev *dev,
 			else
 				dev_flow->dv.actions[actions_n] =
 							ct->dr_action_rply;
+			flow->indirect_type = MLX5_INDIRECT_ACTION_TYPE_CT;
+			flow->ct = ct_idx;
 			__atomic_fetch_add(&ct->refcnt, 1, __ATOMIC_RELAXED);
 			actions_n++;
 			action_flags |= MLX5_FLOW_ACTION_CT;
@@ -13328,7 +13330,10 @@ flow_dv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
 			mlx5_flow_meter_detach(priv, fm);
 		flow->meter = 0;
 	}
-	if (flow->age)
+	/* Keep the current age handling by default. */
+	if (flow->indirect_type == MLX5_INDIRECT_ACTION_TYPE_CT && flow->ct)
+		flow_dv_aso_ct_release(dev, flow->ct);
+	else if (flow->age)
 		flow_dv_aso_age_release(dev, flow->age);
 	if (flow->geneve_tlv_option) {
 		flow_dv_geneve_tlv_option_resource_release(dev);
-- 
2.5.5


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH 17/17] net/mlx5: add support of CT between two ports
  2021-04-27 15:37 [dpdk-dev] [PATCH 00/17] conntrack support in mlx5 PMD Bing Zhao
                   ` (15 preceding siblings ...)
  2021-04-27 15:38 ` [dpdk-dev] [PATCH 16/17] net/mlx5: reduce the reference count of CT Bing Zhao
@ 2021-04-27 15:38 ` Bing Zhao
  2021-05-05  4:19 ` [dpdk-dev] [PATCH v2 00/17] conntrack support in mlx5 PMD Bing Zhao
                   ` (6 subsequent siblings)
  23 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-04-27 15:38 UTC (permalink / raw)
  To: viacheslavo, matan; +Cc: dev, orika, rasland

After a connection tracking context is created, it can be used
between two ports. For each port, the flow for one direction of the
traffic will be created.

The context can only be shared between the owner port and the peer
port that was specified at creation time. Right now, only the owner
port can update or query the context.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow.h    | 57 ++++++++++++++++++++++++++++++++--
 drivers/net/mlx5/mlx5_flow_dv.c | 69 +++++++++++++++++++++++++++++++----------
 2 files changed, 108 insertions(+), 18 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 4ad9910..d49cb53 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -48,6 +48,25 @@ enum {
 	MLX5_INDIRECT_ACTION_TYPE_CT,
 };
 
+/* For now, at most 256 ports are supported and the action number is 4M. */
+#define MLX5_INDIRECT_ACT_CT_MAX_PORT 0x100
+
+#define MLX5_INDIRECT_ACT_CT_OWNER_SHIFT 22
+#define MLX5_INDIRECT_ACT_CT_OWNER_MASK (MLX5_INDIRECT_ACT_CT_MAX_PORT - 1)
+
+/* 30-31: type, 22-29: owner port, 0-21: index. */
+#define MLX5_INDIRECT_ACT_CT_GEN_IDX(owner, index) \
+	((MLX5_INDIRECT_ACTION_TYPE_CT << MLX5_INDIRECT_ACTION_TYPE_OFFSET) | \
+	 (((owner) & MLX5_INDIRECT_ACT_CT_OWNER_MASK) << \
+	  MLX5_INDIRECT_ACT_CT_OWNER_SHIFT) | (index))
+
+#define MLX5_INDIRECT_ACT_CT_GET_OWNER(index) \
+	(((index) >> MLX5_INDIRECT_ACT_CT_OWNER_SHIFT) & \
+	 MLX5_INDIRECT_ACT_CT_OWNER_MASK)
+
+#define MLX5_INDIRECT_ACT_CT_GET_IDX(index) \
+	((index) & ((1 << MLX5_INDIRECT_ACT_CT_OWNER_SHIFT) - 1))
+
 /* Matches on selected register. */
 struct mlx5_rte_flow_item_tag {
 	enum modify_reg id;
@@ -1302,7 +1321,7 @@ mlx5_aso_meter_by_idx(struct mlx5_priv *priv, uint32_t idx)
 }
 
 /*
- * Get ASO CT action by index.
+ * Get ASO CT action by device and index.
  *
  * @param[in] dev
  *   Pointer to the Ethernet device structure.
@@ -1313,7 +1332,7 @@ mlx5_aso_meter_by_idx(struct mlx5_priv *priv, uint32_t idx)
  *   The specified ASO CT action pointer.
  */
 static inline struct mlx5_aso_ct_action *
-flow_aso_ct_get_by_idx(struct rte_eth_dev *dev, uint32_t idx)
+flow_aso_ct_get_by_dev_idx(struct rte_eth_dev *dev, uint32_t idx)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
@@ -1326,6 +1345,40 @@ flow_aso_ct_get_by_idx(struct rte_eth_dev *dev, uint32_t idx)
 	return &pool->actions[idx % MLX5_ASO_CT_ACTIONS_PER_POOL];
 }
 
+/*
+ * Get ASO CT action by owner & index.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] own_idx
+ *   Index to the ASO CT action and owner port combination.
+ *
+ * @return
+ *   The specified ASO CT action pointer.
+ */
+static inline struct mlx5_aso_ct_action *
+flow_aso_ct_get_by_idx(struct rte_eth_dev *dev, uint32_t own_idx)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_action *ct;
+	uint16_t owner = (uint16_t)MLX5_INDIRECT_ACT_CT_GET_OWNER(own_idx);
+	uint32_t idx = MLX5_INDIRECT_ACT_CT_GET_IDX(own_idx);
+
+	if (owner == PORT_ID(priv)) {
+		ct = flow_aso_ct_get_by_dev_idx(dev, idx);
+	} else {
+		struct rte_eth_dev *owndev = &rte_eth_devices[owner];
+
+		MLX5_ASSERT(owner < RTE_MAX_ETHPORTS);
+		if (dev->data->dev_started != 1)
+			return NULL;
+		ct = flow_aso_ct_get_by_dev_idx(owndev, idx);
+		if (ct->peer != PORT_ID(priv))
+			return NULL;
+	}
+	return ct;
+}
+
 int mlx5_flow_group_to_table(struct rte_eth_dev *dev,
 			     const struct mlx5_flow_tunnel *tunnel,
 			     uint32_t group, uint32_t *table,
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 3e85d5e..1cb6f73 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -11289,7 +11289,7 @@ flow_dv_translate_create_aso_age(struct rte_eth_dev *dev,
 }
 
 /*
- * Release an ASO CT action.
+ * Release an ASO CT action by its own device.
  *
  * @param[in] dev
  *   Pointer to the Ethernet device structure.
@@ -11300,11 +11300,11 @@ flow_dv_translate_create_aso_age(struct rte_eth_dev *dev,
  *   0 when CT action was removed, otherwise the number of references.
  */
 static inline int
-flow_dv_aso_ct_release(struct rte_eth_dev *dev, uint32_t idx)
+flow_dv_aso_ct_dev_release(struct rte_eth_dev *dev, uint32_t idx)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
-	struct mlx5_aso_ct_action *ct = flow_aso_ct_get_by_idx(dev, idx);
+	struct mlx5_aso_ct_action *ct = flow_aso_ct_get_by_dev_idx(dev, idx);
 	uint32_t ret = __atomic_sub_fetch(&ct->refcnt, 1, __ATOMIC_RELAXED);
 
 	if (!ret) {
@@ -11329,7 +11329,21 @@ flow_dv_aso_ct_release(struct rte_eth_dev *dev, uint32_t idx)
 		LIST_INSERT_HEAD(&mng->free_cts, ct, next);
 		rte_spinlock_unlock(&mng->ct_sl);
 	}
-	return ret;
+	return (int)ret;
+}
+
+static inline int
+flow_dv_aso_ct_release(struct rte_eth_dev *dev, uint32_t own_idx)
+{
+	uint16_t owner = (uint16_t)MLX5_INDIRECT_ACT_CT_GET_OWNER(own_idx);
+	uint32_t idx = MLX5_INDIRECT_ACT_CT_GET_IDX(own_idx);
+	struct rte_eth_dev *owndev = &rte_eth_devices[owner];
+	RTE_SET_USED(dev);
+
+	MLX5_ASSERT(owner < RTE_MAX_ETHPORTS);
+	if (dev->data->dev_started != 1)
+		return -1;
+	return flow_dv_aso_ct_dev_release(owndev, idx);
 }
 
 /*
@@ -11479,7 +11493,7 @@ flow_dv_aso_ct_alloc(struct rte_eth_dev *dev, struct rte_flow_error *error)
 		RTE_SET_USED(reg_c);
 #endif
 		if (!ct->dr_action_orig) {
-			flow_dv_aso_ct_release(dev, ct_idx);
+			flow_dv_aso_ct_dev_release(dev, ct_idx);
 			rte_flow_error_set(error, rte_errno,
 					   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
 					   "failed to create ASO CT action");
@@ -11495,7 +11509,7 @@ flow_dv_aso_ct_alloc(struct rte_eth_dev *dev, struct rte_flow_error *error)
 			 reg_c - REG_C_0);
 #endif
 		if (!ct->dr_action_rply) {
-			flow_dv_aso_ct_release(dev, ct_idx);
+			flow_dv_aso_ct_dev_release(dev, ct_idx);
 			rte_flow_error_set(error, rte_errno,
 					   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
 					   "failed to create ASO CT action");
@@ -11537,12 +11551,13 @@ flow_dv_translate_create_conntrack(struct rte_eth_dev *dev,
 		return rte_flow_error_set(error, rte_errno,
 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
 					  "Failed to allocate CT object");
-	ct = flow_aso_ct_get_by_idx(dev, idx);
+	ct = flow_aso_ct_get_by_dev_idx(dev, idx);
 	if (mlx5_aso_ct_update_by_wqe(sh, ct, pro))
 		return rte_flow_error_set(error, EBUSY,
 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
 					  "Failed to update CT");
 	ct->is_original = !!pro->is_original_dir;
+	ct->peer = pro->peer_port;
 	return idx;
 }
 
@@ -11699,7 +11714,7 @@ flow_dv_translate(struct rte_eth_dev *dev,
 		const struct rte_flow_action *found_action = NULL;
 		uint32_t jump_group = 0;
 		struct mlx5_flow_counter *cnt;
-		uint32_t ct_idx;
+		uint32_t owner_idx;
 		struct mlx5_aso_ct_action *ct;
 
 		if (!mlx5_flow_os_action_supported(action_type))
@@ -12175,8 +12190,12 @@ flow_dv_translate(struct rte_eth_dev *dev,
 			action_flags |= MLX5_FLOW_ACTION_MODIFY_FIELD;
 			break;
 		case RTE_FLOW_ACTION_TYPE_CONNTRACK:
-			ct_idx = (uint32_t)(uintptr_t)action->conf;
-			ct = flow_aso_ct_get_by_idx(dev, ct_idx);
+			owner_idx = (uint32_t)(uintptr_t)action->conf;
+			ct = flow_aso_ct_get_by_idx(dev, owner_idx);
+			if (!ct) {
+				rte_errno = EINVAL;
+				return -rte_errno;
+			}
 			if (mlx5_aso_ct_available(priv->sh, ct))
 				return -rte_errno;
 			if (ct->is_original)
@@ -12186,7 +12205,7 @@ flow_dv_translate(struct rte_eth_dev *dev,
 				dev_flow->dv.actions[actions_n] =
 							ct->dr_action_rply;
 			flow->indirect_type = MLX5_INDIRECT_ACTION_TYPE_CT;
-			flow->ct = ct_idx;
+			flow->ct = owner_idx;
 			__atomic_fetch_add(&ct->refcnt, 1, __ATOMIC_RELAXED);
 			actions_n++;
 			action_flags |= MLX5_FLOW_ACTION_CT;
@@ -13786,8 +13805,7 @@ flow_dv_action_create(struct rte_eth_dev *dev,
 	case RTE_FLOW_ACTION_TYPE_CONNTRACK:
 		ret = flow_dv_translate_create_conntrack(dev, action->conf,
 							 err);
-		idx = (MLX5_INDIRECT_ACTION_TYPE_CT <<
-		       MLX5_INDIRECT_ACTION_TYPE_OFFSET) | ret;
+		idx = MLX5_INDIRECT_ACT_CT_GEN_IDX(PORT_ID(priv), ret);
 		break;
 	default:
 		rte_flow_error_set(err, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
@@ -13839,7 +13857,9 @@ flow_dv_action_destroy(struct rte_eth_dev *dev,
 		return 0;
 	case MLX5_INDIRECT_ACTION_TYPE_CT:
 		ret = flow_dv_aso_ct_release(dev, idx);
-		if (ret)
+		if (ret < 0)
+			return ret;
+		if (ret > 0)
 			DRV_LOG(DEBUG, "Connection tracking object %u still "
 				"has references %d.", idx, ret);
 		return 0;
@@ -13943,8 +13963,16 @@ __flow_dv_action_ct_update(struct rte_eth_dev *dev, uint32_t idx,
 	struct mlx5_aso_ct_action *ct;
 	const struct rte_flow_action_conntrack *new_prf;
 	int ret = 0;
+	uint16_t owner = (uint16_t)MLX5_INDIRECT_ACT_CT_GET_OWNER(idx);
+	uint32_t dev_idx;
 
-	ct = flow_aso_ct_get_by_idx(dev, idx);
+	if (PORT_ID(priv) != owner)
+		return rte_flow_error_set(error, EACCES,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					  NULL,
+					  "CT object owned by another port");
+	dev_idx = MLX5_INDIRECT_ACT_CT_GET_IDX(idx);
+	ct = flow_aso_ct_get_by_dev_idx(dev, dev_idx);
 	if (!ct->refcnt)
 		return rte_flow_error_set(error, ENOMEM,
 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
@@ -14022,6 +14050,8 @@ flow_dv_action_query(struct rte_eth_dev *dev,
 	uint32_t idx = act_idx & ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_aso_ct_action *ct;
+	uint16_t owner;
+	uint32_t dev_idx;
 
 	switch (type) {
 	case MLX5_INDIRECT_ACTION_TYPE_AGE:
@@ -14036,7 +14066,14 @@ flow_dv_action_query(struct rte_eth_dev *dev,
 			     (&age_param->sec_since_last_hit, __ATOMIC_RELAXED);
 		return 0;
 	case MLX5_INDIRECT_ACTION_TYPE_CT:
-		ct = flow_aso_ct_get_by_idx(dev, idx);
+		owner = (uint16_t)MLX5_INDIRECT_ACT_CT_GET_OWNER(idx);
+		if (owner != PORT_ID(priv))
+			return rte_flow_error_set(error, EACCES,
+					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					NULL,
+					"CT object owned by another port");
+		dev_idx = MLX5_INDIRECT_ACT_CT_GET_IDX(idx);
+		ct = flow_aso_ct_get_by_dev_idx(dev, dev_idx);
 		if (!ct->refcnt)
 			return rte_flow_error_set(error, ENOMEM,
 					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-- 
2.5.5


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v2 00/17] conntrack support in mlx5 PMD
  2021-04-27 15:37 [dpdk-dev] [PATCH 00/17] conntrack support in mlx5 PMD Bing Zhao
                   ` (16 preceding siblings ...)
  2021-04-27 15:38 ` [dpdk-dev] [PATCH 17/17] net/mlx5: add support of CT between two ports Bing Zhao
@ 2021-05-05  4:19 ` Bing Zhao
  2021-05-05  4:19   ` [dpdk-dev] [PATCH v2 01/17] common/mlx5: add connection tracking object definition Bing Zhao
                     ` (17 more replies)
  2021-05-05  6:40 ` [dpdk-dev] [PATCH v3 " Bing Zhao
                   ` (5 subsequent siblings)
  23 siblings, 18 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  4:19 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

This patch set adds the connection tracking offload support in the
mlx5 driver, as well as the documentation update.

---
v2: code bug fixes, commits clean up and doc update.
---

Bing Zhao (17):
  common/mlx5: add connection tracking object definition
  common/mlx5: add CT offload capability checking
  net/mlx5: use meter color reg for CT
  net/mlx5: initialization of CT management
  common/mlx5: add Dexv CT objects creation
  net/mlx5: add modify support for CT
  net/mlx5: add actions creating for CT
  net/mlx5: close CT management structure
  net/mlx5: add ASO CT query implementation
  net/mlx5: add ASO CT destroy handling
  net/mlx5: add translation of CT action
  net/mlx5: add translation of CT item
  net/mlx5: add CT context update
  net/mlx5: validation of CT action
  net/mlx5: validation of CT item
  net/mlx5: add support of CT between two ports
  doc: update mlx5 support for conntrack

 doc/guides/nics/features/default.ini   |   1 +
 doc/guides/nics/features/mlx5.ini      |   1 +
 doc/guides/nics/mlx5.rst               |  14 +
 doc/guides/rel_notes/release_21_05.rst |   2 +
 drivers/common/mlx5/linux/meson.build  |   2 +
 drivers/common/mlx5/mlx5_devx_cmds.c   |  53 +++
 drivers/common/mlx5/mlx5_devx_cmds.h   |   5 +
 drivers/common/mlx5/mlx5_prm.h         |  88 ++++
 drivers/common/mlx5/version.map        |   1 +
 drivers/net/mlx5/linux/mlx5_os.c       |  13 +
 drivers/net/mlx5/mlx5.c                |  92 ++++
 drivers/net/mlx5/mlx5.h                |  76 ++++
 drivers/net/mlx5/mlx5_flow.c           |  44 +-
 drivers/net/mlx5/mlx5_flow.h           | 101 ++++-
 drivers/net/mlx5/mlx5_flow_aso.c       | 592 ++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_dv.c        | 601 ++++++++++++++++++++++++-
 16 files changed, 1683 insertions(+), 3 deletions(-)

-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v2 01/17] common/mlx5: add connection tracking object definition
  2021-05-05  4:19 ` [dpdk-dev] [PATCH v2 00/17] conntrack support in mlx5 PMD Bing Zhao
@ 2021-05-05  4:19   ` Bing Zhao
  2021-05-05  4:19   ` [dpdk-dev] [PATCH v2 02/17] common/mlx5: add CT offload capability checking Bing Zhao
                     ` (16 subsequent siblings)
  17 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  4:19 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

The structures of ASO connection tracking offload object are added
based on the definitions in the PRM. One CT object context will be
loaded into the cache completely in a reversed order of dwords. The
valid bit should be the MSB of the last dword. This is used for the
conntrack context creation and update, as well as for the query.

The capabilities 2 (HCA_CAP_2) layout is also added. The connection
tracking related capabilities could be queried via the HCA_CAP_2.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/common/mlx5/mlx5_prm.h | 85 ++++++++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)

diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index efa5ae67bf..4da89d3379 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -1119,6 +1119,7 @@ enum {
 	MLX5_GET_HCA_CAP_OP_MOD_ROCE = 0x4 << 1,
 	MLX5_GET_HCA_CAP_OP_MOD_NIC_FLOW_TABLE = 0x7 << 1,
 	MLX5_GET_HCA_CAP_OP_MOD_VDPA_EMULATION = 0x13 << 1,
+	MLX5_GET_HCA_CAP_OP_MOD_GENERAL_DEVICE_2 = 0x20 << 1,
 };
 
 #define MLX5_GENERAL_OBJ_TYPES_CAP_VIRTQ_NET_Q \
@@ -1661,6 +1662,29 @@ struct mlx5_ifc_flow_table_nic_cap_bits {
 	struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties;
 };
 
+struct mlx5_ifc_cmd_hca_cap_2_bits {
+	u8 reserved_at_0[0x80]; /* End of DW4. */
+	u8 reserved_at_80[0xb];
+	u8 log_max_num_reserved_qpn[0x5];
+	u8 reserved_at_90[0x3];
+	u8 log_reserved_qpn_granularity[0x5];
+	u8 reserved_at_98[0x3];
+	u8 log_reserved_qpn_max_alloc[0x5]; /* End of DW5. */
+	u8 max_reformat_insert_size[0x8];
+	u8 max_reformat_insert_offset[0x8];
+	u8 max_reformat_remove_size[0x8];
+	u8 max_reformat_remove_offset[0x8]; /* End of DW6. */
+	u8 aso_conntrack_reg_id[0x8];
+	u8 reserved_at_c8[0x3];
+	u8 log_conn_track_granularity[0x5];
+	u8 reserved_at_d0[0x3];
+	u8 log_conn_track_max_alloc[0x5];
+	u8 reserved_at_d8[0x3];
+	u8 log_max_conn_track_offload[0x5];
+	u8 reserved_at_e0[0x20]; /* End of DW7. */
+	u8 reserved_at_100[0x700];
+};
+
 union mlx5_ifc_hca_cap_union_bits {
 	struct mlx5_ifc_cmd_hca_cap_bits cmd_hca_cap;
 	struct mlx5_ifc_per_protocol_networking_offload_caps_bits
@@ -2599,6 +2623,67 @@ struct mlx5_ifc_create_flow_meter_aso_in_bits {
 	struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr;
 	struct mlx5_ifc_flow_meter_aso_bits flow_meter_aso;
 };
+
+struct mlx5_ifc_tcp_window_params_bits {
+	u8 max_ack[0x20];
+	u8 max_win[0x20];
+	u8 reply_end[0x20];
+	u8 sent_end[0x20];
+};
+
+struct mlx5_ifc_conn_track_aso_bits {
+	struct mlx5_ifc_tcp_window_params_bits reply_dir; /* End of DW3. */
+	struct mlx5_ifc_tcp_window_params_bits original_dir; /* End of DW7. */
+	u8 last_end[0x20]; /* End of DW8. */
+	u8 last_ack[0x20]; /* End of DW9. */
+	u8 last_seq[0x20]; /* End of DW10. */
+	u8 last_win[0x10];
+	u8 reserved_at_170[0xa];
+	u8 last_dir[0x1];
+	u8 last_index[0x5]; /* End of DW11. */
+	u8 reserved_at_180[0x40]; /* End of DW13. */
+	u8 reply_direction_tcp_scale[0x4];
+	u8 reply_direction_tcp_close_initiated[0x1];
+	u8 reply_direction_tcp_liberal_enabled[0x1];
+	u8 reply_direction_tcp_data_unacked[0x1];
+	u8 reply_direction_tcp_max_ack[0x1];
+	u8 reserved_at_1c8[0x8];
+	u8 original_direction_tcp_scale[0x4];
+	u8 original_direction_tcp_close_initiated[0x1];
+	u8 original_direction_tcp_liberal_enabled[0x1];
+	u8 original_direction_tcp_data_unacked[0x1];
+	u8 original_direction_tcp_max_ack[0x1];
+	u8 reserved_at_1d8[0x8]; /* End of DW14. */
+	u8 valid[0x1];
+	u8 state[0x3];
+	u8 freeze_track[0x1];
+	u8 reserved_at_1e5[0xb];
+	u8 reserved_at_1f0[0x1];
+	u8 connection_assured[0x1];
+	u8 sack_permitted[0x1];
+	u8 challenged_acked[0x1];
+	u8 heartbeat[0x1];
+	u8 max_ack_window[0x3];
+	u8 reserved_at_1f8[0x1];
+	u8 retransmission_counter[0x3];
+	u8 retranmission_limit_exceeded[0x1];
+	u8 retranmission_limit[0x3]; /* End of DW15. */
+};
+
+struct mlx5_ifc_conn_track_offload_bits {
+	u8 modify_field_select[0x40];
+	u8 reserved_at_40[0x40];
+	u8 reserved_at_80[0x8];
+	u8 conn_track_aso_access_pd[0x18];
+	u8 reserved_at_a0[0x160];
+	struct mlx5_ifc_conn_track_aso_bits conn_track_aso;
+};
+
+struct mlx5_ifc_create_conn_track_aso_in_bits {
+	struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr;
+	struct mlx5_ifc_conn_track_offload_bits conn_track_offload;
+};
+
 enum mlx5_access_aso_opc_mod {
 	ASO_OPC_MOD_IPSEC = 0x0,
 	ASO_OPC_MOD_CONNECTION_TRACKING = 0x1,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v2 02/17] common/mlx5: add CT offload capability checking
  2021-05-05  4:19 ` [dpdk-dev] [PATCH v2 00/17] conntrack support in mlx5 PMD Bing Zhao
  2021-05-05  4:19   ` [dpdk-dev] [PATCH v2 01/17] common/mlx5: add connection tracking object definition Bing Zhao
@ 2021-05-05  4:19   ` Bing Zhao
  2021-05-05  4:19   ` [dpdk-dev] [PATCH v2 03/17] net/mlx5: use meter color reg for CT Bing Zhao
                     ` (15 subsequent siblings)
  17 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  4:19 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

During startup, the ASO connection tracking offload capability can
be queried via the HCA_CAP_QUERY command. If the HW doesn't support
ASO CT, the value will be 0 by default. In that case, the subsequent
initialization should be skipped and the creation of the CT object
should return a failure directly.

The subsequent CT creation should also check this capability. With
an old driver, a pre-processing macro should be used so that the
compilation still passes.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/common/mlx5/linux/meson.build | 2 ++
 drivers/common/mlx5/mlx5_devx_cmds.c  | 3 +++
 drivers/common/mlx5/mlx5_devx_cmds.h  | 1 +
 drivers/common/mlx5/mlx5_prm.h        | 3 +++
 4 files changed, 9 insertions(+)

diff --git a/drivers/common/mlx5/linux/meson.build b/drivers/common/mlx5/linux/meson.build
index 3334bd5cb2..007834a49b 100644
--- a/drivers/common/mlx5/linux/meson.build
+++ b/drivers/common/mlx5/linux/meson.build
@@ -189,6 +189,8 @@ has_sym_args = [
             'MLX5_WQE_UMR_CTRL_FLAG_INLINE' ],
         [ 'HAVE_MLX5_DR_FLOW_DUMP_RULE', 'infiniband/mlx5dv.h',
             'mlx5dv_dump_dr_rule' ],
+        [ 'HAVE_MLX5_DR_ACTION_ASO_CT', 'infiniband/mlx5dv.h',
+            'MLX5DV_DR_ACTION_FLAGS_ASO_CT_DIRECTION_INITIATOR' ],
 ]
 config = configuration_data()
 foreach arg:has_sym_args
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index 79fff6457c..ad67883fde 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -760,6 +760,9 @@ mlx5_devx_cmd_query_hca_attr(void *ctx,
 		MLX5_GET(cmd_hca_cap, hcattr, umr_indirect_mkey_disabled);
 	attr->umr_modify_entity_size_disabled =
 		MLX5_GET(cmd_hca_cap, hcattr, umr_modify_entity_size_disabled);
+	attr->ct_offload = !!(MLX5_GET64(cmd_hca_cap, hcattr,
+					 general_obj_types) &
+			      MLX5_GENERAL_OBJ_TYPES_CAP_CONN_TRACK_OFFLOAD);
 	if (attr->qos.sup) {
 		MLX5_SET(query_hca_cap_in, in, op_mod,
 			 MLX5_GET_HCA_CAP_OP_MOD_QOS_CAP |
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h
index 870bdb6b30..746320cf04 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -137,6 +137,7 @@ struct mlx5_hca_attr {
 	uint32_t qp_ts_format:2;
 	uint32_t regex:1;
 	uint32_t reg_c_preserve:1;
+	uint32_t ct_offload:1; /* General obj type ASO CT offload supported. */
 	uint32_t regexp_num_of_engines;
 	uint32_t log_max_ft_sampler_num:8;
 	uint32_t geneve_tlv_opt;
diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 4da89d3379..71bdf43668 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -1134,6 +1134,8 @@ enum {
 			(1ULL << MLX5_GENERAL_OBJ_TYPE_FLOW_METER_ASO)
 #define MLX5_GENERAL_OBJ_TYPES_CAP_GENEVE_TLV_OPT \
 			(1ULL << MLX5_OBJ_TYPE_GENEVE_TLV_OPT)
+#define MLX5_GENERAL_OBJ_TYPES_CAP_CONN_TRACK_OFFLOAD \
+			(1ULL << MLX5_GENERAL_OBJ_TYPE_CONN_TRACK_OFFLOAD)
 
 enum {
 	MLX5_HCA_CAP_OPMOD_GET_MAX   = 0,
@@ -2456,6 +2458,7 @@ enum {
 	MLX5_GENERAL_OBJ_TYPE_FLEX_PARSE_GRAPH = 0x0022,
 	MLX5_GENERAL_OBJ_TYPE_FLOW_METER_ASO = 0x0024,
 	MLX5_GENERAL_OBJ_TYPE_FLOW_HIT_ASO = 0x0025,
+	MLX5_GENERAL_OBJ_TYPE_CONN_TRACK_OFFLOAD = 0x0031,
 };
 
 struct mlx5_ifc_general_obj_in_cmd_hdr_bits {
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v2 03/17] net/mlx5: use meter color reg for CT
  2021-05-05  4:19 ` [dpdk-dev] [PATCH v2 00/17] conntrack support in mlx5 PMD Bing Zhao
  2021-05-05  4:19   ` [dpdk-dev] [PATCH v2 01/17] common/mlx5: add connection tracking object definition Bing Zhao
  2021-05-05  4:19   ` [dpdk-dev] [PATCH v2 02/17] common/mlx5: add CT offload capability checking Bing Zhao
@ 2021-05-05  4:19   ` Bing Zhao
  2021-05-05  4:19   ` [dpdk-dev] [PATCH v2 04/17] net/mlx5: initialization of CT management Bing Zhao
                     ` (14 subsequent siblings)
  17 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  4:19 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

Based on the capacity, 3 registers could be used. Due to the register
allocation, only REG_C_3, the one used for meter color, can be reused
right now.

As a result, no more than one ASO action can be supported in a flow.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow.c | 4 +++-
 drivers/net/mlx5/mlx5_flow.h | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index a7ceafe221..edad6007a8 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -760,7 +760,9 @@ mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
 			return priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
 			       REG_C_3;
 	case MLX5_MTR_COLOR:
-	case MLX5_ASO_FLOW_HIT: /* Both features use the same REG_C. */
+	case MLX5_ASO_FLOW_HIT:
+	case MLX5_ASO_CONNTRACK:
+		/* All features use the same REG_C. */
 		MLX5_ASSERT(priv->mtr_color_reg != REG_NON);
 		return priv->mtr_color_reg;
 	case MLX5_COPY_MARK:
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index cc3e79d088..964e13a869 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -84,6 +84,7 @@ enum mlx5_feature_name {
 	MLX5_MTR_COLOR,
 	MLX5_MTR_ID,
 	MLX5_ASO_FLOW_HIT,
+	MLX5_ASO_CONNTRACK,
 };
 
 /* Default queue number. */
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v2 04/17] net/mlx5: initialization of CT management
  2021-05-05  4:19 ` [dpdk-dev] [PATCH v2 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (2 preceding siblings ...)
  2021-05-05  4:19   ` [dpdk-dev] [PATCH v2 03/17] net/mlx5: use meter color reg for CT Bing Zhao
@ 2021-05-05  4:19   ` Bing Zhao
  2021-05-05  4:19   ` [dpdk-dev] [PATCH v2 05/17] common/mlx5: add Dexv CT objects creation Bing Zhao
                     ` (13 subsequent siblings)
  17 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  4:19 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

The definitions of ASO connection tracking objects management
structures are added.

For performance reasons, bulk allocation of ASO CT objects should be
used. The maximal number per bulk and the granularity can be fetched
from HCA capabilities 2. Right now, a fixed number of 64 objects is
used for each bulk to make the management easier.

The ASO QP for CT is initialized; its SQ will be used for both the
modify and the query commands.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/linux/mlx5_os.c | 13 +++++++++
 drivers/net/mlx5/mlx5.c          | 36 +++++++++++++++++++++++
 drivers/net/mlx5/mlx5.h          | 50 ++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_aso.c | 50 ++++++++++++++++++++++++++++++++
 4 files changed, 149 insertions(+)

diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 479ee7d8d1..5ac787106d 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1323,6 +1323,19 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 			DRV_LOG(DEBUG, "Flow Hit ASO is supported.");
 		}
 #endif /* HAVE_MLX5_DR_CREATE_ACTION_ASO */
+#if defined(HAVE_MLX5_DR_CREATE_ACTION_ASO) && \
+	defined(HAVE_MLX5_DR_ACTION_ASO_CT)
+		if (config->hca_attr.ct_offload &&
+		    priv->mtr_color_reg == REG_C_3) {
+			err = mlx5_flow_aso_ct_mng_init(sh);
+			if (err) {
+				err = -err;
+				goto error;
+			}
+			DRV_LOG(DEBUG, "CT ASO is supported.");
+			sh->ct_aso_en = 1;
+		}
+#endif /* HAVE_MLX5_DR_CREATE_ACTION_ASO && HAVE_MLX5_DR_ACTION_ASO_CT */
 #if defined(HAVE_MLX5DV_DR) && defined(HAVE_MLX5_DR_CREATE_ACTION_FLOW_SAMPLE)
 		if (config->hca_attr.log_max_ft_sampler_num > 0  &&
 		    config->dv_flow_en) {
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 8cd6f1eaee..86dbe6d573 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -670,6 +670,42 @@ mlx5_age_event_prepare(struct mlx5_dev_ctx_shared *sh)
 	}
 }
 
+/*
+ * Initialize the ASO connection tracking structure.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_aso_ct_mng_init(struct mlx5_dev_ctx_shared *sh)
+{
+	int err;
+
+	if (sh->ct_mng)
+		return 0;
+	sh->ct_mng = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*sh->ct_mng),
+				 RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+	if (!sh->ct_mng) {
+		DRV_LOG(ERR, "ASO CT management allocation failed.");
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+	err = mlx5_aso_queue_init(sh, ASO_OPC_MOD_CONNECTION_TRACKING);
+	if (err) {
+		mlx5_free(sh->ct_mng);
+		/* rte_errno should be extracted from the failure. */
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+	rte_spinlock_init(&sh->ct_mng->ct_sl);
+	rte_rwlock_init(&sh->ct_mng->resize_rwl);
+	LIST_INIT(&sh->ct_mng->free_cts);
+	return 0;
+}
+
 /**
  * Initialize the flow resources' indexed mempool.
  *
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index c62977613a..1a5c78fa3a 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -983,6 +983,52 @@ struct mlx5_bond_info {
 	} ports[MLX5_BOND_MAX_PORTS];
 };
 
+/* Number of connection tracking objects per pool: must be a power of 2. */
+#define MLX5_ASO_CT_ACTIONS_PER_POOL 64
+
+/* ASO Conntrack state. */
+enum mlx5_aso_ct_state {
+	ASO_CONNTRACK_FREE, /* Inactive, in the free list. */
+	ASO_CONNTRACK_WAIT, /* WQE sent in the SQ. */
+	ASO_CONNTRACK_READY, /* CQE received w/o error. */
+	ASO_CONNTRACK_QUERY, /* WQE for query sent. */
+	ASO_CONNTRACK_MAX, /* Guard. */
+};
+
+/* Generic ASO connection tracking structure. */
+struct mlx5_aso_ct_action {
+	LIST_ENTRY(mlx5_aso_ct_action) next; /* Pointer to the next ASO CT. */
+	void *dr_action_orig; /* General action object for original dir. */
+	void *dr_action_rply; /* General action object for reply dir. */
+	uint32_t refcnt; /* Action used count in device flows. */
+	uint16_t offset; /* Offset of ASO CT in DevX objects bulk. */
+	uint16_t peer; /* The only peer port index could also use this CT. */
+	enum mlx5_aso_ct_state state; /* ASO CT state. */
+	bool is_original; /* The direction of the DR action to be used. */
+};
+
+/* ASO connection tracking software pool definition. */
+struct mlx5_aso_ct_pool {
+	uint16_t index; /* Pool index in pools array. */
+	struct mlx5_devx_obj *devx_obj;
+	/* The first devx object in the bulk, used for freeing (not yet). */
+	struct mlx5_aso_ct_action actions[MLX5_ASO_CT_ACTIONS_PER_POOL];
+	/* CT action structures bulk. */
+};
+
+LIST_HEAD(aso_ct_list, mlx5_aso_ct_action);
+
+/* Pools management structure for ASO connection tracking pools. */
+struct mlx5_aso_ct_pools_mng {
+	struct mlx5_aso_ct_pool **pools;
+	uint16_t n; /* Total number of pools. */
+	uint16_t next; /* Number of pools in use, index of next free pool. */
+	rte_spinlock_t ct_sl; /* The ASO CT free list lock. */
+	rte_rwlock_t resize_rwl; /* The ASO CT pool resize lock. */
+	struct aso_ct_list free_cts; /* Free ASO CT objects list. */
+	struct mlx5_aso_sq aso_sq; /* ASO queue objects. */
+};
+
 /*
  * Shared Infiniband device context for Master/Representors
  * which belong to same IB device with multiple IB ports.
@@ -996,6 +1042,7 @@ struct mlx5_dev_ctx_shared {
 	uint32_t sq_ts_format:2; /* SQ timestamp formats supported. */
 	uint32_t qp_ts_format:2; /* QP timestamp formats supported. */
 	uint32_t meter_aso_en:1; /* Flow Meter ASO is supported. */
+	uint32_t ct_aso_en:1; /* Connection Tracking ASO is supported. */
 	uint32_t max_port; /* Maximal IB device port index. */
 	struct mlx5_bond_info bond; /* Bonding information. */
 	void *ctx; /* Verbs/DV/DevX context. */
@@ -1058,6 +1105,8 @@ struct mlx5_dev_ctx_shared {
 	rte_spinlock_t geneve_tlv_opt_sl; /* Lock for geneve tlv resource */
 	struct mlx5_flow_mtr_mng *mtrmng;
 	/* Meter management structure. */
+	struct mlx5_aso_ct_pools_mng *ct_mng;
+	/* Management data for ASO connection tracking. */
 	struct mlx5_dev_shared_port port[]; /* per device port data array. */
 };
 
@@ -1355,6 +1404,7 @@ bool mlx5_flex_parser_ecpri_exist(struct rte_eth_dev *dev);
 int mlx5_flex_parser_ecpri_alloc(struct rte_eth_dev *dev);
 int mlx5_flow_aso_age_mng_init(struct mlx5_dev_ctx_shared *sh);
 int mlx5_aso_flow_mtrs_mng_init(struct mlx5_dev_ctx_shared *sh);
+int mlx5_flow_aso_ct_mng_init(struct mlx5_dev_ctx_shared *sh);
 
 /* mlx5_ethdev.c */
 
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index 300987d0e9..5edbcd1721 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -186,6 +186,43 @@ mlx5_aso_mtr_init_sq(struct mlx5_aso_sq *sq)
 	}
 }
 
+/*
+ * Initialize Send Queue used for ASO connection tracking.
+ *
+ * @param[in] sq
+ *   ASO SQ to initialize.
+ */
+static void
+mlx5_aso_ct_init_sq(struct mlx5_aso_sq *sq)
+{
+	volatile struct mlx5_aso_wqe *restrict wqe;
+	int i;
+	int size = 1 << sq->log_desc_n;
+	uint64_t addr;
+
+	/* All the next fields state should stay constant. */
+	for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
+		wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
+							  (sizeof(*wqe) >> 4));
+		/* One unique MR for the query data. */
+		wqe->aso_cseg.lkey = rte_cpu_to_be_32(sq->mr.mkey->id);
+		/* Magic number 64 is the length of an ASO CT object. */
+		addr = (uint64_t)((uintptr_t)sq->mr.addr + i * 64);
+		wqe->aso_cseg.va_h = rte_cpu_to_be_32((uint32_t)(addr >> 32));
+		wqe->aso_cseg.va_l_r = rte_cpu_to_be_32((uint32_t)addr | 1u);
+		/*
+		 * The values of operand_masks are different for modify
+		 * and query.
+		 * And data_mask may be different for each modification. In
+		 * query, it could be zero and ignored.
+		 * CQE generation is always needed, in order to decide when
+		 * it is available to create the flow or read the data.
+		 */
+		wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
+						   MLX5_COMP_MODE_OFFSET);
+	}
+}
+
 /**
  * Create Send Queue used for ASO access.
  *
@@ -293,6 +330,19 @@ mlx5_aso_queue_init(struct mlx5_dev_ctx_shared *sh,
 			return -1;
 		mlx5_aso_mtr_init_sq(&sh->mtrmng->pools_mng.sq);
 		break;
+	case ASO_OPC_MOD_CONNECTION_TRACKING:
+		/* 64B per object for query. */
+		if (mlx5_aso_reg_mr(sh->ctx, 64 * sq_desc_n,
+				    &sh->ct_mng->aso_sq.mr, 0))
+			return -1;
+		if (mlx5_aso_sq_create(sh->ctx, &sh->ct_mng->aso_sq, 0,
+				sh->tx_uar, sh->pdn, MLX5_ASO_QUEUE_LOG_DESC,
+				sh->sq_ts_format)) {
+			mlx5_aso_dereg_mr(sh, &sh->ct_mng->aso_sq.mr);
+			return -1;
+		}
+		mlx5_aso_ct_init_sq(&sh->ct_mng->aso_sq);
+		break;
 	default:
 		DRV_LOG(ERR, "Unknown ASO operation mode");
 		return -1;
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v2 05/17] common/mlx5: add Dexv CT objects creation
  2021-05-05  4:19 ` [dpdk-dev] [PATCH v2 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (3 preceding siblings ...)
  2021-05-05  4:19   ` [dpdk-dev] [PATCH v2 04/17] net/mlx5: initialization of CT management Bing Zhao
@ 2021-05-05  4:19   ` Bing Zhao
  2021-05-05  4:19   ` [dpdk-dev] [PATCH v2 06/17] net/mlx5: add modify support for CT Bing Zhao
                     ` (12 subsequent siblings)
  17 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  4:19 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

Add support for creating connection tracking ASO objects via a DevX
command. Right now only bulk creation is supported.

By default, the objects are created with all-zero contents. Before a
single object can be used, it must be modified by posting a WQE to the
ASO CT SQ.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/common/mlx5/mlx5_devx_cmds.c | 50 ++++++++++++++++++++++++++++
 drivers/common/mlx5/mlx5_devx_cmds.h |  4 +++
 drivers/common/mlx5/version.map      |  1 +
 3 files changed, 55 insertions(+)

diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index ad67883fde..dc01266642 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -2232,6 +2232,56 @@ mlx5_devx_cmd_create_flow_meter_aso_obj(void *ctx, uint32_t pd,
 	return flow_meter_aso_obj;
 }
 
+/*
+ * Create general object of type CONN_TRACK_OFFLOAD using DevX API.
+ *
+ * @param[in] ctx
+ *   Context returned from mlx5 open_device() glue function.
+ * @param [in] pd
+ *   PD value to associate the CONN_TRACK_OFFLOAD ASO object with.
+ * @param [in] log_obj_size
+ *   Log2 of the number of objects to allocate
+ *   in one CONN_TRACK_OFFLOAD bulk allocation.
+ *
+ * @return
+ *   The DevX object created, NULL otherwise and rte_errno is set.
+ */
+struct mlx5_devx_obj *
+mlx5_devx_cmd_create_conn_track_offload_obj(void *ctx, uint32_t pd,
+					    uint32_t log_obj_size)
+{
+	uint32_t in[MLX5_ST_SZ_DW(create_conn_track_aso_in)] = {0};
+	uint32_t out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+	struct mlx5_devx_obj *ct_aso_obj;
+	void *ptr;
+
+	ct_aso_obj = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*ct_aso_obj),
+				 0, SOCKET_ID_ANY);
+	if (!ct_aso_obj) {
+		DRV_LOG(ERR, "Failed to allocate CONN_TRACK_OFFLOAD object.");
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+	ptr = MLX5_ADDR_OF(create_conn_track_aso_in, in, hdr);
+	MLX5_SET(general_obj_in_cmd_hdr, ptr, opcode,
+		 MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, ptr, obj_type,
+		 MLX5_GENERAL_OBJ_TYPE_CONN_TRACK_OFFLOAD);
+	MLX5_SET(general_obj_in_cmd_hdr, ptr, log_obj_range, log_obj_size);
+	ptr = MLX5_ADDR_OF(create_conn_track_aso_in, in, conn_track_offload);
+	MLX5_SET(conn_track_offload, ptr, conn_track_aso_access_pd, pd);
+	ct_aso_obj->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in),
+						     out, sizeof(out));
+	if (!ct_aso_obj->obj) {
+		rte_errno = errno;
+		DRV_LOG(ERR, "Failed to create CONN_TRACK_OFFLOAD obj by using DevX.");
+		mlx5_free(ct_aso_obj);
+		return NULL;
+	}
+	ct_aso_obj->id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+	return ct_aso_obj;
+}
+
 /**
  * Create general object of type GENEVE TLV option using DevX API.
  *
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h
index 746320cf04..e67cea506d 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -569,6 +569,10 @@ struct mlx5_devx_obj *mlx5_devx_cmd_queue_counter_alloc(void *ctx);
 __rte_internal
 int mlx5_devx_cmd_queue_counter_query(struct mlx5_devx_obj *dcs, int clear,
 				      uint32_t *out_of_buffers);
+__rte_internal
+struct mlx5_devx_obj *mlx5_devx_cmd_create_conn_track_offload_obj(void *ctx,
+					uint32_t pd, uint32_t log_obj_size);
+
 /**
  * Create general object of type FLOW_METER_ASO using DevX API..
  *
diff --git a/drivers/common/mlx5/version.map b/drivers/common/mlx5/version.map
index 18dc96276d..4bbcba5b8e 100644
--- a/drivers/common/mlx5/version.map
+++ b/drivers/common/mlx5/version.map
@@ -13,6 +13,7 @@ INTERNAL {
 	mlx5_dev_to_pci_addr; # WINDOWS_NO_EXPORT
 
 	mlx5_devx_cmd_alloc_pd;
+	mlx5_devx_cmd_create_conn_track_offload_obj;
 	mlx5_devx_cmd_create_cq;
 	mlx5_devx_cmd_create_flex_parser;
 	mlx5_devx_cmd_create_qp;
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v2 06/17] net/mlx5: add modify support for CT
  2021-05-05  4:19 ` [dpdk-dev] [PATCH v2 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (4 preceding siblings ...)
  2021-05-05  4:19   ` [dpdk-dev] [PATCH v2 05/17] common/mlx5: add Dexv CT objects creation Bing Zhao
@ 2021-05-05  4:19   ` Bing Zhao
  2021-05-05  4:19   ` [dpdk-dev] [PATCH v2 07/17] net/mlx5: add actions creating " Bing Zhao
                     ` (11 subsequent siblings)
  17 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  4:19 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

After a bulk of connection tracking objects is allocated, the contents
of all the objects are zero by default. Every newly allocated object
must be modified via a WQE operation before it is used.

In order to reduce the latency of flow creation, the WQE is posted
asynchronously instead of busy-waiting for the CQE to be generated.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.h          |   8 +
 drivers/net/mlx5/mlx5_flow.h     |   3 +
 drivers/net/mlx5/mlx5_flow_aso.c | 252 +++++++++++++++++++++++++++++++
 3 files changed, 263 insertions(+)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 1a5c78fa3a..1898a0401f 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -490,6 +490,7 @@ struct mlx5_aso_sq_elem {
 			uint16_t burst_size;
 		};
 		struct mlx5_aso_mtr *mtr;
+		struct mlx5_aso_ct_action *ct;
 	};
 };
 
@@ -1007,6 +1008,10 @@ struct mlx5_aso_ct_action {
 	bool is_original; /* The direction of the DR action to be used. */
 };
 
+/* CT action object state update. */
+#define MLX5_ASO_CT_UPDATE_STATE(c, s) \
+	__atomic_store_n(&((c)->state), (s), __ATOMIC_RELAXED)
+
 /* ASO connection tracking software pool definition. */
 struct mlx5_aso_ct_pool {
 	uint16_t index; /* Pool index in pools array. */
@@ -1690,5 +1695,8 @@ int mlx5_aso_meter_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
 		struct mlx5_aso_mtr *mtr);
 int mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh,
 		struct mlx5_aso_mtr *mtr);
+int mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
+			      struct mlx5_aso_ct_action *ct,
+			      const struct rte_flow_action_conntrack *profile);
 
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 964e13a869..eb5b53ac6a 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -45,6 +45,7 @@ enum mlx5_rte_flow_action_type {
 enum {
 	MLX5_INDIRECT_ACTION_TYPE_RSS,
 	MLX5_INDIRECT_ACTION_TYPE_AGE,
+	MLX5_INDIRECT_ACTION_TYPE_AGE,
 };
 
 /* Matches on selected register. */
@@ -839,6 +840,8 @@ struct mlx5_flow {
 #define MLX5_ASO_WQE_CQE_RESPONSE_DELAY 10u
 #define MLX5_MTR_POLL_WQE_CQE_TIMES 100000u
 
+#define MLX5_CT_POLL_WQE_CQE_TIMES MLX5_MTR_POLL_WQE_CQE_TIMES
+
 #define MLX5_MAN_WIDTH 8
 /* Legacy Meter parameter structure. */
 struct mlx5_legacy_flow_meter {
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index 5edbcd1721..28b0c05814 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -887,3 +887,255 @@ mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh,
 			mtr->offset);
 	return -1;
 }
+
+/*
+ * Post a WQE to the ASO CT SQ to modify the context.
+ *
+ * @param[in] mng
+ *   Pointer to the CT pools management structure.
+ * @param[in] ct
+ *   Pointer to the generic CT structure related to the context.
+ * @param[in] profile
+ *   Pointer to configuration profile.
+ *
+ * @return
+ *   1 on success (WQE number), 0 on failure.
+ */
+static uint16_t
+mlx5_aso_ct_sq_enqueue_single(struct mlx5_aso_ct_pools_mng *mng,
+			      struct mlx5_aso_ct_action *ct,
+			      const struct rte_flow_action_conntrack *profile)
+{
+	volatile struct mlx5_aso_wqe *wqe = NULL;
+	struct mlx5_aso_sq *sq = &mng->aso_sq;
+	uint16_t size = 1 << sq->log_desc_n;
+	uint16_t mask = size - 1;
+	uint16_t res;
+	struct mlx5_aso_ct_pool *pool;
+	void *desg;
+	void *orig_dir;
+	void *reply_dir;
+
+	rte_spinlock_lock(&sq->sqsl);
+	/* Prevent other threads from updating the index. */
+	res = size - (uint16_t)(sq->head - sq->tail);
+	if (unlikely(!res)) {
+		rte_spinlock_unlock(&sq->sqsl);
+		DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
+		return 0;
+	}
+	wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
+	rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
+	/* Fill next WQE. */
+	MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_WAIT);
+	sq->elts[sq->head & mask].ct = ct;
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	/* Each WQE will have a single CT object. */
+	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
+						  ct->offset);
+	wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
+			(ASO_OPC_MOD_CONNECTION_TRACKING <<
+			 WQE_CSEG_OPC_MOD_OFFSET) |
+			sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
+	wqe->aso_cseg.operand_masks = rte_cpu_to_be_32
+			(0u |
+			 (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
+			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
+			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
+			 (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
+	wqe->aso_cseg.data_mask = UINT64_MAX;
+	/* To make compiler happy. */
+	desg = (void *)(uintptr_t)wqe->aso_dseg.data;
+	MLX5_SET(conn_track_aso, desg, valid, 1);
+	MLX5_SET(conn_track_aso, desg, state, profile->state);
+	MLX5_SET(conn_track_aso, desg, freeze_track, !profile->enable);
+	MLX5_SET(conn_track_aso, desg, connection_assured,
+		 profile->live_connection);
+	MLX5_SET(conn_track_aso, desg, sack_permitted, profile->selective_ack);
+	MLX5_SET(conn_track_aso, desg, challenged_acked,
+		 profile->challenge_ack_passed);
+	/* Heartbeat, retransmission_counter, retranmission_limit_exceeded: 0 */
+	MLX5_SET(conn_track_aso, desg, heartbeat, 0);
+	MLX5_SET(conn_track_aso, desg, max_ack_window,
+		 profile->max_ack_window);
+	MLX5_SET(conn_track_aso, desg, retransmission_counter, 0);
+	MLX5_SET(conn_track_aso, desg, retranmission_limit_exceeded, 0);
+	MLX5_SET(conn_track_aso, desg, retranmission_limit,
+		 profile->retransmission_limit);
+	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_scale,
+		 profile->reply_dir.scale);
+	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_close_initiated,
+		 profile->reply_dir.close_initiated);
+	/* Both directions will use the same liberal mode. */
+	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_liberal_enabled,
+		 profile->liberal_mode);
+	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_data_unacked,
+		 profile->reply_dir.data_unacked);
+	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_max_ack,
+		 profile->reply_dir.last_ack_seen);
+	MLX5_SET(conn_track_aso, desg, original_direction_tcp_scale,
+		 profile->original_dir.scale);
+	MLX5_SET(conn_track_aso, desg, original_direction_tcp_close_initiated,
+		 profile->original_dir.close_initiated);
+	MLX5_SET(conn_track_aso, desg, original_direction_tcp_liberal_enabled,
+		 profile->liberal_mode);
+	MLX5_SET(conn_track_aso, desg, original_direction_tcp_data_unacked,
+		 profile->original_dir.data_unacked);
+	MLX5_SET(conn_track_aso, desg, original_direction_tcp_max_ack,
+		 profile->original_dir.last_ack_seen);
+	MLX5_SET(conn_track_aso, desg, last_win, profile->last_window);
+	MLX5_SET(conn_track_aso, desg, last_dir, profile->last_direction);
+	MLX5_SET(conn_track_aso, desg, last_index, profile->last_index);
+	MLX5_SET(conn_track_aso, desg, last_seq, profile->last_seq);
+	MLX5_SET(conn_track_aso, desg, last_ack, profile->last_ack);
+	MLX5_SET(conn_track_aso, desg, last_end, profile->last_end);
+	orig_dir = MLX5_ADDR_OF(conn_track_aso, desg, original_dir);
+	MLX5_SET(tcp_window_params, orig_dir, sent_end,
+		 profile->original_dir.sent_end);
+	MLX5_SET(tcp_window_params, orig_dir, reply_end,
+		 profile->original_dir.reply_end);
+	MLX5_SET(tcp_window_params, orig_dir, max_win,
+		 profile->original_dir.max_win);
+	MLX5_SET(tcp_window_params, orig_dir, max_ack,
+		 profile->original_dir.max_ack);
+	reply_dir = MLX5_ADDR_OF(conn_track_aso, desg, reply_dir);
+	MLX5_SET(tcp_window_params, reply_dir, sent_end,
+		 profile->reply_dir.sent_end);
+	MLX5_SET(tcp_window_params, reply_dir, reply_end,
+		 profile->reply_dir.reply_end);
+	MLX5_SET(tcp_window_params, reply_dir, max_win,
+		 profile->reply_dir.max_win);
+	MLX5_SET(tcp_window_params, reply_dir, max_ack,
+		 profile->reply_dir.max_ack);
+	sq->head++;
+	sq->pi += 2; /* Each WQE contains 2 WQEBB's. */
+	rte_io_wmb();
+	sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
+	rte_wmb();
+	*sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH. */
+	rte_wmb();
+	rte_spinlock_unlock(&sq->sqsl);
+	return 1;
+}
+
+/*
+ * Update the status field of CTs to indicate ready to be used by flows.
+ * A continuous number of CTs since last update.
+ *
+ * @param[in] sq
+ *   Pointer to ASO CT SQ.
+ * @param[in] num
+ *   Number of CT structures to be updated.
+ *
+ * There is no return value: each of the @p num CTs starting at the SQ
+ * tail is switched to the ASO_CONNTRACK_READY state.
+ */
+static void
+mlx5_aso_ct_status_update(struct mlx5_aso_sq *sq, uint16_t num)
+{
+	uint16_t size = 1 << sq->log_desc_n;
+	uint16_t mask = size - 1;
+	uint16_t i;
+	struct mlx5_aso_ct_action *ct = NULL;
+	uint16_t idx;
+
+	for (i = 0; i < num; i++) {
+		idx = (uint16_t)((sq->tail + i) & mask);
+		ct = sq->elts[idx].ct;
+		MLX5_ASSERT(ct);
+		MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_READY);
+	}
+}
+
+/*
+ * Handle completions from WQEs sent to ASO CT.
+ *
+ * @param[in] mng
+ *   Pointer to the CT pools management structure.
+ */
+static void
+mlx5_aso_ct_completion_handle(struct mlx5_aso_ct_pools_mng *mng)
+{
+	struct mlx5_aso_sq *sq = &mng->aso_sq;
+	struct mlx5_aso_cq *cq = &sq->cq;
+	volatile struct mlx5_cqe *restrict cqe;
+	const uint32_t cq_size = 1 << cq->log_desc_n;
+	const uint32_t mask = cq_size - 1;
+	uint32_t idx;
+	uint32_t next_idx;
+	uint16_t max;
+	uint16_t n = 0;
+	int ret;
+
+	rte_spinlock_lock(&sq->sqsl);
+	max = (uint16_t)(sq->head - sq->tail);
+	if (unlikely(!max)) {
+		rte_spinlock_unlock(&sq->sqsl);
+		return;
+	}
+	next_idx = cq->cq_ci & mask;
+	do {
+		idx = next_idx;
+		next_idx = (cq->cq_ci + 1) & mask;
+		/* Need to confirm the position of the prefetch. */
+		rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
+		cqe = &cq->cq_obj.cqes[idx];
+		ret = check_cqe(cqe, cq_size, cq->cq_ci);
+		/*
+		 * Be sure owner read is done before any other cookie field or
+		 * opaque field.
+		 */
+		rte_io_rmb();
+		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
+			if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
+				break;
+			mlx5_aso_cqe_err_handle(sq);
+		} else {
+			n++;
+		}
+		cq->cq_ci++;
+	} while (1);
+	if (likely(n)) {
+		mlx5_aso_ct_status_update(sq, n);
+		sq->tail += n;
+		rte_io_wmb();
+		cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
+	}
+	rte_spinlock_unlock(&sq->sqsl);
+}
+
+/*
+ * Update connection tracking ASO context by sending WQE.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ * @param[in] ct
+ *   Pointer to connection tracking offload object.
+ * @param[in] profile
+ *   Pointer to connection tracking TCP parameter.
+ *
+ * @return
+ *   0 on success, -1 on failure.
+ */
+int
+mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
+			  struct mlx5_aso_ct_action *ct,
+			  const struct rte_flow_action_conntrack *profile)
+{
+	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
+	uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
+	struct mlx5_aso_ct_pool *pool;
+
+	MLX5_ASSERT(ct);
+	do {
+		mlx5_aso_ct_completion_handle(mng);
+		if (mlx5_aso_ct_sq_enqueue_single(mng, ct, profile))
+			return 0;
+		/* Waiting for wqe resource. */
+		rte_delay_us_sleep(10u);
+	} while (--poll_wqe_times);
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	DRV_LOG(ERR, "Fail to send WQE for ASO CT %d in pool %d",
+		ct->offset, pool->index);
+	return -1;
+}
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v2 07/17] net/mlx5: add actions creating for CT
  2021-05-05  4:19 ` [dpdk-dev] [PATCH v2 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (5 preceding siblings ...)
  2021-05-05  4:19   ` [dpdk-dev] [PATCH v2 06/17] net/mlx5: add modify support for CT Bing Zhao
@ 2021-05-05  4:19   ` Bing Zhao
  2021-05-05  4:19   ` [dpdk-dev] [PATCH v2 08/17] net/mlx5: close CT management structure Bing Zhao
                     ` (10 subsequent siblings)
  17 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  4:19 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

Allocate a CT from the management pools and create the DR actions
for both directions by default.

If there is no available connection tracking action, a new pool is
created with a fixed-size bulk allocation. Right now, all the
resources are managed through a linked list.

The ASO connection tracking context associated with these actions
needs to be updated via a WQE before it is used for steering.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.h         |   4 +
 drivers/net/mlx5/mlx5_flow.h    |  29 +++-
 drivers/net/mlx5/mlx5_flow_dv.c | 263 ++++++++++++++++++++++++++++++++
 3 files changed, 295 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 1898a0401f..de18a59c8e 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -987,6 +987,10 @@ struct mlx5_bond_info {
 /* Number of connection tracking objects per pool: must be a power of 2. */
 #define MLX5_ASO_CT_ACTIONS_PER_POOL 64
 
+/* Generate incremental and unique CT index from pool and offset. */
+#define MLX5_MAKE_CT_IDX(pool, offset) \
+	((pool) * MLX5_ASO_CT_ACTIONS_PER_POOL + (offset) + 1)
+
 /* ASO Conntrack state. */
 enum mlx5_aso_ct_state {
 	ASO_CONNTRACK_FREE, /* Inactive, in the free list. */
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index eb5b53ac6a..8f2bc7d2f6 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -45,7 +45,7 @@ enum mlx5_rte_flow_action_type {
 enum {
 	MLX5_INDIRECT_ACTION_TYPE_RSS,
 	MLX5_INDIRECT_ACTION_TYPE_AGE,
-	MLX5_INDIRECT_ACTION_TYPE_AGE,
+	MLX5_INDIRECT_ACTION_TYPE_CT,
 };
 
 /* Matches on selected register. */
@@ -1288,6 +1288,33 @@ mlx5_aso_meter_by_idx(struct mlx5_priv *priv, uint32_t idx)
 	return &pool->mtrs[idx % MLX5_ASO_MTRS_PER_POOL];
 }
 
+/*
+ * Get ASO CT action by index.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] idx
+ *   Index to the ASO CT action.
+ *
+ * @return
+ *   The specified ASO CT action pointer.
+ */
+static inline struct mlx5_aso_ct_action *
+flow_aso_ct_get_by_idx(struct rte_eth_dev *dev, uint32_t idx)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	struct mlx5_aso_ct_pool *pool;
+
+	idx--;
+	MLX5_ASSERT((idx / MLX5_ASO_CT_ACTIONS_PER_POOL) < mng->n);
+	/* Bit operation AND could be used. */
+	rte_rwlock_read_lock(&mng->resize_rwl);
+	pool = mng->pools[idx / MLX5_ASO_CT_ACTIONS_PER_POOL];
+	rte_rwlock_read_unlock(&mng->resize_rwl);
+	return &pool->actions[idx % MLX5_ASO_CT_ACTIONS_PER_POOL];
+}
+
 int mlx5_flow_group_to_table(struct rte_eth_dev *dev,
 			     const struct mlx5_flow_tunnel *tunnel,
 			     uint32_t group, uint32_t *table,
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 0d022dff3f..c8ff693e4c 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -11120,6 +11120,262 @@ flow_dv_translate_create_aso_age(struct rte_eth_dev *dev,
 	return age_idx;
 }
 
+/*
+ * Release an ASO CT action.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] idx
+ *   Index of ASO CT action to release.
+ *
+ * @return
+ *   0 when CT action was removed, otherwise the number of references.
+ */
+static inline int
+flow_dv_aso_ct_release(struct rte_eth_dev *dev, uint32_t idx)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	struct mlx5_aso_ct_action *ct = flow_aso_ct_get_by_idx(dev, idx);
+	uint32_t ret = __atomic_sub_fetch(&ct->refcnt, 1, __ATOMIC_RELAXED);
+
+	if (!ret) {
+		if (ct->dr_action_orig) {
+#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
+			claim_zero(mlx5_glue->destroy_flow_action
+					(ct->dr_action_orig));
+#endif
+			ct->dr_action_orig = NULL;
+		}
+		if (ct->dr_action_rply) {
+#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
+			claim_zero(mlx5_glue->destroy_flow_action
+					(ct->dr_action_rply));
+#endif
+			ct->dr_action_rply = NULL;
+		}
+		rte_spinlock_lock(&mng->ct_sl);
+		LIST_INSERT_HEAD(&mng->free_cts, ct, next);
+		rte_spinlock_unlock(&mng->ct_sl);
+	}
+	return ret;
+}
+
+/*
+ * Resize the ASO CT pools array by 64 pools.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ *
+ * @return
+ *   0 on success, otherwise negative errno value and rte_errno is set.
+ */
+static int
+flow_dv_aso_ct_pools_resize(struct rte_eth_dev *dev)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	void *old_pools = mng->pools;
+	/* Magic number now, need a macro. */
+	uint32_t resize = mng->n + 64;
+	uint32_t mem_size = sizeof(struct mlx5_aso_ct_pool *) * resize;
+	void *pools = mlx5_malloc(MLX5_MEM_ZERO, mem_size, 0, SOCKET_ID_ANY);
+
+	if (!pools) {
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+	rte_rwlock_write_lock(&mng->resize_rwl);
+	/* ASO SQ/QP was already initialized in the startup. */
+	if (old_pools) {
+		/* Realloc could be an alternative choice. */
+		rte_memcpy(pools, old_pools,
+			   mng->n * sizeof(struct mlx5_aso_ct_pool *));
+		mlx5_free(old_pools);
+	}
+	mng->n = resize;
+	mng->pools = pools;
+	rte_rwlock_write_unlock(&mng->resize_rwl);
+	return 0;
+}
+
+/*
+ * Create and initialize a new ASO CT pool.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[out] ct_free
+ *   Where to put the pointer of a new CT action.
+ *
+ * @return
+ *   The CT actions pool pointer and @p ct_free is set on success,
+ *   NULL otherwise and rte_errno is set.
+ */
+static struct mlx5_aso_ct_pool *
+flow_dv_ct_pool_create(struct rte_eth_dev *dev,
+		       struct mlx5_aso_ct_action **ct_free)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	struct mlx5_aso_ct_pool *pool = NULL;
+	struct mlx5_devx_obj *obj = NULL;
+	uint32_t i;
+	uint32_t log_obj_size = rte_log2_u32(MLX5_ASO_CT_ACTIONS_PER_POOL);
+
+	obj = mlx5_devx_cmd_create_conn_track_offload_obj(priv->sh->ctx,
+						priv->sh->pdn, log_obj_size);
+	if (!obj) {
+		rte_errno = ENODATA;
+		DRV_LOG(ERR, "Failed to create conn_track_offload_obj using DevX.");
+		return NULL;
+	}
+	pool = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*pool), 0, SOCKET_ID_ANY);
+	if (!pool) {
+		rte_errno = ENOMEM;
+		claim_zero(mlx5_devx_cmd_destroy(obj));
+		return NULL;
+	}
+	pool->devx_obj = obj;
+	pool->index = mng->next;
+	/* Resize pools array if there is no room for the new pool in it. */
+	if (pool->index == mng->n && flow_dv_aso_ct_pools_resize(dev)) {
+		claim_zero(mlx5_devx_cmd_destroy(obj));
+		mlx5_free(pool);
+		return NULL;
+	}
+	mng->pools[pool->index] = pool;
+	mng->next++;
+	/* Assign the first action in the new pool, the rest go to free list. */
+	*ct_free = &pool->actions[0];
+	/* Lock outside, the list operation is safe here. */
+	for (i = 1; i < MLX5_ASO_CT_ACTIONS_PER_POOL; i++) {
+		/* refcnt is 0 when allocating the memory. */
+		pool->actions[i].offset = i;
+		LIST_INSERT_HEAD(&mng->free_cts, &pool->actions[i], next);
+	}
+	return pool;
+}
+
+/*
+ * Allocate an ASO CT action from the free list.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   Index to ASO CT action on success, 0 otherwise and rte_errno is set.
+ */
+static uint32_t
+flow_dv_aso_ct_alloc(struct rte_eth_dev *dev, struct rte_flow_error *error)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	struct mlx5_aso_ct_action *ct = NULL;
+	struct mlx5_aso_ct_pool *pool;
+	uint8_t reg_c;
+	uint32_t ct_idx;
+
+	MLX5_ASSERT(mng);
+	if (!priv->config.devx) {
+		rte_errno = ENOTSUP;
+		return 0;
+	}
+	/* Get a free CT action, if no, a new pool will be created. */
+	rte_spinlock_lock(&mng->ct_sl);
+	ct = LIST_FIRST(&mng->free_cts);
+	if (ct) {
+		LIST_REMOVE(ct, next);
+	} else if (!flow_dv_ct_pool_create(dev, &ct)) {
+		rte_spinlock_unlock(&mng->ct_sl);
+		rte_flow_error_set(error, rte_errno, RTE_FLOW_ERROR_TYPE_ACTION,
+				   NULL, "failed to create ASO CT pool");
+		return 0;
+	}
+	rte_spinlock_unlock(&mng->ct_sl);
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	ct_idx = MLX5_MAKE_CT_IDX(pool->index, ct->offset);
+	/* 0: inactive, 1: created, 2+: used by flows. */
+	__atomic_store_n(&ct->refcnt, 1, __ATOMIC_RELAXED);
+	reg_c = mlx5_flow_get_reg_id(dev, MLX5_ASO_CONNTRACK, 0, error);
+	if (!ct->dr_action_orig) {
+#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
+		ct->dr_action_orig = mlx5_glue->dv_create_flow_action_aso
+			(priv->sh->rx_domain, pool->devx_obj->obj,
+			 ct->offset,
+			 MLX5DV_DR_ACTION_FLAGS_ASO_CT_DIRECTION_INITIATOR,
+			 reg_c - REG_C_0);
+#else
+		RTE_SET_USED(reg_c);
+#endif
+		if (!ct->dr_action_orig) {
+			flow_dv_aso_ct_release(dev, ct_idx);
+			rte_flow_error_set(error, rte_errno,
+					   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					   "failed to create ASO CT action");
+			return 0;
+		}
+	}
+	if (!ct->dr_action_rply) {
+#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
+		ct->dr_action_rply = mlx5_glue->dv_create_flow_action_aso
+			(priv->sh->rx_domain, pool->devx_obj->obj,
+			 ct->offset,
+			 MLX5DV_DR_ACTION_FLAGS_ASO_CT_DIRECTION_RESPONDER,
+			 reg_c - REG_C_0);
+#endif
+		if (!ct->dr_action_rply) {
+			flow_dv_aso_ct_release(dev, ct_idx);
+			rte_flow_error_set(error, rte_errno,
+					   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					   "failed to create ASO CT action");
+			return 0;
+		}
+	}
+	return ct_idx;
+}
+
+/*
+ * Create a conntrack object with context and actions by using ASO mechanism.
+ *
+ * @param[in] dev
+ *   Pointer to rte_eth_dev structure.
+ * @param[in] pro
+ *   Pointer to conntrack information profile.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   Index to conntrack object on success, 0 otherwise and @p error is set.
+ */
+static uint32_t
+flow_dv_translate_create_conntrack(struct rte_eth_dev *dev,
+				   const struct rte_flow_action_conntrack *pro,
+				   struct rte_flow_error *error)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
+	struct mlx5_aso_ct_action *ct;
+	uint32_t idx;
+
+	/*
+	 * rte_flow_error_set() returns a negative value, which must not be
+	 * propagated through the unsigned return type: 0 is the only
+	 * failure indication of this function.
+	 */
+	if (!sh->ct_aso_en) {
+		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+				   NULL, "Connection is not supported");
+		return 0;
+	}
+	idx = flow_dv_aso_ct_alloc(dev, error);
+	if (!idx) {
+		rte_flow_error_set(error, rte_errno, RTE_FLOW_ERROR_TYPE_ACTION,
+				   NULL, "Failed to allocate CT object");
+		return 0;
+	}
+	ct = flow_aso_ct_get_by_idx(dev, idx);
+	if (mlx5_aso_ct_update_by_wqe(sh, ct, pro)) {
+		/* Drop the creation reference so that the CT is not leaked. */
+		flow_dv_aso_ct_release(dev, idx);
+		rte_flow_error_set(error, EBUSY, RTE_FLOW_ERROR_TYPE_ACTION,
+				   NULL, "Failed to update CT");
+		return 0;
+	}
+	return idx;
+}
+
 /**
  * Fill the flow with DV spec, lock free
  * (mutex should be acquired by caller).
@@ -13317,6 +13573,7 @@ flow_dv_action_create(struct rte_eth_dev *dev,
 {
 	uint32_t idx = 0;
 	uint32_t ret = 0;
+	struct mlx5_priv *priv = dev->data->dev_private;
 
 	switch (action->type) {
 	case RTE_FLOW_ACTION_TYPE_RSS:
@@ -13337,6 +13594,12 @@ flow_dv_action_create(struct rte_eth_dev *dev,
 							 (void *)(uintptr_t)idx;
 		}
 		break;
+	case RTE_FLOW_ACTION_TYPE_CONNTRACK:
+		ret = flow_dv_translate_create_conntrack(dev, action->conf,
+							 err);
+		idx = (MLX5_INDIRECT_ACTION_TYPE_CT <<
+		       MLX5_INDIRECT_ACTION_TYPE_OFFSET) | ret;
+		break;
 	default:
 		rte_flow_error_set(err, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
 				   NULL, "action type not supported");
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v2 08/17] net/mlx5: close CT management structure
  2021-05-05  4:19 ` [dpdk-dev] [PATCH v2 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (6 preceding siblings ...)
  2021-05-05  4:19   ` [dpdk-dev] [PATCH v2 07/17] net/mlx5: add actions creating " Bing Zhao
@ 2021-05-05  4:19   ` Bing Zhao
  2021-05-05  4:19   ` [dpdk-dev] [PATCH v2 09/17] net/mlx5: add ASO CT query implementation Bing Zhao
                     ` (9 subsequent siblings)
  17 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  4:19 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

When freeing the IB shared context during stopping a device, the
ASO connection tracking management structure should also be cleaned
up.

All the DR actions created should be destroyed. The structures need
to be freed and ASO CT QP should be released. In the meanwhile, the
allocated and registered memory region for query should also be
deregistered and then freed.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.c          | 56 ++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_aso.c |  4 +++
 2 files changed, 60 insertions(+)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 86dbe6d573..d563da109a 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -706,6 +706,60 @@ mlx5_flow_aso_ct_mng_init(struct mlx5_dev_ctx_shared *sh)
 	return 0;
 }
 
+/*
+ * Close and release all the resources of the
+ * ASO connection tracking management structure.
+ *
+ * Uninitializes the ASO CT queue, then walks the pools from the last one
+ * down to the first. For every CT object the reference count is decremented
+ * (it is expected to be 1; objects with a higher count are reported as still
+ * being used) and the DR actions still attached to it are destroyed. The
+ * pool DevX object is destroyed and the pool is freed. Finally the pools
+ * array and the management structure are freed and sh->ct_mng is reset.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object to free.
+ */
+static void
+mlx5_flow_aso_ct_mng_close(struct mlx5_dev_ctx_shared *sh)
+{
+	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
+	struct mlx5_aso_ct_pool *ct_pool;
+	struct mlx5_aso_ct_action *ct;
+	uint32_t idx;
+	uint32_t val;
+	uint32_t cnt;
+	int i;
+
+	mlx5_aso_queue_uninit(sh, ASO_OPC_MOD_CONNECTION_TRACKING);
+	idx = mng->next;
+	while (idx--) {
+		cnt = 0;
+		ct_pool = mng->pools[idx];
+		for (i = 0; i < MLX5_ASO_CT_ACTIONS_PER_POOL; i++) {
+			ct = &ct_pool->actions[i];
+			val = __atomic_fetch_sub(&ct->refcnt, 1,
+						 __ATOMIC_RELAXED);
+			MLX5_ASSERT(val == 1);
+			if (val > 1)
+				cnt++;
+#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
+			if (ct->dr_action_orig)
+				claim_zero(mlx5_glue->destroy_flow_action
+							(ct->dr_action_orig));
+			if (ct->dr_action_rply)
+				claim_zero(mlx5_glue->destroy_flow_action
+							(ct->dr_action_rply));
+#endif
+		}
+		claim_zero(mlx5_devx_cmd_destroy(ct_pool->devx_obj));
+		if (cnt) {
+			/* Report the pool index, not the inner loop counter. */
+			DRV_LOG(DEBUG, "%u ASO CT objects are being used in the pool %u",
+				cnt, idx);
+		}
+		mlx5_free(ct_pool);
+		/* in case of failure. */
+		mng->next--;
+	}
+	mlx5_free(mng->pools);
+	mlx5_free(mng);
+	/* Management structure must be cleared to 0s during allocation. */
+	sh->ct_mng = NULL;
+}
+
 /**
  * Initialize the flow resources' indexed mempool.
  *
@@ -1508,6 +1562,8 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	if (priv->mreg_cp_tbl)
 		mlx5_hlist_destroy(priv->mreg_cp_tbl);
 	mlx5_mprq_free_mp(dev);
+	if (priv->sh->ct_mng)
+		mlx5_flow_aso_ct_mng_close(priv->sh);
 	mlx5_os_free_shared_dr(priv);
 	if (priv->rss_conf.rss_key != NULL)
 		mlx5_free(priv->rss_conf.rss_key);
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index 28b0c05814..c93764f684 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -372,6 +372,10 @@ mlx5_aso_queue_uninit(struct mlx5_dev_ctx_shared *sh,
 	case ASO_OPC_MOD_POLICER:
 		sq = &sh->mtrmng->pools_mng.sq;
 		break;
+	case ASO_OPC_MOD_CONNECTION_TRACKING:
+		mlx5_aso_dereg_mr(sh, &sh->ct_mng->aso_sq.mr);
+		sq = &sh->ct_mng->aso_sq;
+		break;
 	default:
 		DRV_LOG(ERR, "Unknown ASO operation mode");
 		return;
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v2 09/17] net/mlx5: add ASO CT query implementation
  2021-05-05  4:19 ` [dpdk-dev] [PATCH v2 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (7 preceding siblings ...)
  2021-05-05  4:19   ` [dpdk-dev] [PATCH v2 08/17] net/mlx5: close CT management structure Bing Zhao
@ 2021-05-05  4:19   ` Bing Zhao
  2021-05-05  4:19   ` [dpdk-dev] [PATCH v2 10/17] net/mlx5: add ASO CT destroy handling Bing Zhao
                     ` (8 subsequent siblings)
  17 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  4:19 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

After the connection tracking context is created and being used by
the flows, the context will be updated by the HW automatically after
a packet passed the CT validation. E.g., the ACK, SEQ, window and
state of CT can be updated with both direction traffic.

In order to query the updated contents of this context, a WQE should
be posted to the SQ with a return buffer. The data will be filled
into the buffer. And the profile will be filled with specific value.

During the execution of query command, the context may be updated.
The result of the query command may not be the latest one.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.h          |  10 +-
 drivers/net/mlx5/mlx5_flow_aso.c | 245 +++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_dv.c  |  19 +++
 3 files changed, 273 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index de18a59c8e..d2827e78d7 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -490,7 +490,10 @@ struct mlx5_aso_sq_elem {
 			uint16_t burst_size;
 		};
 		struct mlx5_aso_mtr *mtr;
-		struct mlx5_aso_ct_action *ct;
+		struct {
+			struct mlx5_aso_ct_action *ct;
+			char *query_data;
+		};
 	};
 };
 
@@ -1702,5 +1705,10 @@ int mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh,
 int mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
 			      struct mlx5_aso_ct_action *ct,
 			      const struct rte_flow_action_conntrack *profile);
+int mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh,
+			   struct mlx5_aso_ct_action *ct);
+int mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
+			     struct mlx5_aso_ct_action *ct,
+			     struct rte_flow_action_conntrack *profile);
 
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index c93764f684..271750c609 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -933,6 +933,7 @@ mlx5_aso_ct_sq_enqueue_single(struct mlx5_aso_ct_pools_mng *mng,
 	/* Fill next WQE. */
 	MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_WAIT);
 	sq->elts[sq->head & mask].ct = ct;
+	sq->elts[sq->head & mask].query_data = NULL;
 	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
 	/* Each WQE will have a single CT object. */
 	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
@@ -1048,9 +1049,95 @@ mlx5_aso_ct_status_update(struct mlx5_aso_sq *sq, uint16_t num)
 		ct = sq->elts[idx].ct;
 		MLX5_ASSERT(ct);
 		MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_READY);
+		if (sq->elts[idx].query_data)
+			rte_memcpy(sq->elts[idx].query_data,
+				   (char *)((uintptr_t)sq->mr.addr + idx * 64),
+				   64);
 	}
 }
 
+/*
+ * Post a WQE to the ASO CT SQ to query the current context.
+ *
+ * Nothing is posted when the CT object is in the FREE state (reported as an
+ * error), when it is in the WAIT state, or when the SQ has no free WQE. On
+ * success the CT object is moved to the QUERY state and the data pointer is
+ * recorded in the SQ element that belongs to the posted WQE.
+ *
+ * @param[in] mng
+ *   Pointer to the CT pools management structure.
+ * @param[in] ct
+ *   Pointer to the generic CT structure related to the context.
+ * @param[in] data
+ *   Pointer to data area to be filled.
+ *
+ * @return
+ *   1 on success (WQE number), 0 if the WQE cannot be posted for now
+ *   (CT object in WAIT state or SQ full), -1 if the CT object is in FREE
+ *   state.
+ */
+static int
+mlx5_aso_ct_sq_query_single(struct mlx5_aso_ct_pools_mng *mng,
+			    struct mlx5_aso_ct_action *ct, char *data)
+{
+	volatile struct mlx5_aso_wqe *wqe = NULL;
+	struct mlx5_aso_sq *sq = &mng->aso_sq;
+	uint16_t size = 1 << sq->log_desc_n;
+	uint16_t mask = size - 1;
+	uint16_t res;
+	uint16_t wqe_idx;
+	struct mlx5_aso_ct_pool *pool;
+	enum mlx5_aso_ct_state state =
+				__atomic_load_n(&ct->state, __ATOMIC_RELAXED);
+
+	if (state == ASO_CONNTRACK_FREE) {
+		DRV_LOG(ERR, "Fail: No context to query");
+		return -1;
+	} else if (state == ASO_CONNTRACK_WAIT) {
+		return 0;
+	}
+	rte_spinlock_lock(&sq->sqsl);
+	res = size - (uint16_t)(sq->head - sq->tail);
+	if (unlikely(!res)) {
+		rte_spinlock_unlock(&sq->sqsl);
+		DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
+		return 0;
+	}
+	MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_QUERY);
+	wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
+	/* Confirm the location and address of the prefetch instruction. */
+	rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
+	/* Fill next WQE. */
+	wqe_idx = sq->head & mask;
+	sq->elts[wqe_idx].ct = ct;
+	sq->elts[wqe_idx].query_data = data;
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	/* Each WQE will have a single CT object. */
+	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
+						  ct->offset);
+	wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
+			(ASO_OPC_MOD_CONNECTION_TRACKING <<
+			 WQE_CSEG_OPC_MOD_OFFSET) |
+			sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
+	/*
+	 * No write request is required.
+	 * ASO_OPER_LOGICAL_AND and ASO_OP_ALWAYS_FALSE are both 0.
+	 * "BYTEWISE_64BYTE" is needed for a whole context.
+	 * Set to 0 directly to reduce an endian swap. (Modify should rewrite.)
+	 * "data_mask" is ignored.
+	 * Buffer address was already filled during initialization.
+	 */
+	wqe->aso_cseg.operand_masks = rte_cpu_to_be_32(BYTEWISE_64BYTE <<
+					ASO_CSEG_DATA_MASK_MODE_OFFSET);
+	wqe->aso_cseg.data_mask = 0;
+	sq->head++;
+	/*
+	 * Each WQE contains 2 WQEBB's, even though
+	 * data segment is not used in this case.
+	 */
+	sq->pi += 2;
+	rte_io_wmb();
+	sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
+	rte_wmb();
+	*sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH. */
+	rte_wmb();
+	rte_spinlock_unlock(&sq->sqsl);
+	return 1;
+}
+
 /*
  * Handle completions from WQEs sent to ASO CT.
  *
@@ -1143,3 +1230,161 @@ mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
 		ct->offset, pool->index);
 	return -1;
 }
+
+/*
+ * The routine is used to wait for WQE completion to continue with queried data.
+ *
+ * Returns at once if the CT object is already in the READY state. Otherwise
+ * the completions are handled and the state is checked again, sleeping for
+ * MLX5_ASO_WQE_CQE_RESPONSE_DELAY between attempts, for at most
+ * MLX5_CT_POLL_WQE_CQE_TIMES attempts.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ * @param[in] ct
+ *   Pointer to connection tracking offload object.
+ *
+ * @return
+ *   0 on success, -1 if the CT object did not reach the READY state within
+ *   the polling attempts (an error is logged).
+ */
+int
+mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh,
+		       struct mlx5_aso_ct_action *ct)
+{
+	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
+	uint32_t poll_cqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
+	struct mlx5_aso_ct_pool *pool;
+
+	if (__atomic_load_n(&ct->state, __ATOMIC_RELAXED) ==
+	    ASO_CONNTRACK_READY)
+		return 0;
+	do {
+		mlx5_aso_ct_completion_handle(mng);
+		if (__atomic_load_n(&ct->state, __ATOMIC_RELAXED) ==
+		    ASO_CONNTRACK_READY)
+			return 0;
+		/* Waiting for CQE ready, consider should block or sleep. */
+		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
+	} while (--poll_cqe_times);
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	DRV_LOG(ERR, "Fail to poll CQE for ASO CT %d in pool %d",
+		ct->offset, pool->index);
+	return -1;
+}
+
+/*
+ * Convert the hardware conntrack data format into the profile.
+ *
+ * Every field is read from the conn_track_aso layout of wdata; the window
+ * parameters of each direction are read from the tcp_window_params layout
+ * located at original_dir and reply_dir inside it.
+ *
+ * @param[out] profile
+ *   Pointer to conntrack profile to be filled after query.
+ * @param[in] wdata
+ *   Pointer to data fetched from hardware.
+ */
+static inline void
+mlx5_aso_ct_obj_analyze(struct rte_flow_action_conntrack *profile,
+			char *wdata)
+{
+	void *o_dir = MLX5_ADDR_OF(conn_track_aso, wdata, original_dir);
+	void *r_dir = MLX5_ADDR_OF(conn_track_aso, wdata, reply_dir);
+
+	/* MLX5_GET16 should be taken into consideration. */
+	profile->state = (enum rte_flow_conntrack_state)
+			 MLX5_GET(conn_track_aso, wdata, state);
+	profile->enable = !MLX5_GET(conn_track_aso, wdata, freeze_track);
+	profile->selective_ack = MLX5_GET(conn_track_aso, wdata,
+					  sack_permitted);
+	profile->live_connection = MLX5_GET(conn_track_aso, wdata,
+					    connection_assured);
+	profile->challenge_ack_passed = MLX5_GET(conn_track_aso, wdata,
+						 challenged_acked);
+	profile->max_ack_window = MLX5_GET(conn_track_aso, wdata,
+					   max_ack_window);
+	profile->retransmission_limit = MLX5_GET(conn_track_aso, wdata,
+						 retranmission_limit);
+	profile->last_window = MLX5_GET(conn_track_aso, wdata, last_win);
+	profile->last_direction = MLX5_GET(conn_track_aso, wdata, last_dir);
+	profile->last_index = (enum rte_flow_conntrack_tcp_last_index)
+			      MLX5_GET(conn_track_aso, wdata, last_index);
+	profile->last_seq = MLX5_GET(conn_track_aso, wdata, last_seq);
+	profile->last_ack = MLX5_GET(conn_track_aso, wdata, last_ack);
+	profile->last_end = MLX5_GET(conn_track_aso, wdata, last_end);
+	profile->liberal_mode = MLX5_GET(conn_track_aso, wdata,
+				reply_direction_tcp_liberal_enabled) |
+				MLX5_GET(conn_track_aso, wdata,
+				original_direction_tcp_liberal_enabled);
+	/*
+	 * No per-direction liberal field is filled in the RTE profile: the
+	 * two hardware bits are OR-ed into liberal_mode above.
+	 */
+	profile->reply_dir.scale = MLX5_GET(conn_track_aso, wdata,
+					    reply_direction_tcp_scale);
+	profile->reply_dir.close_initiated = MLX5_GET(conn_track_aso, wdata,
+					reply_direction_tcp_close_initiated);
+	profile->reply_dir.data_unacked = MLX5_GET(conn_track_aso, wdata,
+					reply_direction_tcp_data_unacked);
+	profile->reply_dir.last_ack_seen = MLX5_GET(conn_track_aso, wdata,
+					reply_direction_tcp_max_ack);
+	profile->reply_dir.sent_end = MLX5_GET(tcp_window_params,
+					       r_dir, sent_end);
+	profile->reply_dir.reply_end = MLX5_GET(tcp_window_params,
+						r_dir, reply_end);
+	profile->reply_dir.max_win = MLX5_GET(tcp_window_params,
+					      r_dir, max_win);
+	profile->reply_dir.max_ack = MLX5_GET(tcp_window_params,
+					      r_dir, max_ack);
+	profile->original_dir.scale = MLX5_GET(conn_track_aso, wdata,
+					       original_direction_tcp_scale);
+	profile->original_dir.close_initiated = MLX5_GET(conn_track_aso, wdata,
+					original_direction_tcp_close_initiated);
+	profile->original_dir.data_unacked = MLX5_GET(conn_track_aso, wdata,
+					original_direction_tcp_data_unacked);
+	profile->original_dir.last_ack_seen = MLX5_GET(conn_track_aso, wdata,
+					original_direction_tcp_max_ack);
+	profile->original_dir.sent_end = MLX5_GET(tcp_window_params,
+						  o_dir, sent_end);
+	profile->original_dir.reply_end = MLX5_GET(tcp_window_params,
+						   o_dir, reply_end);
+	profile->original_dir.max_win = MLX5_GET(tcp_window_params,
+						 o_dir, max_win);
+	profile->original_dir.max_ack = MLX5_GET(tcp_window_params,
+						 o_dir, max_ack);
+}
+
+/*
+ * Query connection tracking information parameter by send WQE.
+ *
+ * Handles the pending completions and tries to post a query WQE, sleeping
+ * between attempts, for at most MLX5_CT_POLL_WQE_CQE_TIMES attempts. Once
+ * the WQE is posted, waits for the CT object to become ready and converts
+ * the returned data into the profile.
+ *
+ * NOTE(review): out_data lives on the stack of this function while its
+ * address is recorded in the SQ element by mlx5_aso_ct_sq_query_single().
+ * If mlx5_aso_ct_wait_ready() fails, the function returns with that pointer
+ * still recorded - confirm that a late completion cannot copy into it.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ * @param[in] ct
+ *   Pointer to connection tracking offload object.
+ * @param[out] profile
+ *   Pointer to connection tracking TCP information.
+ *
+ * @return
+ *   0 on success, -1 on failure.
+ */
+int
+mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
+			 struct mlx5_aso_ct_action *ct,
+			 struct rte_flow_action_conntrack *profile)
+{
+	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
+	uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
+	struct mlx5_aso_ct_pool *pool;
+	char out_data[64 * 2];
+	int ret;
+
+	MLX5_ASSERT(ct);
+	do {
+		mlx5_aso_ct_completion_handle(mng);
+		ret = mlx5_aso_ct_sq_query_single(mng, ct, out_data);
+		if (ret < 0)
+			return ret;
+		else if (ret > 0)
+			goto data_handle;
+		/* Waiting for wqe resource or state. */
+		else
+			rte_delay_us_sleep(10u);
+	} while (--poll_wqe_times);
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	DRV_LOG(ERR, "Fail to send WQE for ASO CT %d in pool %d",
+		ct->offset, pool->index);
+	return -1;
+data_handle:
+	ret = mlx5_aso_ct_wait_ready(sh, ct);
+	if (!ret)
+		mlx5_aso_ct_obj_analyze(profile, out_data);
+	return ret;
+}
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index c8ff693e4c..84e7f0b3d3 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -13775,6 +13775,8 @@ flow_dv_action_query(struct rte_eth_dev *dev,
 	uint32_t act_idx = (uint32_t)(uintptr_t)handle;
 	uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
 	uint32_t idx = act_idx & ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_action *ct;
 
 	switch (type) {
 	case MLX5_INDIRECT_ACTION_TYPE_AGE:
@@ -13788,6 +13790,23 @@ flow_dv_action_query(struct rte_eth_dev *dev,
 			resp->sec_since_last_hit = __atomic_load_n
 			     (&age_param->sec_since_last_hit, __ATOMIC_RELAXED);
 		return 0;
+	case MLX5_INDIRECT_ACTION_TYPE_CT:
+		ct = flow_aso_ct_get_by_idx(dev, idx);
+		if (!ct->refcnt)
+			return rte_flow_error_set(error, EFAULT,
+					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					NULL,
+					"CT object is inactive");
+		((struct rte_flow_action_conntrack *)data)->peer_port =
+							ct->peer;
+		((struct rte_flow_action_conntrack *)data)->is_original_dir =
+							ct->is_original;
+		if (mlx5_aso_ct_query_by_wqe(priv->sh, ct, data))
+			return rte_flow_error_set(error, EIO,
+					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					NULL,
+					"Failed to query CT context");
+		return 0;
 	default:
 		return rte_flow_error_set(error, ENOTSUP,
 					  RTE_FLOW_ERROR_TYPE_ACTION,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v2 10/17] net/mlx5: add ASO CT destroy handling
  2021-05-05  4:19 ` [dpdk-dev] [PATCH v2 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (8 preceding siblings ...)
  2021-05-05  4:19   ` [dpdk-dev] [PATCH v2 09/17] net/mlx5: add ASO CT query implementation Bing Zhao
@ 2021-05-05  4:19   ` Bing Zhao
  2021-05-05  4:19   ` [dpdk-dev] [PATCH v2 11/17] net/mlx5: add translation of CT action Bing Zhao
                     ` (7 subsequent siblings)
  17 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  4:19 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

When trying to destroy an ASO connection tracking context, the DR
action created on this context should also be destroyed. Before
inserting the related software object into the management free list,
the reference count should be checked.

Right now, the context object will not be freed to the system and
will be reused directly from the free list.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow_dv.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 84e7f0b3d3..0fa0671ace 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -11136,9 +11136,15 @@ flow_dv_aso_ct_release(struct rte_eth_dev *dev, uint32_t idx)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	uint32_t ret;
 	struct mlx5_aso_ct_action *ct = flow_aso_ct_get_by_idx(dev, idx);
-	uint32_t ret = __atomic_sub_fetch(&ct->refcnt, 1, __ATOMIC_RELAXED);
+	enum mlx5_aso_ct_state state =
+			__atomic_load_n(&ct->state, __ATOMIC_RELAXED);
 
+	/* Cannot release when CT is in the ASO SQ. */
+	if (state == ASO_CONNTRACK_WAIT || state == ASO_CONNTRACK_QUERY)
+		return -1;
+	ret = __atomic_sub_fetch(&ct->refcnt, 1, __ATOMIC_RELAXED);
 	if (!ret) {
 		if (ct->dr_action_orig) {
 #ifdef HAVE_MLX5_DR_ACTION_ASO_CT
@@ -11154,6 +11160,8 @@ flow_dv_aso_ct_release(struct rte_eth_dev *dev, uint32_t idx)
 #endif
 			ct->dr_action_rply = NULL;
 		}
+		/* Clear the state to free, no need in 1st allocation. */
+		MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_FREE);
 		rte_spinlock_lock(&mng->ct_sl);
 		LIST_INSERT_HEAD(&mng->free_cts, ct, next);
 		rte_spinlock_unlock(&mng->ct_sl);
@@ -13648,6 +13656,12 @@ flow_dv_action_destroy(struct rte_eth_dev *dev,
 			DRV_LOG(DEBUG, "Indirect age action %" PRIu32 " was"
 				" released with references %d.", idx, ret);
 		return 0;
+	case MLX5_INDIRECT_ACTION_TYPE_CT:
+		ret = flow_dv_aso_ct_release(dev, idx);
+		if (ret)
+			DRV_LOG(DEBUG, "Connection tracking object %u still "
+				"has references %d.", idx, ret);
+		return 0;
 	default:
 		return rte_flow_error_set(error, ENOTSUP,
 					  RTE_FLOW_ERROR_TYPE_ACTION,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v2 11/17] net/mlx5: add translation of CT action
  2021-05-05  4:19 ` [dpdk-dev] [PATCH v2 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (9 preceding siblings ...)
  2021-05-05  4:19   ` [dpdk-dev] [PATCH v2 10/17] net/mlx5: add ASO CT destroy handling Bing Zhao
@ 2021-05-05  4:19   ` Bing Zhao
  2021-05-05  4:19   ` [dpdk-dev] [PATCH v2 12/17] net/mlx5: add translation of CT item Bing Zhao
                     ` (6 subsequent siblings)
  17 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  4:19 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

When creating a flow with this action context for CT, it needs to be
translated in 2 levels.

First, retrieve from action context to rte_flow action.
Second, translate it to the corresponding DR action with traffic
direction that was specified when creating or updating via
rte_flow_action_handle* API.

Before using the DR action in a flow, the CT context should be
available to use in the hardware. A synchronization is done before
inserting the flow rule with CT action to check the HW availability
of this CT context.

In order to release the DR actions and reuse the context of a CT,
the reference count should also be handled when destroying the
flow.

The CT index will be recorded in the rte_flow by reusing the ASO age
index to save memory, since only one ASO action is supported in one
flow rule currently. The action context type should also be saved
for CT. When destroying a flow rule, if the context type is CT and
the index is valid (non-zero), the release process should be
handled. By default, the handling will fall back to try to release
the ASO age if any.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.h          |  2 ++
 drivers/net/mlx5/mlx5_flow.c     |  9 +++++++
 drivers/net/mlx5/mlx5_flow.h     |  7 +++++-
 drivers/net/mlx5/mlx5_flow_aso.c | 41 ++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_dv.c  | 28 +++++++++++++++++++++-
 5 files changed, 85 insertions(+), 2 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index d2827e78d7..d01a10ea54 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1710,5 +1710,7 @@ int mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh,
 int mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
 			     struct mlx5_aso_ct_action *ct,
 			     struct rte_flow_action_conntrack *profile);
+int mlx5_aso_ct_available(struct mlx5_dev_ctx_shared *sh,
+			  struct mlx5_aso_ct_action *ct);
 
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index edad6007a8..f36eeae03f 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -3553,6 +3553,15 @@ flow_action_handles_translate(struct rte_eth_dev *dev,
 				break;
 			}
 			/* Fall-through */
+		case MLX5_INDIRECT_ACTION_TYPE_CT:
+			if (priv->sh->ct_aso_en) {
+				translated[handle->index].type =
+					RTE_FLOW_ACTION_TYPE_CONNTRACK;
+				translated[handle->index].conf =
+							 (void *)(uintptr_t)idx;
+				break;
+			}
+			/* Fall-through */
 		default:
 			mlx5_free(translated);
 			return rte_flow_error_set
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 8f2bc7d2f6..286e3fb6a4 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -226,6 +226,7 @@ enum mlx5_feature_name {
 #define MLX5_FLOW_ACTION_TUNNEL_MATCH (1ull << 38)
 #define MLX5_FLOW_ACTION_MODIFY_FIELD (1ull << 39)
 #define MLX5_FLOW_ACTION_METER_WITH_TERMINATED_POLICY (1ull << 40)
+#define MLX5_FLOW_ACTION_CT (1ull << 41)
 
 #define MLX5_FLOW_FATE_ACTIONS \
 	(MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_QUEUE | \
@@ -969,11 +970,15 @@ struct rte_flow {
 	uint32_t drv_type:2; /**< Driver type. */
 	uint32_t tunnel:1;
 	uint32_t meter:24; /**< Holds flow meter id. */
+	uint32_t indirect_type:2; /**< Indirect action type. */
 	uint32_t rix_mreg_copy;
 	/**< Index to metadata register copy table resource. */
 	uint32_t counter; /**< Holds flow counter. */
 	uint32_t tunnel_id;  /**< Tunnel id */
-	uint32_t age; /**< Holds ASO age bit index. */
+	union {
+		uint32_t age; /**< Holds ASO age bit index. */
+		uint32_t ct; /**< Holds ASO CT index. */
+	};
 	uint32_t geneve_tlv_option; /**< Holds Geneve TLV option id. > */
 } __rte_packed;
 
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index 271750c609..9ab4cfdd81 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -1388,3 +1388,44 @@ mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
 		mlx5_aso_ct_obj_analyze(profile, out_data);
 	return ret;
 }
+
+/*
+ * Make sure the conntrack context is synchronized with hardware before
+ * creating a flow rule that uses it.
+ *
+ * A CT object in the READY or QUERY state is accepted as available. A CT
+ * object in the FREE state is rejected with ENXIO. In any other state the
+ * completions are handled and the state is checked again, sleeping for
+ * MLX5_ASO_WQE_CQE_RESPONSE_DELAY between attempts, for at most
+ * MLX5_CT_POLL_WQE_CQE_TIMES attempts; EBUSY is reported after that.
+ *
+ * @param[in] sh
+ *   Pointer to shared device context.
+ * @param[in] ct
+ *   Pointer to connection tracking offload object.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_aso_ct_available(struct mlx5_dev_ctx_shared *sh,
+		      struct mlx5_aso_ct_action *ct)
+{
+	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
+	uint32_t poll_cqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
+	enum mlx5_aso_ct_state state =
+				__atomic_load_n(&ct->state, __ATOMIC_RELAXED);
+
+	if (state == ASO_CONNTRACK_FREE) {
+		rte_errno = ENXIO;
+		return -rte_errno;
+	} else if (state == ASO_CONNTRACK_READY ||
+		   state == ASO_CONNTRACK_QUERY) {
+		return 0;
+	}
+	do {
+		mlx5_aso_ct_completion_handle(mng);
+		state = __atomic_load_n(&ct->state, __ATOMIC_RELAXED);
+		if (state == ASO_CONNTRACK_READY ||
+		    state == ASO_CONNTRACK_QUERY)
+			return 0;
+		/* Waiting for CQE ready, consider should block or sleep. */
+		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
+	} while (--poll_cqe_times);
+	rte_errno = EBUSY;
+	return -rte_errno;
+}
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 0fa0671ace..14af900267 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -11381,6 +11381,7 @@ flow_dv_translate_create_conntrack(struct rte_eth_dev *dev,
 		return rte_flow_error_set(error, EBUSY,
 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
 					  "Failed to update CT");
+	ct->is_original = !!pro->is_original_dir;
 	return idx;
 }
 
@@ -11544,6 +11545,8 @@ flow_dv_translate(struct rte_eth_dev *dev,
 		const struct rte_flow_action *found_action = NULL;
 		uint32_t jump_group = 0;
 		struct mlx5_flow_counter *cnt;
+		uint32_t ct_idx;
+		struct mlx5_aso_ct_action *ct;
 
 		if (!mlx5_flow_os_action_supported(action_type))
 			return rte_flow_error_set(error, ENOTSUP,
@@ -12017,6 +12020,26 @@ flow_dv_translate(struct rte_eth_dev *dev,
 				return -rte_errno;
 			action_flags |= MLX5_FLOW_ACTION_MODIFY_FIELD;
 			break;
+		case RTE_FLOW_ACTION_TYPE_CONNTRACK:
+			ct_idx = (uint32_t)(uintptr_t)action->conf;
+			ct = flow_aso_ct_get_by_idx(dev, ct_idx);
+			if (mlx5_aso_ct_available(priv->sh, ct))
+				return rte_flow_error_set(error, rte_errno,
+						RTE_FLOW_ERROR_TYPE_ACTION,
+						NULL,
+						"CT is unavailable.");
+			if (ct->is_original)
+				dev_flow->dv.actions[actions_n] =
+							ct->dr_action_orig;
+			else
+				dev_flow->dv.actions[actions_n] =
+							ct->dr_action_rply;
+			flow->indirect_type = MLX5_INDIRECT_ACTION_TYPE_CT;
+			flow->ct = ct_idx;
+			__atomic_fetch_add(&ct->refcnt, 1, __ATOMIC_RELAXED);
+			actions_n++;
+			action_flags |= MLX5_FLOW_ACTION_CT;
+			break;
 		case RTE_FLOW_ACTION_TYPE_END:
 			actions_end = true;
 			if (mhdr_res->actions_num) {
@@ -13152,7 +13175,10 @@ flow_dv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
 			mlx5_flow_meter_detach(priv, fm);
 		flow->meter = 0;
 	}
-	if (flow->age)
+	/* Keep the current age handling by default. */
+	if (flow->indirect_type == MLX5_INDIRECT_ACTION_TYPE_CT && flow->ct)
+		flow_dv_aso_ct_release(dev, flow->ct);
+	else if (flow->age)
 		flow_dv_aso_age_release(dev, flow->age);
 	if (flow->geneve_tlv_option) {
 		flow_dv_geneve_tlv_option_resource_release(dev);
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v2 12/17] net/mlx5: add translation of CT item
  2021-05-05  4:19 ` [dpdk-dev] [PATCH v2 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (10 preceding siblings ...)
  2021-05-05  4:19   ` [dpdk-dev] [PATCH v2 11/17] net/mlx5: add translation of CT action Bing Zhao
@ 2021-05-05  4:19   ` Bing Zhao
  2021-05-05  4:20   ` [dpdk-dev] [PATCH v2 13/17] net/mlx5: add CT context update Bing Zhao
                     ` (5 subsequent siblings)
  17 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  4:19 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

The return register of the DR action will be used for matching.
After the ASO CT checking of a TCP packet, the syndrome is filled in
the register. Only the 8 LSB should be used. A conversion from
RTE_FLOW_CONNTRACK_FLAG* to the syndrome should be done after
checking the spec and mask fields.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow.h    |  7 ++++
 drivers/net/mlx5/mlx5_flow_dv.c | 62 +++++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 286e3fb6a4..eb0bb42161 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -405,6 +405,13 @@ enum mlx5_feature_name {
 /* Maximum number of fields to modify in MODIFY_FIELD */
 #define MLX5_ACT_MAX_MOD_FIELDS 5
 
+/* Syndrome bits definition for connection tracking. */
+#define MLX5_CT_SYNDROME_VALID		(0x0 << 6)
+#define MLX5_CT_SYNDROME_INVALID	(0x1 << 6)
+#define MLX5_CT_SYNDROME_TRAP		(0x2 << 6)
+#define MLX5_CT_SYNDROME_STATE_CHANGE	(0x1 << 1)
+#define MLX5_CT_SYNDROME_BAD_PACKET	(0x1 << 0)
+
 enum mlx5_flow_drv_type {
 	MLX5_FLOW_TYPE_MIN,
 	MLX5_FLOW_TYPE_DV,
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 14af900267..b0858e3df8 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -9379,6 +9379,64 @@ flow_dv_translate_item_ecpri(struct rte_eth_dev *dev, void *matcher,
 	}
 }
 
+/*
+ * Add connection tracking status item to matcher
+ *
+ * @param[in] dev
+ *   The device to configure through.
+ * @param[in, out] matcher
+ *   Flow matcher.
+ * @param[in, out] key
+ *   Flow matcher value.
+ * @param[in] item
+ *   Flow pattern to translate.
+ */
+static void
+flow_dv_translate_item_aso_ct(struct rte_eth_dev *dev,
+			      void *matcher, void *key,
+			      const struct rte_flow_item *item)
+{
+	uint32_t reg_value = 0;
+	int reg_id;
+	/* 8LSB 0b 11/0000/11, middle 4 bits are reserved. */
+	uint32_t reg_mask = 0;
+	const struct rte_flow_item_conntrack *spec = item->spec;
+	const struct rte_flow_item_conntrack *mask = item->mask;
+	uint32_t flags;
+	struct rte_flow_error error;
+
+	if (!mask)
+		mask = &rte_flow_item_conntrack_mask;
+	if (!spec || !mask->flags)
+		return;
+	flags = spec->flags & mask->flags;
+	/* The conflict should be checked in the validation. */
+	if (flags & RTE_FLOW_CONNTRACK_PKT_STATE_VALID)
+		reg_value |= MLX5_CT_SYNDROME_VALID;
+	if (flags & RTE_FLOW_CONNTRACK_PKT_STATE_CHANGED)
+		reg_value |= MLX5_CT_SYNDROME_STATE_CHANGE;
+	if (flags & RTE_FLOW_CONNTRACK_PKT_STATE_INVALID)
+		reg_value |= MLX5_CT_SYNDROME_INVALID;
+	if (flags & RTE_FLOW_CONNTRACK_PKT_STATE_DISABLED)
+		reg_value |= MLX5_CT_SYNDROME_TRAP;
+	if (flags & RTE_FLOW_CONNTRACK_PKT_STATE_BAD)
+		reg_value |= MLX5_CT_SYNDROME_BAD_PACKET;
+	if (mask->flags & (RTE_FLOW_CONNTRACK_PKT_STATE_VALID |
+			   RTE_FLOW_CONNTRACK_PKT_STATE_INVALID |
+			   RTE_FLOW_CONNTRACK_PKT_STATE_DISABLED))
+		reg_mask |= 0xc0;
+	if (mask->flags & RTE_FLOW_CONNTRACK_PKT_STATE_CHANGED)
+		reg_mask |= MLX5_CT_SYNDROME_STATE_CHANGE;
+	if (mask->flags & RTE_FLOW_CONNTRACK_PKT_STATE_BAD)
+		reg_mask |= MLX5_CT_SYNDROME_BAD_PACKET;
+	/* The REG_C_x value could be saved during startup. */
+	reg_id = mlx5_flow_get_reg_id(dev, MLX5_ASO_CONNTRACK, 0, &error);
+	if (reg_id == REG_NON)
+		return;
+	flow_dv_match_meta_reg(matcher, key, (enum modify_reg)reg_id,
+			       reg_value, reg_mask);
+}
+
 static uint32_t matcher_zero[MLX5_ST_SZ_DW(fte_match_param)] = { 0 };
 
 #define HEADER_IS_ZERO(match_criteria, headers)				     \
@@ -12322,6 +12380,10 @@ flow_dv_translate(struct rte_eth_dev *dev,
 			/* No other protocol should follow eCPRI layer. */
 			last_item = MLX5_FLOW_LAYER_ECPRI;
 			break;
+		case RTE_FLOW_ITEM_TYPE_CONNTRACK:
+			flow_dv_translate_item_aso_ct(dev, match_mask,
+						      match_value, items);
+			break;
 		default:
 			break;
 		}
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v2 13/17] net/mlx5: add CT context update
  2021-05-05  4:19 ` [dpdk-dev] [PATCH v2 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (11 preceding siblings ...)
  2021-05-05  4:19   ` [dpdk-dev] [PATCH v2 12/17] net/mlx5: add translation of CT item Bing Zhao
@ 2021-05-05  4:20   ` Bing Zhao
  2021-05-05  4:20   ` [dpdk-dev] [PATCH v2 14/17] net/mlx5: validation of CT action Bing Zhao
                     ` (4 subsequent siblings)
  17 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  4:20 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

When updating a connection tracking context, two separate parts
could be updated.
First, the direction. This will only update the traffic direction
recorded in the software for flow creation.
Second, the TCP parameters. The hardware context will be updated
via the WQE. This update will be blocked until the hardware status
is updated and ready for the next flow creation.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow_dv.c | 56 +++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index b0858e3df8..6afbbbc4bb 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -13824,6 +13824,60 @@ __flow_dv_action_rss_update(struct rte_eth_dev *dev, uint32_t idx,
 	return ret;
 }
 
+/*
+ * Updates in place conntrack context or direction.
+ * Context update should be synchronized.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] idx
+ *   The conntrack object ID to be updated.
+ * @param[in] update
+ *   Pointer to the structure of information to update.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. Initialized in case of
+ *   error only.
+ *
+ * @return
+ *   0 on success, otherwise negative errno value.
+ */
+static int
+__flow_dv_action_ct_update(struct rte_eth_dev *dev, uint32_t idx,
+			   const struct rte_flow_modify_conntrack *update,
+			   struct rte_flow_error *error)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_action *ct;
+	const struct rte_flow_action_conntrack *new_prf;
+	int ret = 0;
+
+	ct = flow_aso_ct_get_by_idx(dev, idx);
+	if (!ct->refcnt)
+		return rte_flow_error_set(error, ENOMEM,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					  NULL,
+					  "CT object is inactive");
+	new_prf = &update->new_ct;
+	if (update->direction)
+		ct->is_original = !!new_prf->is_original_dir;
+	if (update->state) {
+		ret = mlx5_aso_ct_update_by_wqe(priv->sh, ct, new_prf);
+		if (ret)
+			return rte_flow_error_set(error, EIO,
+					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					NULL,
+					"Failed to send CT context update WQE");
+		/* Block until ready or a failure. */
+		ret = mlx5_aso_ct_available(priv->sh, ct);
+		if (ret)
+			rte_flow_error_set(error, rte_errno,
+					   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					   NULL,
+					   "Timeout to get the CT update");
+	}
+	return ret;
+}
+
 /**
  * Updates in place shared action configuration, lock free,
  * (mutex should be acquired by caller).
@@ -13859,6 +13913,8 @@ flow_dv_action_update(struct rte_eth_dev *dev,
 	case MLX5_INDIRECT_ACTION_TYPE_RSS:
 		action_conf = ((const struct rte_flow_action *)update)->conf;
 		return __flow_dv_action_rss_update(dev, idx, action_conf, err);
+	case MLX5_INDIRECT_ACTION_TYPE_CT:
+		return __flow_dv_action_ct_update(dev, idx, update, err);
 	default:
 		return rte_flow_error_set(err, ENOTSUP,
 					  RTE_FLOW_ERROR_TYPE_ACTION,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v2 14/17] net/mlx5: validation of CT action
  2021-05-05  4:19 ` [dpdk-dev] [PATCH v2 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (12 preceding siblings ...)
  2021-05-05  4:20   ` [dpdk-dev] [PATCH v2 13/17] net/mlx5: add CT context update Bing Zhao
@ 2021-05-05  4:20   ` Bing Zhao
  2021-05-05  4:20   ` [dpdk-dev] [PATCH v2 15/17] net/mlx5: validation of CT item Bing Zhao
                     ` (3 subsequent siblings)
  17 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  4:20 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

The validation of a CT action consists of two parts. The first is
the validation of the CT action configuration parameters. When
creating a CT action context, some members need to be verified.

The second is that when creating a flow, the DR action of CT should
be validated with other actions and items as well. Currently, only
the TCP protocol supports connection tracking.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.h         |  4 ++
 drivers/net/mlx5/mlx5_flow.c    | 31 +++++++++++++++
 drivers/net/mlx5/mlx5_flow_dv.c | 69 +++++++++++++++++++++++++++++++++
 3 files changed, 104 insertions(+)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index d01a10ea54..36b7f05822 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1611,6 +1611,10 @@ int mlx5_flow_dev_dump(struct rte_eth_dev *dev, struct rte_flow *flow,
 void mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev);
 int mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
 			uint32_t nb_contexts, struct rte_flow_error *error);
+int mlx5_validate_action_ct(struct rte_eth_dev *dev,
+			    const struct rte_flow_action_conntrack *conntrack,
+			    struct rte_flow_error *error);
+
 
 /* mlx5_mp_os.c */
 
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index f36eeae03f..6baaefbaba 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1688,6 +1688,37 @@ mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
 	return 0;
 }
 
+/*
+ * Validate the ASO CT action.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] conntrack
+ *   Pointer to the CT action profile.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_validate_action_ct(struct rte_eth_dev *dev,
+			const struct rte_flow_action_conntrack *conntrack,
+			struct rte_flow_error *error)
+{
+	RTE_SET_USED(dev);
+
+	if (conntrack->state > RTE_FLOW_CONNTRACK_STATE_TIME_WAIT)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "Invalid CT state");
+	if (conntrack->last_index > RTE_FLOW_CONNTRACK_FLAG_RST)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "Invalid last TCP packet flag");
+	return 0;
+}
+
 /**
  * Verify the @p attributes will be correctly understood by the NIC and store
  * them in the @p flow if everything is correct.
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 6afbbbc4bb..f2a2c609e2 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -3400,6 +3400,57 @@ flow_dv_validate_action_raw_encap_decap
 	return 0;
 }
 
+/*
+ * Validate the ASO CT action.
+ *
+ * @param[in] dev
+ *   Pointer to the rte_eth_dev structure.
+ * @param[in] action_flags
+ *   Holds the actions detected until now.
+ * @param[in] item_flags
+ *   The items found in this flow rule.
+ * @param[in] attr
+ *   Pointer to flow attributes.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_validate_action_aso_ct(struct rte_eth_dev *dev,
+			       uint64_t action_flags,
+			       uint64_t item_flags,
+			       const struct rte_flow_attr *attr,
+			       struct rte_flow_error *error)
+{
+	RTE_SET_USED(dev);
+
+	if (attr->group == 0 && !attr->transfer)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					  NULL,
+					  "Only support non-root table");
+	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "CT cannot follow a fate action");
+	if ((action_flags & MLX5_FLOW_ACTION_METER) ||
+	    (action_flags & MLX5_FLOW_ACTION_AGE))
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "Only one ASO action is supported");
+	if (action_flags & MLX5_FLOW_ACTION_ENCAP)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "Encap cannot exist before CT");
+	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP))
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+					  "Not a outer TCP packet");
+	return 0;
+}
+
 /**
  * Match encap_decap resource.
  *
@@ -7205,6 +7256,14 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
 			action_flags |= MLX5_FLOW_ACTION_MODIFY_FIELD;
 			rw_act_num += ret;
 			break;
+		case RTE_FLOW_ACTION_TYPE_CONNTRACK:
+			ret = flow_dv_validate_action_aso_ct(dev, action_flags,
+							     item_flags, attr,
+							     error);
+			if (ret < 0)
+				return ret;
+			action_flags |= MLX5_FLOW_ACTION_CT;
+			break;
 		default:
 			return rte_flow_error_set(error, ENOTSUP,
 						  RTE_FLOW_ERROR_TYPE_ACTION,
@@ -13861,6 +13920,10 @@ __flow_dv_action_ct_update(struct rte_eth_dev *dev, uint32_t idx,
 	if (update->direction)
 		ct->is_original = !!new_prf->is_original_dir;
 	if (update->state) {
+		/* Only validate the profile when it needs to be updated. */
+		ret = mlx5_validate_action_ct(dev, new_prf, error);
+		if (ret)
+			return ret;
 		ret = mlx5_aso_ct_update_by_wqe(priv->sh, ct, new_prf);
 		if (ret)
 			return rte_flow_error_set(error, EIO,
@@ -15732,6 +15795,12 @@ flow_dv_action_validate(struct rte_eth_dev *dev,
 						NULL,
 					     "shared age action not supported");
 		return flow_dv_validate_action_age(0, action, dev, err);
+	case RTE_FLOW_ACTION_TYPE_CONNTRACK:
+		if (!priv->sh->ct_aso_en)
+			return rte_flow_error_set(err, ENOTSUP,
+					RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+					"ASO CT is not supported");
+		return mlx5_validate_action_ct(dev, action->conf, err);
 	default:
 		return rte_flow_error_set(err, ENOTSUP,
 					  RTE_FLOW_ERROR_TYPE_ACTION,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v2 15/17] net/mlx5: validation of CT item
  2021-05-05  4:19 ` [dpdk-dev] [PATCH v2 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (13 preceding siblings ...)
  2021-05-05  4:20   ` [dpdk-dev] [PATCH v2 14/17] net/mlx5: validation of CT action Bing Zhao
@ 2021-05-05  4:20   ` Bing Zhao
  2021-05-05  4:20   ` [dpdk-dev] [PATCH v2 16/17] net/mlx5: add support of CT between two ports Bing Zhao
                     ` (2 subsequent siblings)
  17 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  4:20 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

The ASO connection tracking item will be translated into a
register value for matching. The validation of this item has no
dependency on other layers, since a flow including this item
should be reached by a jump from another group. All the layer
checks were already done in the previous groups. Only conflicts
between the state bits need to be checked.
It is assumed that the flow with CT item will always work on the
TCP traffic.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow.h    |  3 ++
 drivers/net/mlx5/mlx5_flow_dv.c | 51 +++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index eb0bb42161..238befa2d4 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -147,6 +147,9 @@ enum mlx5_feature_name {
 #define MLX5_FLOW_LAYER_GENEVE_OPT (UINT64_C(1) << 32)
 #define MLX5_FLOW_LAYER_GTP_PSC (UINT64_C(1) << 33)
 
+/* Conntrack item. */
+#define MLX5_FLOW_LAYER_ASO_CT (UINT64_C(1) << 34)
+
 /* Outer Masks. */
 #define MLX5_FLOW_LAYER_OUTER_L3 \
 	(MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index f2a2c609e2..aa0a5acdca 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -2598,6 +2598,51 @@ flow_dv_validate_item_ipv6_frag_ext(const struct rte_flow_item *item,
 				  "specified range not supported");
 }
 
+/*
+ * Validate ASO CT item.
+ *
+ * @param[in] dev
+ *   Pointer to the rte_eth_dev structure.
+ * @param[in] item
+ *   Item specification.
+ * @param[in] item_flags
+ *   Pointer to bit-fields that holds the items detected until now.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_validate_item_aso_ct(struct rte_eth_dev *dev,
+			     const struct rte_flow_item *item,
+			     uint64_t *item_flags,
+			     struct rte_flow_error *error)
+{
+	const struct rte_flow_item_conntrack *spec = item->spec;
+	const struct rte_flow_item_conntrack *mask = item->mask;
+	RTE_SET_USED(dev);
+	uint32_t flags;
+
+	if (*item_flags & MLX5_FLOW_LAYER_ASO_CT)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ITEM, NULL,
+					  "Only one CT is supported");
+	if (!mask)
+		mask = &rte_flow_item_conntrack_mask;
+	flags = spec->flags & mask->flags;
+	if ((flags & RTE_FLOW_CONNTRACK_PKT_STATE_VALID) &&
+	    ((flags & RTE_FLOW_CONNTRACK_PKT_STATE_INVALID) ||
+	     (flags & RTE_FLOW_CONNTRACK_PKT_STATE_BAD) ||
+	     (flags & RTE_FLOW_CONNTRACK_PKT_STATE_DISABLED)))
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ITEM, NULL,
+					  "Conflict status bits");
+	/* State change also needs to be considered. */
+	*item_flags |= MLX5_FLOW_LAYER_ASO_CT;
+	return 0;
+}
+
 /**
  * Validate the pop VLAN action.
  *
@@ -6696,6 +6741,12 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
 				return ret;
 			last_item = MLX5_FLOW_LAYER_ECPRI;
 			break;
+		case RTE_FLOW_ITEM_TYPE_CONNTRACK:
+			ret = flow_dv_validate_item_aso_ct(dev, items,
+							   &item_flags, error);
+			if (ret < 0)
+				return ret;
+			break;
 		default:
 			return rte_flow_error_set(error, ENOTSUP,
 						  RTE_FLOW_ERROR_TYPE_ITEM,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v2 16/17] net/mlx5: add support of CT between two ports
  2021-05-05  4:19 ` [dpdk-dev] [PATCH v2 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (14 preceding siblings ...)
  2021-05-05  4:20   ` [dpdk-dev] [PATCH v2 15/17] net/mlx5: validation of CT item Bing Zhao
@ 2021-05-05  4:20   ` Bing Zhao
  2021-05-05  4:20   ` [dpdk-dev] [PATCH v2 17/17] doc: update mlx5 support for conntrack Bing Zhao
  2021-05-05  6:05   ` [dpdk-dev] [PATCH v2 00/17] conntrack support in mlx5 PMD Slava Ovsiienko
  17 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  4:20 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

After a connection tracking context is created, it can be used by
two ports. On each port, a flow for one traffic direction will be
created.

The context can only be shared between the owner port and the peer
port that was specified when the context was created. Only the owner
port can update or query the context in the current implementation.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow.h    | 57 +++++++++++++++++++++++++-
 drivers/net/mlx5/mlx5_flow_dv.c | 71 +++++++++++++++++++++++++--------
 2 files changed, 110 insertions(+), 18 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 238befa2d4..ddaba40f72 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -48,6 +48,25 @@ enum {
 	MLX5_INDIRECT_ACTION_TYPE_CT,
 };
 
+/* Currently, at most 256 ports and 4M actions are supported. */
+#define MLX5_INDIRECT_ACT_CT_MAX_PORT 0x100
+
+#define MLX5_INDIRECT_ACT_CT_OWNER_SHIFT 22
+#define MLX5_INDIRECT_ACT_CT_OWNER_MASK (MLX5_INDIRECT_ACT_CT_MAX_PORT - 1)
+
+/* 30-31: type, 22-29: owner port, 0-21: index. */
+#define MLX5_INDIRECT_ACT_CT_GEN_IDX(owner, index) \
+	((MLX5_INDIRECT_ACTION_TYPE_CT << MLX5_INDIRECT_ACTION_TYPE_OFFSET) | \
+	 (((owner) & MLX5_INDIRECT_ACT_CT_OWNER_MASK) << \
+	  MLX5_INDIRECT_ACT_CT_OWNER_SHIFT) | (index))
+
+#define MLX5_INDIRECT_ACT_CT_GET_OWNER(index) \
+	(((index) >> MLX5_INDIRECT_ACT_CT_OWNER_SHIFT) & \
+	 MLX5_INDIRECT_ACT_CT_OWNER_MASK)
+
+#define MLX5_INDIRECT_ACT_CT_GET_IDX(index) \
+	((index) & ((1 << MLX5_INDIRECT_ACT_CT_OWNER_SHIFT) - 1))
+
 /* Matches on selected register. */
 struct mlx5_rte_flow_item_tag {
 	enum modify_reg id;
@@ -1304,7 +1323,7 @@ mlx5_aso_meter_by_idx(struct mlx5_priv *priv, uint32_t idx)
 }
 
 /*
- * Get ASO CT action by index.
+ * Get ASO CT action by device and index.
  *
  * @param[in] dev
  *   Pointer to the Ethernet device structure.
@@ -1315,7 +1334,7 @@ mlx5_aso_meter_by_idx(struct mlx5_priv *priv, uint32_t idx)
  *   The specified ASO CT action pointer.
  */
 static inline struct mlx5_aso_ct_action *
-flow_aso_ct_get_by_idx(struct rte_eth_dev *dev, uint32_t idx)
+flow_aso_ct_get_by_dev_idx(struct rte_eth_dev *dev, uint32_t idx)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
@@ -1330,6 +1349,40 @@ flow_aso_ct_get_by_idx(struct rte_eth_dev *dev, uint32_t idx)
 	return &pool->actions[idx % MLX5_ASO_CT_ACTIONS_PER_POOL];
 }
 
+/*
+ * Get ASO CT action by owner & index.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] own_idx
+ *   Index to the ASO CT action and owner port combination.
+ *
+ * @return
+ *   The specified ASO CT action pointer.
+ */
+static inline struct mlx5_aso_ct_action *
+flow_aso_ct_get_by_idx(struct rte_eth_dev *dev, uint32_t own_idx)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_action *ct;
+	uint16_t owner = (uint16_t)MLX5_INDIRECT_ACT_CT_GET_OWNER(own_idx);
+	uint32_t idx = MLX5_INDIRECT_ACT_CT_GET_IDX(own_idx);
+
+	if (owner == PORT_ID(priv)) {
+		ct = flow_aso_ct_get_by_dev_idx(dev, idx);
+	} else {
+		struct rte_eth_dev *owndev = &rte_eth_devices[owner];
+
+		MLX5_ASSERT(owner < RTE_MAX_ETHPORTS);
+		if (dev->data->dev_started != 1)
+			return NULL;
+		ct = flow_aso_ct_get_by_dev_idx(owndev, idx);
+		if (ct->peer != PORT_ID(priv))
+			return NULL;
+	}
+	return ct;
+}
+
 int mlx5_flow_group_to_table(struct rte_eth_dev *dev,
 			     const struct mlx5_flow_tunnel *tunnel,
 			     uint32_t group, uint32_t *table,
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index aa0a5acdca..ca55cff48b 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -11289,7 +11289,7 @@ flow_dv_translate_create_aso_age(struct rte_eth_dev *dev,
 }
 
 /*
- * Release an ASO CT action.
+ * Release an ASO CT action by its own device.
  *
  * @param[in] dev
  *   Pointer to the Ethernet device structure.
@@ -11300,12 +11300,12 @@ flow_dv_translate_create_aso_age(struct rte_eth_dev *dev,
  *   0 when CT action was removed, otherwise the number of references.
  */
 static inline int
-flow_dv_aso_ct_release(struct rte_eth_dev *dev, uint32_t idx)
+flow_dv_aso_ct_dev_release(struct rte_eth_dev *dev, uint32_t idx)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
 	uint32_t ret;
-	struct mlx5_aso_ct_action *ct = flow_aso_ct_get_by_idx(dev, idx);
+	struct mlx5_aso_ct_action *ct = flow_aso_ct_get_by_dev_idx(dev, idx);
 	enum mlx5_aso_ct_state state =
 			__atomic_load_n(&ct->state, __ATOMIC_RELAXED);
 
@@ -11334,7 +11334,21 @@ flow_dv_aso_ct_release(struct rte_eth_dev *dev, uint32_t idx)
 		LIST_INSERT_HEAD(&mng->free_cts, ct, next);
 		rte_spinlock_unlock(&mng->ct_sl);
 	}
-	return ret;
+	return (int)ret;
+}
+
+static inline int
+flow_dv_aso_ct_release(struct rte_eth_dev *dev, uint32_t own_idx)
+{
+	uint16_t owner = (uint16_t)MLX5_INDIRECT_ACT_CT_GET_OWNER(own_idx);
+	uint32_t idx = MLX5_INDIRECT_ACT_CT_GET_IDX(own_idx);
+	struct rte_eth_dev *owndev = &rte_eth_devices[owner];
+	RTE_SET_USED(dev);
+
+	MLX5_ASSERT(owner < RTE_MAX_ETHPORTS);
+	if (dev->data->dev_started != 1)
+		return -1;
+	return flow_dv_aso_ct_dev_release(owndev, idx);
 }
 
 /*
@@ -11486,7 +11500,7 @@ flow_dv_aso_ct_alloc(struct rte_eth_dev *dev, struct rte_flow_error *error)
 		RTE_SET_USED(reg_c);
 #endif
 		if (!ct->dr_action_orig) {
-			flow_dv_aso_ct_release(dev, ct_idx);
+			flow_dv_aso_ct_dev_release(dev, ct_idx);
 			rte_flow_error_set(error, rte_errno,
 					   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
 					   "failed to create ASO CT action");
@@ -11502,7 +11516,7 @@ flow_dv_aso_ct_alloc(struct rte_eth_dev *dev, struct rte_flow_error *error)
 			 reg_c - REG_C_0);
 #endif
 		if (!ct->dr_action_rply) {
-			flow_dv_aso_ct_release(dev, ct_idx);
+			flow_dv_aso_ct_dev_release(dev, ct_idx);
 			rte_flow_error_set(error, rte_errno,
 					   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
 					   "failed to create ASO CT action");
@@ -11544,12 +11558,13 @@ flow_dv_translate_create_conntrack(struct rte_eth_dev *dev,
 		return rte_flow_error_set(error, rte_errno,
 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
 					  "Failed to allocate CT object");
-	ct = flow_aso_ct_get_by_idx(dev, idx);
+	ct = flow_aso_ct_get_by_dev_idx(dev, idx);
 	if (mlx5_aso_ct_update_by_wqe(sh, ct, pro))
 		return rte_flow_error_set(error, EBUSY,
 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
 					  "Failed to update CT");
 	ct->is_original = !!pro->is_original_dir;
+	ct->peer = pro->peer_port;
 	return idx;
 }
 
@@ -11713,7 +11728,7 @@ flow_dv_translate(struct rte_eth_dev *dev,
 		const struct rte_flow_action *found_action = NULL;
 		uint32_t jump_group = 0;
 		struct mlx5_flow_counter *cnt;
-		uint32_t ct_idx;
+		uint32_t owner_idx;
 		struct mlx5_aso_ct_action *ct;
 
 		if (!mlx5_flow_os_action_supported(action_type))
@@ -12189,8 +12204,13 @@ flow_dv_translate(struct rte_eth_dev *dev,
 			action_flags |= MLX5_FLOW_ACTION_MODIFY_FIELD;
 			break;
 		case RTE_FLOW_ACTION_TYPE_CONNTRACK:
-			ct_idx = (uint32_t)(uintptr_t)action->conf;
-			ct = flow_aso_ct_get_by_idx(dev, ct_idx);
+			owner_idx = (uint32_t)(uintptr_t)action->conf;
+			ct = flow_aso_ct_get_by_idx(dev, owner_idx);
+			if (!ct)
+				return rte_flow_error_set(error, EINVAL,
+						RTE_FLOW_ERROR_TYPE_ACTION,
+						NULL,
+						"Failed to get CT object.");
 			if (mlx5_aso_ct_available(priv->sh, ct))
 				return rte_flow_error_set(error, rte_errno,
 						RTE_FLOW_ERROR_TYPE_ACTION,
@@ -12203,7 +12223,7 @@ flow_dv_translate(struct rte_eth_dev *dev,
 				dev_flow->dv.actions[actions_n] =
 							ct->dr_action_rply;
 			flow->indirect_type = MLX5_INDIRECT_ACTION_TYPE_CT;
-			flow->ct = ct_idx;
+			flow->ct = owner_idx;
 			__atomic_fetch_add(&ct->refcnt, 1, __ATOMIC_RELAXED);
 			actions_n++;
 			action_flags |= MLX5_FLOW_ACTION_CT;
@@ -13803,8 +13823,7 @@ flow_dv_action_create(struct rte_eth_dev *dev,
 	case RTE_FLOW_ACTION_TYPE_CONNTRACK:
 		ret = flow_dv_translate_create_conntrack(dev, action->conf,
 							 err);
-		idx = (MLX5_INDIRECT_ACTION_TYPE_CT <<
-		       MLX5_INDIRECT_ACTION_TYPE_OFFSET) | ret;
+		idx = MLX5_INDIRECT_ACT_CT_GEN_IDX(PORT_ID(priv), ret);
 		break;
 	default:
 		rte_flow_error_set(err, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
@@ -13856,7 +13875,9 @@ flow_dv_action_destroy(struct rte_eth_dev *dev,
 		return 0;
 	case MLX5_INDIRECT_ACTION_TYPE_CT:
 		ret = flow_dv_aso_ct_release(dev, idx);
-		if (ret)
+		if (ret < 0)
+			return ret;
+		if (ret > 0)
 			DRV_LOG(DEBUG, "Connection tracking object %u still "
 				"has references %d.", idx, ret);
 		return 0;
@@ -13960,8 +13981,16 @@ __flow_dv_action_ct_update(struct rte_eth_dev *dev, uint32_t idx,
 	struct mlx5_aso_ct_action *ct;
 	const struct rte_flow_action_conntrack *new_prf;
 	int ret = 0;
+	uint16_t owner = (uint16_t)MLX5_INDIRECT_ACT_CT_GET_OWNER(idx);
+	uint32_t dev_idx;
 
-	ct = flow_aso_ct_get_by_idx(dev, idx);
+	if (PORT_ID(priv) != owner)
+		return rte_flow_error_set(error, EACCES,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					  NULL,
+					  "CT object owned by another port");
+	dev_idx = MLX5_INDIRECT_ACT_CT_GET_IDX(idx);
+	ct = flow_aso_ct_get_by_dev_idx(dev, dev_idx);
 	if (!ct->refcnt)
 		return rte_flow_error_set(error, ENOMEM,
 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
@@ -14049,6 +14078,8 @@ flow_dv_action_query(struct rte_eth_dev *dev,
 	uint32_t idx = act_idx & ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_aso_ct_action *ct;
+	uint16_t owner;
+	uint32_t dev_idx;
 
 	switch (type) {
 	case MLX5_INDIRECT_ACTION_TYPE_AGE:
@@ -14063,7 +14094,15 @@ flow_dv_action_query(struct rte_eth_dev *dev,
 			     (&age_param->sec_since_last_hit, __ATOMIC_RELAXED);
 		return 0;
 	case MLX5_INDIRECT_ACTION_TYPE_CT:
-		ct = flow_aso_ct_get_by_idx(dev, idx);
+		owner = (uint16_t)MLX5_INDIRECT_ACT_CT_GET_OWNER(idx);
+		if (owner != PORT_ID(priv))
+			return rte_flow_error_set(error, EACCES,
+					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					NULL,
+					"CT object owned by another port");
+		dev_idx = MLX5_INDIRECT_ACT_CT_GET_IDX(idx);
+		ct = flow_aso_ct_get_by_dev_idx(dev, dev_idx);
+		MLX5_ASSERT(ct);
 		if (!ct->refcnt)
 			return rte_flow_error_set(error, EFAULT,
 					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v2 17/17] doc: update mlx5 support for conntrack
  2021-05-05  4:19 ` [dpdk-dev] [PATCH v2 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (15 preceding siblings ...)
  2021-05-05  4:20   ` [dpdk-dev] [PATCH v2 16/17] net/mlx5: add support of CT between two ports Bing Zhao
@ 2021-05-05  4:20   ` Bing Zhao
  2021-05-05  6:05   ` [dpdk-dev] [PATCH v2 00/17] conntrack support in mlx5 PMD Slava Ovsiienko
  17 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  4:20 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

The support and limitations of connection tracking are added to
the release notes and the mlx5 NIC guide.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 doc/guides/nics/features/default.ini   |  1 +
 doc/guides/nics/features/mlx5.ini      |  1 +
 doc/guides/nics/mlx5.rst               | 14 ++++++++++++++
 doc/guides/rel_notes/release_21_05.rst |  2 ++
 4 files changed, 18 insertions(+)

diff --git a/doc/guides/nics/features/default.ini b/doc/guides/nics/features/default.ini
index 8046bd121e..0deb4ef547 100644
--- a/doc/guides/nics/features/default.ini
+++ b/doc/guides/nics/features/default.ini
@@ -66,6 +66,7 @@ Module EEPROM dump   =
 Registers dump       =
 LED                  =
 Multiprocess aware   =
+Connection tracking  =
 FreeBSD              =
 Linux                =
 Windows              =
diff --git a/doc/guides/nics/features/mlx5.ini b/doc/guides/nics/features/mlx5.ini
index ddd131da16..45dbe75d07 100644
--- a/doc/guides/nics/features/mlx5.ini
+++ b/doc/guides/nics/features/mlx5.ini
@@ -45,6 +45,7 @@ Stats per queue      = Y
 FW version           = Y
 Module EEPROM dump   = Y
 Multiprocess aware   = Y
+Connection tracking  = Y
 Linux                = Y
 Windows              = P
 ARMv8                = Y
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 2bb4f18a08..238da94118 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -107,6 +107,7 @@ Features
 - 21844 flow priorities for ingress or egress flow groups greater than 0 and for any transfer
   flow group.
 - Flow metering, including meter policy API.
+- Connection tracking.
 
 Limitations
 -----------
@@ -418,6 +419,14 @@ Limitations
      - RED: must be DROP.
   - meter profile packet mode is supported.
 
+- Connection tracking:
+
+  - Connection tracking (conntrack) cannot co-exist with ASO meter or ASO age actions in a single flow rule.
+  - Flow rules insertion rate and memory consumption.
+  - software limitation:
+     - ports: a maximal number of 256.
+     - conntrack: a maximal number of 4M.
+
 Statistics
 ----------
 
@@ -1680,6 +1689,11 @@ Supported hardware offloads
    |                       | | rdma-core 35  | | rdma-core 35  |
    |                       | | ConnectX-5    | | ConnectX-5    |
    +-----------------------+-----------------+-----------------+
+   | Connection tracking   | |               | | DPDK 21.05    |
+   |                       | |     N/A       | | OFED 5.3      |
+   |                       | |               | | rdma-core 35  |
+   |                       | |               | | ConnectX-6 Dx |
+   +-----------------------+-----------------+-----------------+
 
 .. table:: Minimal SW/HW versions for shared action offload
    :name: sact
diff --git a/doc/guides/rel_notes/release_21_05.rst b/doc/guides/rel_notes/release_21_05.rst
index efd68e8c7c..4c4c37ef87 100644
--- a/doc/guides/rel_notes/release_21_05.rst
+++ b/doc/guides/rel_notes/release_21_05.rst
@@ -166,6 +166,8 @@ New Features
   * Added support for ASO (Advanced Steering Operation) meter.
   * Added support for ASO metering by PPS (packet per second).
   * Added support for the monitor policy of Power Management API.
+  * Added support for connection tracking action and item as well as context create,
+    destroy, update and query.
 
 * **Updated NXP DPAA driver.**
 
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* Re: [dpdk-dev] [PATCH v2 00/17] conntrack support in mlx5 PMD
  2021-05-05  4:19 ` [dpdk-dev] [PATCH v2 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (16 preceding siblings ...)
  2021-05-05  4:20   ` [dpdk-dev] [PATCH v2 17/17] doc: update mlx5 support for conntrack Bing Zhao
@ 2021-05-05  6:05   ` Slava Ovsiienko
  17 siblings, 0 replies; 147+ messages in thread
From: Slava Ovsiienko @ 2021-05-05  6:05 UTC (permalink / raw)
  To: Bing Zhao, Matan Azrad, NBU-Contact-Thomas Monjalon
  Cc: dev, Ori Kam, Raslan Darawsheh

> -----Original Message-----
> From: Bing Zhao <bingz@nvidia.com>
> Sent: Wednesday, May 5, 2021 7:20
> To: Slava Ovsiienko <viacheslavo@nvidia.com>; Matan Azrad
> <matan@nvidia.com>; NBU-Contact-Thomas Monjalon
> <thomas@monjalon.net>
> Cc: dev@dpdk.org; Ori Kam <orika@nvidia.com>; Raslan Darawsheh
> <rasland@nvidia.com>
> Subject: [PATCH v2 00/17] conntrack support in mlx5 PMD
> 
> This patch set adds the connection tracking offload support in the
> mlx5 driver, as well as the documents update.
> 
> ---
> v2: code bug fixes, commits clean up and doc update.

For the series:
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>


> ---
> 
> Bing Zhao (17):
>   common/mlx5: add connection tracking object definition
>   common/mlx5: add CT offload capability checking
>   net/mlx5: use meter color reg for CT
>   net/mlx5: initialization of CT management
>   common/mlx5: add DevX CT objects creation
>   net/mlx5: add modify support for CT
>   net/mlx5: add actions creating for CT
>   net/mlx5: close CT management structure
>   net/mlx5: add ASO CT query implementation
>   net/mlx5: add ASO CT destroy handling
>   net/mlx5: add translation of CT action
>   net/mlx5: add translation of CT item
>   net/mlx5: add CT context update
>   net/mlx5: validation of CT action
>   net/mlx5: validation of CT item
>   net/mlx5: add support of CT between two ports
>   doc: update mlx5 support for conntrack
> 
>  doc/guides/nics/features/default.ini   |   1 +
>  doc/guides/nics/features/mlx5.ini      |   1 +
>  doc/guides/nics/mlx5.rst               |  14 +
>  doc/guides/rel_notes/release_21_05.rst |   2 +
>  drivers/common/mlx5/linux/meson.build  |   2 +
>  drivers/common/mlx5/mlx5_devx_cmds.c   |  53 +++
>  drivers/common/mlx5/mlx5_devx_cmds.h   |   5 +
>  drivers/common/mlx5/mlx5_prm.h         |  88 ++++
>  drivers/common/mlx5/version.map        |   1 +
>  drivers/net/mlx5/linux/mlx5_os.c       |  13 +
>  drivers/net/mlx5/mlx5.c                |  92 ++++
>  drivers/net/mlx5/mlx5.h                |  76 ++++
>  drivers/net/mlx5/mlx5_flow.c           |  44 +-
>  drivers/net/mlx5/mlx5_flow.h           | 101 ++++-
>  drivers/net/mlx5/mlx5_flow_aso.c       | 592 ++++++++++++++++++++++++
>  drivers/net/mlx5/mlx5_flow_dv.c        | 601 ++++++++++++++++++++++++-
>  16 files changed, 1683 insertions(+), 3 deletions(-)
> 
> --
> 2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v3 00/17] conntrack support in mlx5 PMD
  2021-04-27 15:37 [dpdk-dev] [PATCH 00/17] conntrack support in mlx5 PMD Bing Zhao
                   ` (17 preceding siblings ...)
  2021-05-05  4:19 ` [dpdk-dev] [PATCH v2 00/17] conntrack support in mlx5 PMD Bing Zhao
@ 2021-05-05  6:40 ` Bing Zhao
  2021-05-05  6:40   ` [dpdk-dev] [PATCH v3 01/17] common/mlx5: add connection tracking object definition Bing Zhao
                     ` (15 more replies)
  2021-05-05  6:49 ` [dpdk-dev] [PATCH v3 00/17] conntrack support in mlx5 PMD Bing Zhao
                   ` (4 subsequent siblings)
  23 siblings, 16 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  6:40 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

This patch set adds the connection tracking offload support in the
mlx5 driver, as well as the documents update.
 
---
v2: code bug fixes, commits clean up and doc update.
v3: fix error input pointer for CT MR registering
---

Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>

Bing Zhao (17):
  common/mlx5: add connection tracking object definition
  common/mlx5: add CT offload capability checking
  net/mlx5: use meter color reg for CT
  net/mlx5: initialization of CT management
  common/mlx5: add DevX CT objects creation
  net/mlx5: add modify support for CT
  net/mlx5: add actions creating for CT
  net/mlx5: close CT management structure
  net/mlx5: add ASO CT query implementation
  net/mlx5: add ASO CT destroy handling
  net/mlx5: add translation of CT action
  net/mlx5: add translation of CT item
  net/mlx5: add CT context update
  net/mlx5: validation of CT action
  net/mlx5: validation of CT item
  net/mlx5: add support of CT between two ports
  doc: update mlx5 support for conntrack

 doc/guides/nics/features/default.ini   |   1 +
 doc/guides/nics/features/mlx5.ini      |   1 +
 doc/guides/nics/mlx5.rst               |  14 +
 doc/guides/rel_notes/release_21_05.rst |   2 +
 drivers/common/mlx5/linux/meson.build  |   2 +
 drivers/common/mlx5/mlx5_devx_cmds.c   |  53 +++
 drivers/common/mlx5/mlx5_devx_cmds.h   |   5 +
 drivers/common/mlx5/mlx5_prm.h         |  88 ++++
 drivers/common/mlx5/version.map        |   1 +
 drivers/net/mlx5/linux/mlx5_os.c       |  13 +
 drivers/net/mlx5/mlx5.c                |  92 ++++
 drivers/net/mlx5/mlx5.h                |  76 ++++
 drivers/net/mlx5/mlx5_flow.c           |  44 +-
 drivers/net/mlx5/mlx5_flow.h           | 101 ++++-
 drivers/net/mlx5/mlx5_flow_aso.c       | 592 ++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_dv.c        | 601 ++++++++++++++++++++++++-
 16 files changed, 1683 insertions(+), 3 deletions(-)

-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v3 01/17] common/mlx5: add connection tracking object definition
  2021-05-05  6:40 ` [dpdk-dev] [PATCH v3 " Bing Zhao
@ 2021-05-05  6:40   ` Bing Zhao
  2021-05-05  6:40   ` [dpdk-dev] [PATCH v3 02/17] common/mlx5: add CT offload capability checking Bing Zhao
                     ` (14 subsequent siblings)
  15 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  6:40 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

The structures of ASO connection tracking offload object are added
based on the definitions in the PRM. One CT object context will be
loaded into the cache completely in a reversed order of dwords. The
valid bit should be the MSB of the last dword. This is used for the
conntrack context creation and update, as well as for the query.

The capabilities 2 (HCA_CAP_2) layout is also added. The connection
tracking related capabilities could be queried via the HCA_CAP_2.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/common/mlx5/mlx5_prm.h | 85 ++++++++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)

diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index efa5ae67bf..4da89d3379 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -1119,6 +1119,7 @@ enum {
 	MLX5_GET_HCA_CAP_OP_MOD_ROCE = 0x4 << 1,
 	MLX5_GET_HCA_CAP_OP_MOD_NIC_FLOW_TABLE = 0x7 << 1,
 	MLX5_GET_HCA_CAP_OP_MOD_VDPA_EMULATION = 0x13 << 1,
+	MLX5_GET_HCA_CAP_OP_MOD_GENERAL_DEVICE_2 = 0x20 << 1,
 };
 
 #define MLX5_GENERAL_OBJ_TYPES_CAP_VIRTQ_NET_Q \
@@ -1661,6 +1662,29 @@ struct mlx5_ifc_flow_table_nic_cap_bits {
 	struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties;
 };
 
+struct mlx5_ifc_cmd_hca_cap_2_bits {
+	u8 reserved_at_0[0x80]; /* End of DW4. */
+	u8 reserved_at_80[0xb];
+	u8 log_max_num_reserved_qpn[0x5];
+	u8 reserved_at_90[0x3];
+	u8 log_reserved_qpn_granularity[0x5];
+	u8 reserved_at_98[0x3];
+	u8 log_reserved_qpn_max_alloc[0x5]; /* End of DW5. */
+	u8 max_reformat_insert_size[0x8];
+	u8 max_reformat_insert_offset[0x8];
+	u8 max_reformat_remove_size[0x8];
+	u8 max_reformat_remove_offset[0x8]; /* End of DW6. */
+	u8 aso_conntrack_reg_id[0x8];
+	u8 reserved_at_c8[0x3];
+	u8 log_conn_track_granularity[0x5];
+	u8 reserved_at_d0[0x3];
+	u8 log_conn_track_max_alloc[0x5];
+	u8 reserved_at_d8[0x3];
+	u8 log_max_conn_track_offload[0x5];
+	u8 reserved_at_e0[0x20]; /* End of DW7. */
+	u8 reserved_at_100[0x700];
+};
+
 union mlx5_ifc_hca_cap_union_bits {
 	struct mlx5_ifc_cmd_hca_cap_bits cmd_hca_cap;
 	struct mlx5_ifc_per_protocol_networking_offload_caps_bits
@@ -2599,6 +2623,67 @@ struct mlx5_ifc_create_flow_meter_aso_in_bits {
 	struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr;
 	struct mlx5_ifc_flow_meter_aso_bits flow_meter_aso;
 };
+
+struct mlx5_ifc_tcp_window_params_bits {
+	u8 max_ack[0x20];
+	u8 max_win[0x20];
+	u8 reply_end[0x20];
+	u8 sent_end[0x20];
+};
+
+struct mlx5_ifc_conn_track_aso_bits {
+	struct mlx5_ifc_tcp_window_params_bits reply_dir; /* End of DW3. */
+	struct mlx5_ifc_tcp_window_params_bits original_dir; /* End of DW7. */
+	u8 last_end[0x20]; /* End of DW8. */
+	u8 last_ack[0x20]; /* End of DW9. */
+	u8 last_seq[0x20]; /* End of DW10. */
+	u8 last_win[0x10];
+	u8 reserved_at_170[0xa];
+	u8 last_dir[0x1];
+	u8 last_index[0x5]; /* End of DW11. */
+	u8 reserved_at_180[0x40]; /* End of DW13. */
+	u8 reply_direction_tcp_scale[0x4];
+	u8 reply_direction_tcp_close_initiated[0x1];
+	u8 reply_direction_tcp_liberal_enabled[0x1];
+	u8 reply_direction_tcp_data_unacked[0x1];
+	u8 reply_direction_tcp_max_ack[0x1];
+	u8 reserved_at_1c8[0x8];
+	u8 original_direction_tcp_scale[0x4];
+	u8 original_direction_tcp_close_initiated[0x1];
+	u8 original_direction_tcp_liberal_enabled[0x1];
+	u8 original_direction_tcp_data_unacked[0x1];
+	u8 original_direction_tcp_max_ack[0x1];
+	u8 reserved_at_1d8[0x8]; /* End of DW14. */
+	u8 valid[0x1];
+	u8 state[0x3];
+	u8 freeze_track[0x1];
+	u8 reserved_at_1e5[0xb];
+	u8 reserved_at_1f0[0x1];
+	u8 connection_assured[0x1];
+	u8 sack_permitted[0x1];
+	u8 challenged_acked[0x1];
+	u8 heartbeat[0x1];
+	u8 max_ack_window[0x3];
+	u8 reserved_at_1f8[0x1];
+	u8 retransmission_counter[0x3];
+	u8 retranmission_limit_exceeded[0x1];
+	u8 retranmission_limit[0x3]; /* End of DW15. */
+};
+
+struct mlx5_ifc_conn_track_offload_bits {
+	u8 modify_field_select[0x40];
+	u8 reserved_at_40[0x40];
+	u8 reserved_at_80[0x8];
+	u8 conn_track_aso_access_pd[0x18];
+	u8 reserved_at_a0[0x160];
+	struct mlx5_ifc_conn_track_aso_bits conn_track_aso;
+};
+
+struct mlx5_ifc_create_conn_track_aso_in_bits {
+	struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr;
+	struct mlx5_ifc_conn_track_offload_bits conn_track_offload;
+};
+
 enum mlx5_access_aso_opc_mod {
 	ASO_OPC_MOD_IPSEC = 0x0,
 	ASO_OPC_MOD_CONNECTION_TRACKING = 0x1,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v3 02/17] common/mlx5: add CT offload capability checking
  2021-05-05  6:40 ` [dpdk-dev] [PATCH v3 " Bing Zhao
  2021-05-05  6:40   ` [dpdk-dev] [PATCH v3 01/17] common/mlx5: add connection tracking object definition Bing Zhao
@ 2021-05-05  6:40   ` Bing Zhao
  2021-05-05  6:40   ` [dpdk-dev] [PATCH v3 03/17] net/mlx5: use meter color reg for CT Bing Zhao
                     ` (13 subsequent siblings)
  15 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  6:40 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

During startup, the ASO connection tracking offload capability could
be queried via HCA_CAP_QUERY command. If the HW doesn't support ASO
CT, the value would be 0 by default. The following initialization
should be skipped and the creation of the CT object should return
a failure directly.

Subsequent CT object creation should also check this capability. With
an old driver, a preprocessor macro (HAVE_MLX5_DR_ACTION_ASO_CT) should
be used so that the code still compiles.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/common/mlx5/linux/meson.build | 2 ++
 drivers/common/mlx5/mlx5_devx_cmds.c  | 3 +++
 drivers/common/mlx5/mlx5_devx_cmds.h  | 1 +
 drivers/common/mlx5/mlx5_prm.h        | 3 +++
 4 files changed, 9 insertions(+)

diff --git a/drivers/common/mlx5/linux/meson.build b/drivers/common/mlx5/linux/meson.build
index 3334bd5cb2..007834a49b 100644
--- a/drivers/common/mlx5/linux/meson.build
+++ b/drivers/common/mlx5/linux/meson.build
@@ -189,6 +189,8 @@ has_sym_args = [
             'MLX5_WQE_UMR_CTRL_FLAG_INLINE' ],
         [ 'HAVE_MLX5_DR_FLOW_DUMP_RULE', 'infiniband/mlx5dv.h',
             'mlx5dv_dump_dr_rule' ],
+        [ 'HAVE_MLX5_DR_ACTION_ASO_CT', 'infiniband/mlx5dv.h',
+            'MLX5DV_DR_ACTION_FLAGS_ASO_CT_DIRECTION_INITIATOR' ],
 ]
 config = configuration_data()
 foreach arg:has_sym_args
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index 79fff6457c..ad67883fde 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -760,6 +760,9 @@ mlx5_devx_cmd_query_hca_attr(void *ctx,
 		MLX5_GET(cmd_hca_cap, hcattr, umr_indirect_mkey_disabled);
 	attr->umr_modify_entity_size_disabled =
 		MLX5_GET(cmd_hca_cap, hcattr, umr_modify_entity_size_disabled);
+	attr->ct_offload = !!(MLX5_GET64(cmd_hca_cap, hcattr,
+					 general_obj_types) &
+			      MLX5_GENERAL_OBJ_TYPES_CAP_CONN_TRACK_OFFLOAD);
 	if (attr->qos.sup) {
 		MLX5_SET(query_hca_cap_in, in, op_mod,
 			 MLX5_GET_HCA_CAP_OP_MOD_QOS_CAP |
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h
index 870bdb6b30..746320cf04 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -137,6 +137,7 @@ struct mlx5_hca_attr {
 	uint32_t qp_ts_format:2;
 	uint32_t regex:1;
 	uint32_t reg_c_preserve:1;
+	uint32_t ct_offload:1; /* General obj type ASO CT offload supported. */
 	uint32_t regexp_num_of_engines;
 	uint32_t log_max_ft_sampler_num:8;
 	uint32_t geneve_tlv_opt;
diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 4da89d3379..71bdf43668 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -1134,6 +1134,8 @@ enum {
 			(1ULL << MLX5_GENERAL_OBJ_TYPE_FLOW_METER_ASO)
 #define MLX5_GENERAL_OBJ_TYPES_CAP_GENEVE_TLV_OPT \
 			(1ULL << MLX5_OBJ_TYPE_GENEVE_TLV_OPT)
+#define MLX5_GENERAL_OBJ_TYPES_CAP_CONN_TRACK_OFFLOAD \
+			(1ULL << MLX5_GENERAL_OBJ_TYPE_CONN_TRACK_OFFLOAD)
 
 enum {
 	MLX5_HCA_CAP_OPMOD_GET_MAX   = 0,
@@ -2456,6 +2458,7 @@ enum {
 	MLX5_GENERAL_OBJ_TYPE_FLEX_PARSE_GRAPH = 0x0022,
 	MLX5_GENERAL_OBJ_TYPE_FLOW_METER_ASO = 0x0024,
 	MLX5_GENERAL_OBJ_TYPE_FLOW_HIT_ASO = 0x0025,
+	MLX5_GENERAL_OBJ_TYPE_CONN_TRACK_OFFLOAD = 0x0031,
 };
 
 struct mlx5_ifc_general_obj_in_cmd_hdr_bits {
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v3 03/17] net/mlx5: use meter color reg for CT
  2021-05-05  6:40 ` [dpdk-dev] [PATCH v3 " Bing Zhao
  2021-05-05  6:40   ` [dpdk-dev] [PATCH v3 01/17] common/mlx5: add connection tracking object definition Bing Zhao
  2021-05-05  6:40   ` [dpdk-dev] [PATCH v3 02/17] common/mlx5: add CT offload capability checking Bing Zhao
@ 2021-05-05  6:40   ` Bing Zhao
  2021-05-05  6:40   ` [dpdk-dev] [PATCH v3 04/17] net/mlx5: initialization of CT management Bing Zhao
                     ` (12 subsequent siblings)
  15 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  6:40 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

Based on the capacity, 3 registers could be used. Due to the register
allocation, only the one REG_C_3 for meter color could be reused
right now.

Then in the same flow, no more than one ASO action can be supported.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow.c | 4 +++-
 drivers/net/mlx5/mlx5_flow.h | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index a7ceafe221..edad6007a8 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -760,7 +760,9 @@ mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
 			return priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
 			       REG_C_3;
 	case MLX5_MTR_COLOR:
-	case MLX5_ASO_FLOW_HIT: /* Both features use the same REG_C. */
+	case MLX5_ASO_FLOW_HIT:
+	case MLX5_ASO_CONNTRACK:
+		/* All features use the same REG_C. */
 		MLX5_ASSERT(priv->mtr_color_reg != REG_NON);
 		return priv->mtr_color_reg;
 	case MLX5_COPY_MARK:
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index cc3e79d088..964e13a869 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -84,6 +84,7 @@ enum mlx5_feature_name {
 	MLX5_MTR_COLOR,
 	MLX5_MTR_ID,
 	MLX5_ASO_FLOW_HIT,
+	MLX5_ASO_CONNTRACK,
 };
 
 /* Default queue number. */
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v3 04/17] net/mlx5: initialization of CT management
  2021-05-05  6:40 ` [dpdk-dev] [PATCH v3 " Bing Zhao
                     ` (2 preceding siblings ...)
  2021-05-05  6:40   ` [dpdk-dev] [PATCH v3 03/17] net/mlx5: use meter color reg for CT Bing Zhao
@ 2021-05-05  6:40   ` Bing Zhao
  2021-05-05  6:40   ` [dpdk-dev] [PATCH v3 05/17] common/mlx5: add DevX CT objects creation Bing Zhao
                     ` (11 subsequent siblings)
  15 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  6:40 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

The definitions of ASO connection tracking objects management
structures are added.

Considering performance, the bulk allocation of ASO CT objects
should be used. The maximal value per bulk and the granularity could
be fetched from HCA capabilities 2. Right now, a fixed number of 64
is used for each bulk for a better management purpose.

The ASO QP for CT is initialized, the SQ will be used for both
modify and query command.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/linux/mlx5_os.c | 13 +++++++++
 drivers/net/mlx5/mlx5.c          | 36 +++++++++++++++++++++++
 drivers/net/mlx5/mlx5.h          | 50 ++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_aso.c | 50 ++++++++++++++++++++++++++++++++
 4 files changed, 149 insertions(+)

diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 479ee7d8d1..5ac787106d 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1323,6 +1323,19 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 			DRV_LOG(DEBUG, "Flow Hit ASO is supported.");
 		}
 #endif /* HAVE_MLX5_DR_CREATE_ACTION_ASO */
+#if defined(HAVE_MLX5_DR_CREATE_ACTION_ASO) && \
+	defined(HAVE_MLX5_DR_ACTION_ASO_CT)
+		if (config->hca_attr.ct_offload &&
+		    priv->mtr_color_reg == REG_C_3) {
+			err = mlx5_flow_aso_ct_mng_init(sh);
+			if (err) {
+				err = -err;
+				goto error;
+			}
+			DRV_LOG(DEBUG, "CT ASO is supported.");
+			sh->ct_aso_en = 1;
+		}
+#endif /* HAVE_MLX5_DR_CREATE_ACTION_ASO && HAVE_MLX5_DR_ACTION_ASO_CT */
 #if defined(HAVE_MLX5DV_DR) && defined(HAVE_MLX5_DR_CREATE_ACTION_FLOW_SAMPLE)
 		if (config->hca_attr.log_max_ft_sampler_num > 0  &&
 		    config->dv_flow_en) {
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 8cd6f1eaee..86dbe6d573 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -670,6 +670,42 @@ mlx5_age_event_prepare(struct mlx5_dev_ctx_shared *sh)
 	}
 }
 
+/*
+ * Initialize the ASO connection tracking structure.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_aso_ct_mng_init(struct mlx5_dev_ctx_shared *sh)
+{
+	int err;
+
+	if (sh->ct_mng)
+		return 0;
+	sh->ct_mng = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*sh->ct_mng),
+				 RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+	if (!sh->ct_mng) {
+		DRV_LOG(ERR, "ASO CT management allocation failed.");
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+	err = mlx5_aso_queue_init(sh, ASO_OPC_MOD_CONNECTION_TRACKING);
+	if (err) {
+		mlx5_free(sh->ct_mng);
+		/* rte_errno should be extracted from the failure. */
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+	rte_spinlock_init(&sh->ct_mng->ct_sl);
+	rte_rwlock_init(&sh->ct_mng->resize_rwl);
+	LIST_INIT(&sh->ct_mng->free_cts);
+	return 0;
+}
+
 /**
  * Initialize the flow resources' indexed mempool.
  *
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index c62977613a..1a5c78fa3a 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -983,6 +983,52 @@ struct mlx5_bond_info {
 	} ports[MLX5_BOND_MAX_PORTS];
 };
 
+/* Number of connection tracking objects per pool: must be a power of 2. */
+#define MLX5_ASO_CT_ACTIONS_PER_POOL 64
+
+/* ASO Conntrack state. */
+enum mlx5_aso_ct_state {
+	ASO_CONNTRACK_FREE, /* Inactive, in the free list. */
+	ASO_CONNTRACK_WAIT, /* WQE sent in the SQ. */
+	ASO_CONNTRACK_READY, /* CQE received w/o error. */
+	ASO_CONNTRACK_QUERY, /* WQE for query sent. */
+	ASO_CONNTRACK_MAX, /* Guard. */
+};
+
+/* Generic ASO connection tracking structure. */
+struct mlx5_aso_ct_action {
+	LIST_ENTRY(mlx5_aso_ct_action) next; /* Pointer to the next ASO CT. */
+	void *dr_action_orig; /* General action object for original dir. */
+	void *dr_action_rply; /* General action object for reply dir. */
+	uint32_t refcnt; /* Action used count in device flows. */
+	uint16_t offset; /* Offset of ASO CT in DevX objects bulk. */
+	uint16_t peer; /* The only peer port index could also use this CT. */
+	enum mlx5_aso_ct_state state; /* ASO CT state. */
+	bool is_original; /* The direction of the DR action to be used. */
+};
+
+/* ASO connection tracking software pool definition. */
+struct mlx5_aso_ct_pool {
+	uint16_t index; /* Pool index in pools array. */
+	struct mlx5_devx_obj *devx_obj;
+	/* The first devx object in the bulk, used for freeing (not yet). */
+	struct mlx5_aso_ct_action actions[MLX5_ASO_CT_ACTIONS_PER_POOL];
+	/* CT action structures bulk. */
+};
+
+LIST_HEAD(aso_ct_list, mlx5_aso_ct_action);
+
+/* Pools management structure for ASO connection tracking pools. */
+struct mlx5_aso_ct_pools_mng {
+	struct mlx5_aso_ct_pool **pools;
+	uint16_t n; /* Total number of pools. */
+	uint16_t next; /* Number of pools in use, index of next free pool. */
+	rte_spinlock_t ct_sl; /* The ASO CT free list lock. */
+	rte_rwlock_t resize_rwl; /* The ASO CT pool resize lock. */
+	struct aso_ct_list free_cts; /* Free ASO CT objects list. */
+	struct mlx5_aso_sq aso_sq; /* ASO queue objects. */
+};
+
 /*
  * Shared Infiniband device context for Master/Representors
  * which belong to same IB device with multiple IB ports.
@@ -996,6 +1042,7 @@ struct mlx5_dev_ctx_shared {
 	uint32_t sq_ts_format:2; /* SQ timestamp formats supported. */
 	uint32_t qp_ts_format:2; /* QP timestamp formats supported. */
 	uint32_t meter_aso_en:1; /* Flow Meter ASO is supported. */
+	uint32_t ct_aso_en:1; /* Connection Tracking ASO is supported. */
 	uint32_t max_port; /* Maximal IB device port index. */
 	struct mlx5_bond_info bond; /* Bonding information. */
 	void *ctx; /* Verbs/DV/DevX context. */
@@ -1058,6 +1105,8 @@ struct mlx5_dev_ctx_shared {
 	rte_spinlock_t geneve_tlv_opt_sl; /* Lock for geneve tlv resource */
 	struct mlx5_flow_mtr_mng *mtrmng;
 	/* Meter management structure. */
+	struct mlx5_aso_ct_pools_mng *ct_mng;
+	/* Management data for ASO connection tracking. */
 	struct mlx5_dev_shared_port port[]; /* per device port data array. */
 };
 
@@ -1355,6 +1404,7 @@ bool mlx5_flex_parser_ecpri_exist(struct rte_eth_dev *dev);
 int mlx5_flex_parser_ecpri_alloc(struct rte_eth_dev *dev);
 int mlx5_flow_aso_age_mng_init(struct mlx5_dev_ctx_shared *sh);
 int mlx5_aso_flow_mtrs_mng_init(struct mlx5_dev_ctx_shared *sh);
+int mlx5_flow_aso_ct_mng_init(struct mlx5_dev_ctx_shared *sh);
 
 /* mlx5_ethdev.c */
 
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index 300987d0e9..c24d865284 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -186,6 +186,43 @@ mlx5_aso_mtr_init_sq(struct mlx5_aso_sq *sq)
 	}
 }
 
+/*
+ * Initialize Send Queue used for ASO connection tracking.
+ *
+ * @param[in] sq
+ *   ASO SQ to initialize.
+ */
+static void
+mlx5_aso_ct_init_sq(struct mlx5_aso_sq *sq)
+{
+	volatile struct mlx5_aso_wqe *restrict wqe;
+	int i;
+	int size = 1 << sq->log_desc_n;
+	uint64_t addr;
+
+	/* All the next fields state should stay constant. */
+	for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
+		wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
+							  (sizeof(*wqe) >> 4));
+		/* One unique MR for the query data. */
+		wqe->aso_cseg.lkey = rte_cpu_to_be_32(sq->mr.mkey->id);
+		/* Magic number 64 represents the length of a ASO CT obj. */
+		addr = (uint64_t)((uintptr_t)sq->mr.addr + i * 64);
+		wqe->aso_cseg.va_h = rte_cpu_to_be_32((uint32_t)(addr >> 32));
+		wqe->aso_cseg.va_l_r = rte_cpu_to_be_32((uint32_t)addr | 1u);
+		/*
+		 * The values of operand_masks are different for modify
+		 * and query.
+		 * And data_mask may be different for each modification. In
+		 * query, it could be zero and ignored.
+		 * CQE generation is always needed, in order to decide when
+		 * it is available to create the flow or read the data.
+		 */
+		wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
+						   MLX5_COMP_MODE_OFFSET);
+	}
+}
+
 /**
  * Create Send Queue used for ASO access.
  *
@@ -293,6 +330,19 @@ mlx5_aso_queue_init(struct mlx5_dev_ctx_shared *sh,
 			return -1;
 		mlx5_aso_mtr_init_sq(&sh->mtrmng->pools_mng.sq);
 		break;
+	case ASO_OPC_MOD_CONNECTION_TRACKING:
+		/* 64B per object for query. */
+		if (mlx5_aso_reg_mr(sh, 64 * sq_desc_n,
+				    &sh->ct_mng->aso_sq.mr, 0))
+			return -1;
+		if (mlx5_aso_sq_create(sh->ctx, &sh->ct_mng->aso_sq, 0,
+				sh->tx_uar, sh->pdn, MLX5_ASO_QUEUE_LOG_DESC,
+				sh->sq_ts_format)) {
+			mlx5_aso_dereg_mr(sh, &sh->ct_mng->aso_sq.mr);
+			return -1;
+		}
+		mlx5_aso_ct_init_sq(&sh->ct_mng->aso_sq);
+		break;
 	default:
 		DRV_LOG(ERR, "Unknown ASO operation mode");
 		return -1;
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v3 05/17] common/mlx5: add DevX CT objects creation
  2021-05-05  6:40 ` [dpdk-dev] [PATCH v3 " Bing Zhao
                     ` (3 preceding siblings ...)
  2021-05-05  6:40   ` [dpdk-dev] [PATCH v3 04/17] net/mlx5: initialization of CT management Bing Zhao
@ 2021-05-05  6:40   ` Bing Zhao
  2021-05-05  6:40   ` [dpdk-dev] [PATCH v3 06/17] net/mlx5: add modify support for CT Bing Zhao
                     ` (10 subsequent siblings)
  15 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  6:40 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

Adding support for connection tracking ASO creation via Devx command.
Right now only bulk creation is supported.

By default, the objects with zero contents will be created. Before
using a single object, the modification via posting a WQE to the ASO
CT SQ is needed.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/common/mlx5/mlx5_devx_cmds.c | 50 ++++++++++++++++++++++++++++
 drivers/common/mlx5/mlx5_devx_cmds.h |  4 +++
 drivers/common/mlx5/version.map      |  1 +
 3 files changed, 55 insertions(+)

diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index ad67883fde..dc01266642 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -2232,6 +2232,56 @@ mlx5_devx_cmd_create_flow_meter_aso_obj(void *ctx, uint32_t pd,
 	return flow_meter_aso_obj;
 }
 
+/*
+ * Create general object of type CONN_TRACK_OFFLOAD using DevX API.
+ *
+ * @param[in] ctx
+ *   Context returned from mlx5 open_device() glue function.
+ * @param [in] pd
+ *   PD value to associate the CONN_TRACK_OFFLOAD ASO object with.
+ * @param [in] log_obj_size
+ *   log_obj_size to allocate its power of 2 * objects
+ *   in one CONN_TRACK_OFFLOAD bulk allocation.
+ *
+ * @return
+ *   The DevX object created, NULL otherwise and rte_errno is set.
+ */
+struct mlx5_devx_obj *
+mlx5_devx_cmd_create_conn_track_offload_obj(void *ctx, uint32_t pd,
+					    uint32_t log_obj_size)
+{
+	uint32_t in[MLX5_ST_SZ_DW(create_conn_track_aso_in)] = {0};
+	uint32_t out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+	struct mlx5_devx_obj *ct_aso_obj;
+	void *ptr;
+
+	ct_aso_obj = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*ct_aso_obj),
+				 0, SOCKET_ID_ANY);
+	if (!ct_aso_obj) {
+		DRV_LOG(ERR, "Failed to allocate CONN_TRACK_OFFLOAD object.");
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+	ptr = MLX5_ADDR_OF(create_conn_track_aso_in, in, hdr);
+	MLX5_SET(general_obj_in_cmd_hdr, ptr, opcode,
+		 MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, ptr, obj_type,
+		 MLX5_GENERAL_OBJ_TYPE_CONN_TRACK_OFFLOAD);
+	MLX5_SET(general_obj_in_cmd_hdr, ptr, log_obj_range, log_obj_size);
+	ptr = MLX5_ADDR_OF(create_conn_track_aso_in, in, conn_track_offload);
+	MLX5_SET(conn_track_offload, ptr, conn_track_aso_access_pd, pd);
+	ct_aso_obj->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in),
+						     out, sizeof(out));
+	if (!ct_aso_obj->obj) {
+		rte_errno = errno;
+		DRV_LOG(ERR, "Failed to create CONN_TRACK_OFFLOAD obj by using DevX.");
+		mlx5_free(ct_aso_obj);
+		return NULL;
+	}
+	ct_aso_obj->id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+	return ct_aso_obj;
+}
+
 /**
  * Create general object of type GENEVE TLV option using DevX API.
  *
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h
index 746320cf04..e67cea506d 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -569,6 +569,10 @@ struct mlx5_devx_obj *mlx5_devx_cmd_queue_counter_alloc(void *ctx);
 __rte_internal
 int mlx5_devx_cmd_queue_counter_query(struct mlx5_devx_obj *dcs, int clear,
 				      uint32_t *out_of_buffers);
+__rte_internal
+struct mlx5_devx_obj *mlx5_devx_cmd_create_conn_track_offload_obj(void *ctx,
+					uint32_t pd, uint32_t log_obj_size);
+
 /**
  * Create general object of type FLOW_METER_ASO using DevX API..
  *
diff --git a/drivers/common/mlx5/version.map b/drivers/common/mlx5/version.map
index 18dc96276d..4bbcba5b8e 100644
--- a/drivers/common/mlx5/version.map
+++ b/drivers/common/mlx5/version.map
@@ -13,6 +13,7 @@ INTERNAL {
 	mlx5_dev_to_pci_addr; # WINDOWS_NO_EXPORT
 
 	mlx5_devx_cmd_alloc_pd;
+	mlx5_devx_cmd_create_conn_track_offload_obj;
 	mlx5_devx_cmd_create_cq;
 	mlx5_devx_cmd_create_flex_parser;
 	mlx5_devx_cmd_create_qp;
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v3 06/17] net/mlx5: add modify support for CT
  2021-05-05  6:40 ` [dpdk-dev] [PATCH v3 " Bing Zhao
                     ` (4 preceding siblings ...)
  2021-05-05  6:40   ` [dpdk-dev] [PATCH v3 05/17] common/mlx5: add DevX CT objects creation Bing Zhao
@ 2021-05-05  6:40   ` Bing Zhao
  2021-05-05  6:40   ` [dpdk-dev] [PATCH v3 07/17] net/mlx5: add actions creating " Bing Zhao
                     ` (9 subsequent siblings)
  15 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  6:40 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

After the connection tracking object bulk is allocated, all the
objects' contents are filled with zero by default. Every
new-allocated object must be modified via WQE operation before it is
used.

In order to reduce the latency for the flow creation, an asynchronous
way is used instead of busy waiting for the CQE to be generated.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.h          |   8 +
 drivers/net/mlx5/mlx5_flow.h     |   3 +
 drivers/net/mlx5/mlx5_flow_aso.c | 252 +++++++++++++++++++++++++++++++
 3 files changed, 263 insertions(+)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 1a5c78fa3a..1898a0401f 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -490,6 +490,7 @@ struct mlx5_aso_sq_elem {
 			uint16_t burst_size;
 		};
 		struct mlx5_aso_mtr *mtr;
+		struct mlx5_aso_ct_action *ct;
 	};
 };
 
@@ -1007,6 +1008,10 @@ struct mlx5_aso_ct_action {
 	bool is_original; /* The direction of the DR action to be used. */
 };
 
+/* Atomically store state (s) into CT action object (c), relaxed ordering. */
+#define MLX5_ASO_CT_UPDATE_STATE(c, s) \
+	__atomic_store_n(&((c)->state), (s), __ATOMIC_RELAXED)
+
 /* ASO connection tracking software pool definition. */
 struct mlx5_aso_ct_pool {
 	uint16_t index; /* Pool index in pools array. */
@@ -1690,5 +1695,8 @@ int mlx5_aso_meter_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
 		struct mlx5_aso_mtr *mtr);
 int mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh,
 		struct mlx5_aso_mtr *mtr);
+int mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
+			      struct mlx5_aso_ct_action *ct,
+			      const struct rte_flow_action_conntrack *profile);
 
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 964e13a869..eb5b53ac6a 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -45,6 +45,7 @@ enum mlx5_rte_flow_action_type {
 enum {
 	MLX5_INDIRECT_ACTION_TYPE_RSS,
 	MLX5_INDIRECT_ACTION_TYPE_AGE,
+	MLX5_INDIRECT_ACTION_TYPE_AGE,
 };
 
 /* Matches on selected register. */
@@ -839,6 +840,8 @@ struct mlx5_flow {
 #define MLX5_ASO_WQE_CQE_RESPONSE_DELAY 10u
 #define MLX5_MTR_POLL_WQE_CQE_TIMES 100000u
 
+#define MLX5_CT_POLL_WQE_CQE_TIMES MLX5_MTR_POLL_WQE_CQE_TIMES
+
 #define MLX5_MAN_WIDTH 8
 /* Legacy Meter parameter structure. */
 struct mlx5_legacy_flow_meter {
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index c24d865284..0ff19e6171 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -887,3 +887,255 @@ mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh,
 			mtr->offset);
 	return -1;
 }
+
+/*
+ * Post a WQE to the ASO CT SQ to modify the context.
+ *
+ * @param[in] mng
+ *   Pointer to the CT pools management structure.
+ * @param[in] ct
+ *   Pointer to the generic CT structure related to the context.
+ * @param[in] profile
+ *   Pointer to configuration profile.
+ *
+ * @return
+ *   1 on success (number of WQEs posted), 0 if the SQ has no free WQE.
+ */
+static uint16_t
+mlx5_aso_ct_sq_enqueue_single(struct mlx5_aso_ct_pools_mng *mng,
+			      struct mlx5_aso_ct_action *ct,
+			      const struct rte_flow_action_conntrack *profile)
+{
+	volatile struct mlx5_aso_wqe *wqe = NULL;
+	struct mlx5_aso_sq *sq = &mng->aso_sq;
+	uint16_t size = 1 << sq->log_desc_n;
+	uint16_t mask = size - 1;
+	uint16_t res;
+	struct mlx5_aso_ct_pool *pool;
+	void *desg;
+	void *orig_dir;
+	void *reply_dir;
+
+	rte_spinlock_lock(&sq->sqsl);
+	/* The SQ lock prevents other threads from updating the indexes. */
+	res = size - (uint16_t)(sq->head - sq->tail);
+	if (unlikely(!res)) {
+		rte_spinlock_unlock(&sq->sqsl);
+		DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
+		return 0;
+	}
+	wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
+	rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
+	/* Fill next WQE. */
+	MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_WAIT);
+	sq->elts[sq->head & mask].ct = ct;
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	/* Each WQE will have a single CT object. */
+	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
+						  ct->offset);
+	wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
+			(ASO_OPC_MOD_CONNECTION_TRACKING <<
+			 WQE_CSEG_OPC_MOD_OFFSET) |
+			sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
+	wqe->aso_cseg.operand_masks = rte_cpu_to_be_32
+			(0u |
+			 (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
+			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
+			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
+			 (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
+	wqe->aso_cseg.data_mask = UINT64_MAX;
+	/* Drop the volatile qualifier to make compiler happy. */
+	desg = (void *)(uintptr_t)wqe->aso_dseg.data;
+	MLX5_SET(conn_track_aso, desg, valid, 1);
+	MLX5_SET(conn_track_aso, desg, state, profile->state);
+	MLX5_SET(conn_track_aso, desg, freeze_track, !profile->enable);
+	MLX5_SET(conn_track_aso, desg, connection_assured,
+		 profile->live_connection);
+	MLX5_SET(conn_track_aso, desg, sack_permitted, profile->selective_ack);
+	MLX5_SET(conn_track_aso, desg, challenged_acked,
+		 profile->challenge_ack_passed);
+	/* Heartbeat, retransmission_counter, retranmission_limit_exceeded: 0 */
+	MLX5_SET(conn_track_aso, desg, heartbeat, 0);
+	MLX5_SET(conn_track_aso, desg, max_ack_window,
+		 profile->max_ack_window);
+	MLX5_SET(conn_track_aso, desg, retransmission_counter, 0);
+	MLX5_SET(conn_track_aso, desg, retranmission_limit_exceeded, 0);
+	MLX5_SET(conn_track_aso, desg, retranmission_limit,
+		 profile->retransmission_limit);
+	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_scale,
+		 profile->reply_dir.scale);
+	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_close_initiated,
+		 profile->reply_dir.close_initiated);
+	/* Both directions will use the same liberal mode. */
+	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_liberal_enabled,
+		 profile->liberal_mode);
+	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_data_unacked,
+		 profile->reply_dir.data_unacked);
+	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_max_ack,
+		 profile->reply_dir.last_ack_seen);
+	MLX5_SET(conn_track_aso, desg, original_direction_tcp_scale,
+		 profile->original_dir.scale);
+	MLX5_SET(conn_track_aso, desg, original_direction_tcp_close_initiated,
+		 profile->original_dir.close_initiated);
+	MLX5_SET(conn_track_aso, desg, original_direction_tcp_liberal_enabled,
+		 profile->liberal_mode);
+	MLX5_SET(conn_track_aso, desg, original_direction_tcp_data_unacked,
+		 profile->original_dir.data_unacked);
+	MLX5_SET(conn_track_aso, desg, original_direction_tcp_max_ack,
+		 profile->original_dir.last_ack_seen);
+	MLX5_SET(conn_track_aso, desg, last_win, profile->last_window);
+	MLX5_SET(conn_track_aso, desg, last_dir, profile->last_direction);
+	MLX5_SET(conn_track_aso, desg, last_index, profile->last_index);
+	MLX5_SET(conn_track_aso, desg, last_seq, profile->last_seq);
+	MLX5_SET(conn_track_aso, desg, last_ack, profile->last_ack);
+	MLX5_SET(conn_track_aso, desg, last_end, profile->last_end);
+	orig_dir = MLX5_ADDR_OF(conn_track_aso, desg, original_dir);
+	MLX5_SET(tcp_window_params, orig_dir, sent_end,
+		 profile->original_dir.sent_end);
+	MLX5_SET(tcp_window_params, orig_dir, reply_end,
+		 profile->original_dir.reply_end);
+	MLX5_SET(tcp_window_params, orig_dir, max_win,
+		 profile->original_dir.max_win);
+	MLX5_SET(tcp_window_params, orig_dir, max_ack,
+		 profile->original_dir.max_ack);
+	reply_dir = MLX5_ADDR_OF(conn_track_aso, desg, reply_dir);
+	MLX5_SET(tcp_window_params, reply_dir, sent_end,
+		 profile->reply_dir.sent_end);
+	MLX5_SET(tcp_window_params, reply_dir, reply_end,
+		 profile->reply_dir.reply_end);
+	MLX5_SET(tcp_window_params, reply_dir, max_win,
+		 profile->reply_dir.max_win);
+	MLX5_SET(tcp_window_params, reply_dir, max_ack,
+		 profile->reply_dir.max_ack);
+	sq->head++;
+	sq->pi += 2; /* Each WQE contains 2 WQEBB's. */
+	rte_io_wmb();
+	sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
+	rte_wmb();
+	*sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH. */
+	rte_wmb();
+	rte_spinlock_unlock(&sq->sqsl);
+	return 1;
+}
+
+/*
+ * Update the state of CTs to READY, indicating they can be used by flows.
+ * The CTs are taken from @p num consecutive SQ elements starting at the tail.
+ *
+ * @param[in] sq
+ *   Pointer to ASO CT SQ.
+ * @param[in] num
+ *   Number of CT structures to be updated.
+ *
+ * @note
+ *   Nothing is returned and the SQ tail is not moved by this routine.
+ */
+static void
+mlx5_aso_ct_status_update(struct mlx5_aso_sq *sq, uint16_t num)
+{
+	uint16_t size = 1 << sq->log_desc_n;
+	uint16_t mask = size - 1;
+	uint16_t i;
+	struct mlx5_aso_ct_action *ct = NULL;
+	uint16_t idx;
+
+	for (i = 0; i < num; i++) {
+		idx = (uint16_t)((sq->tail + i) & mask);
+		ct = sq->elts[idx].ct;
+		MLX5_ASSERT(ct);
+		MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_READY);
+	}
+}
+
+/*
+ * Handle completions from WQEs sent to ASO CT.
+ *
+ * @param[in] mng
+ *   Pointer to the CT pools management structure.
+ */
+static void
+mlx5_aso_ct_completion_handle(struct mlx5_aso_ct_pools_mng *mng)
+{
+	struct mlx5_aso_sq *sq = &mng->aso_sq;
+	struct mlx5_aso_cq *cq = &sq->cq;
+	volatile struct mlx5_cqe *restrict cqe;
+	const uint32_t cq_size = 1 << cq->log_desc_n;
+	const uint32_t mask = cq_size - 1;
+	uint32_t idx;
+	uint32_t next_idx;
+	uint16_t max;
+	uint16_t n = 0;
+	int ret;
+
+	rte_spinlock_lock(&sq->sqsl);
+	max = (uint16_t)(sq->head - sq->tail);
+	if (unlikely(!max)) {
+		rte_spinlock_unlock(&sq->sqsl);
+		return;
+	}
+	next_idx = cq->cq_ci & mask;
+	do {
+		idx = next_idx;
+		next_idx = (cq->cq_ci + 1) & mask;
+		/* Prefetch next CQE. TODO: confirm the prefetch position. */
+		rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
+		cqe = &cq->cq_obj.cqes[idx];
+		ret = check_cqe(cqe, cq_size, cq->cq_ci);
+		/*
+		 * Be sure owner read is done before any other cookie field or
+		 * opaque field.
+		 */
+		rte_io_rmb();
+		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
+			if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
+				break;
+			mlx5_aso_cqe_err_handle(sq);
+		} else {
+			n++;
+		}
+		cq->cq_ci++;
+	} while (1);
+	if (likely(n)) {
+		mlx5_aso_ct_status_update(sq, n);
+		sq->tail += n;
+		rte_io_wmb();
+		cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
+	}
+	rte_spinlock_unlock(&sq->sqsl);
+}
+
+/*
+ * Update connection tracking ASO context by posting a WQE, no CQE waiting.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ * @param[in] ct
+ *   Pointer to connection tracking offload object.
+ * @param[in] profile
+ *   Pointer to connection tracking TCP parameter.
+ *
+ * @return
+ *   0 once the WQE is posted, -1 if no WQE got free after all the retries.
+ */
+int
+mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
+			  struct mlx5_aso_ct_action *ct,
+			  const struct rte_flow_action_conntrack *profile)
+{
+	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
+	uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
+	struct mlx5_aso_ct_pool *pool;
+
+	MLX5_ASSERT(ct);
+	do {
+		mlx5_aso_ct_completion_handle(mng);
+		if (mlx5_aso_ct_sq_enqueue_single(mng, ct, profile))
+			return 0;
+		/* No free WQE, wait for completions to release some. */
+		rte_delay_us_sleep(10u);
+	} while (--poll_wqe_times);
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	DRV_LOG(ERR, "Fail to send WQE for ASO CT %d in pool %d",
+		ct->offset, pool->index);
+	return -1;
+}
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v3 07/17] net/mlx5: add actions creating for CT
  2021-05-05  6:40 ` [dpdk-dev] [PATCH v3 " Bing Zhao
                     ` (5 preceding siblings ...)
  2021-05-05  6:40   ` [dpdk-dev] [PATCH v3 06/17] net/mlx5: add modify support for CT Bing Zhao
@ 2021-05-05  6:40   ` Bing Zhao
  2021-05-05  6:40   ` [dpdk-dev] [PATCH v3 08/17] net/mlx5: close CT management structure Bing Zhao
                     ` (8 subsequent siblings)
  15 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  6:40 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

Allocate a CT from the management pools and create the DR actions
for both directions by default.

If there is no available connection tracking action, a new pool will
be created with a fixed size bulk allocation. Right now, all the
resources are controlled by the linked list.

The ASO connection tracking context associated with these actions
needs to be updated via WQE before being used for steering.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.h         |   4 +
 drivers/net/mlx5/mlx5_flow.h    |  29 +++-
 drivers/net/mlx5/mlx5_flow_dv.c | 263 ++++++++++++++++++++++++++++++++
 3 files changed, 295 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 1898a0401f..de18a59c8e 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -987,6 +987,10 @@ struct mlx5_bond_info {
 /* Number of connection tracking objects per pool: must be a power of 2. */
 #define MLX5_ASO_CT_ACTIONS_PER_POOL 64
 
+/* Build a unique 1-based CT index from pool and offset (0 is invalid). */
+#define MLX5_MAKE_CT_IDX(pool, offset) \
+	((pool) * MLX5_ASO_CT_ACTIONS_PER_POOL + (offset) + 1)
+
 /* ASO Conntrack state. */
 enum mlx5_aso_ct_state {
 	ASO_CONNTRACK_FREE, /* Inactive, in the free list. */
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index eb5b53ac6a..8f2bc7d2f6 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -45,7 +45,7 @@ enum mlx5_rte_flow_action_type {
 enum {
 	MLX5_INDIRECT_ACTION_TYPE_RSS,
 	MLX5_INDIRECT_ACTION_TYPE_AGE,
-	MLX5_INDIRECT_ACTION_TYPE_AGE,
+	MLX5_INDIRECT_ACTION_TYPE_CT,
 };
 
 /* Matches on selected register. */
@@ -1288,6 +1288,33 @@ mlx5_aso_meter_by_idx(struct mlx5_priv *priv, uint32_t idx)
 	return &pool->mtrs[idx % MLX5_ASO_MTRS_PER_POOL];
 }
 
+/*
+ * Get ASO CT action by index.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] idx
+ *   1-based index of the ASO CT action, as made by MLX5_MAKE_CT_IDX().
+ *
+ * @return
+ *   The specified ASO CT action pointer.
+ */
+static inline struct mlx5_aso_ct_action *
+flow_aso_ct_get_by_idx(struct rte_eth_dev *dev, uint32_t idx)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	struct mlx5_aso_ct_pool *pool;
+
+	idx--;
+	MLX5_ASSERT((idx / MLX5_ASO_CT_ACTIONS_PER_POOL) < mng->n);
+	/* Shift and AND could be used instead of division and modulo. */
+	rte_rwlock_read_lock(&mng->resize_rwl);
+	pool = mng->pools[idx / MLX5_ASO_CT_ACTIONS_PER_POOL];
+	rte_rwlock_read_unlock(&mng->resize_rwl);
+	return &pool->actions[idx % MLX5_ASO_CT_ACTIONS_PER_POOL];
+}
+
 int mlx5_flow_group_to_table(struct rte_eth_dev *dev,
 			     const struct mlx5_flow_tunnel *tunnel,
 			     uint32_t group, uint32_t *table,
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 0d022dff3f..c8ff693e4c 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -11120,6 +11120,262 @@ flow_dv_translate_create_aso_age(struct rte_eth_dev *dev,
 	return age_idx;
 }
 
+/*
+ * Drop a reference of an ASO CT action and recycle it if it was the last one.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] idx
+ *   Index of ASO CT action to release.
+ *
+ * @return
+ *   0 when CT action was removed, otherwise the number of references.
+ */
+static inline int
+flow_dv_aso_ct_release(struct rte_eth_dev *dev, uint32_t idx)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	struct mlx5_aso_ct_action *ct = flow_aso_ct_get_by_idx(dev, idx);
+	uint32_t ret = __atomic_sub_fetch(&ct->refcnt, 1, __ATOMIC_RELAXED);
+
+	if (!ret) {
+		if (ct->dr_action_orig) {
+#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
+			claim_zero(mlx5_glue->destroy_flow_action
+					(ct->dr_action_orig));
+#endif
+			ct->dr_action_orig = NULL;
+		}
+		if (ct->dr_action_rply) {
+#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
+			claim_zero(mlx5_glue->destroy_flow_action
+					(ct->dr_action_rply));
+#endif
+			ct->dr_action_rply = NULL;
+		}
+		rte_spinlock_lock(&mng->ct_sl);
+		LIST_INSERT_HEAD(&mng->free_cts, ct, next);
+		rte_spinlock_unlock(&mng->ct_sl);
+	}
+	return ret;
+}
+
+/*
+ * Grow the ASO CT pools array by 64 entries, keeping the existing pools.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ *
+ * @return
+ *   0 on success, otherwise negative errno value and rte_errno is set.
+ */
+static int
+flow_dv_aso_ct_pools_resize(struct rte_eth_dev *dev)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	void *old_pools = mng->pools;
+	/* Magic number now, need a macro. */
+	uint32_t resize = mng->n + 64;
+	uint32_t mem_size = sizeof(struct mlx5_aso_ct_pool *) * resize;
+	void *pools = mlx5_malloc(MLX5_MEM_ZERO, mem_size, 0, SOCKET_ID_ANY);
+
+	if (!pools) {
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+	rte_rwlock_write_lock(&mng->resize_rwl);
+	/* Only the array moves, ASO SQ/QP was initialized in the startup. */
+	if (old_pools) {
+		/* Realloc could be an alternative choice. */
+		rte_memcpy(pools, old_pools,
+			   mng->n * sizeof(struct mlx5_aso_ct_pool *));
+		mlx5_free(old_pools);
+	}
+	mng->n = resize;
+	mng->pools = pools;
+	rte_rwlock_write_unlock(&mng->resize_rwl);
+	return 0;
+}
+
+/*
+ * Create and initialize a new ASO CT pool.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[out] ct_free
+ *   Where to put the pointer of a new CT action.
+ *
+ * @return
+ *   The CT actions pool pointer and @p ct_free is set on success,
+ *   NULL otherwise and rte_errno is set.
+ */
+static struct mlx5_aso_ct_pool *
+flow_dv_ct_pool_create(struct rte_eth_dev *dev,
+		       struct mlx5_aso_ct_action **ct_free)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	struct mlx5_aso_ct_pool *pool = NULL;
+	struct mlx5_devx_obj *obj = NULL;
+	uint32_t i;
+	uint32_t log_obj_size = rte_log2_u32(MLX5_ASO_CT_ACTIONS_PER_POOL);
+
+	obj = mlx5_devx_cmd_create_conn_track_offload_obj(priv->sh->ctx,
+						priv->sh->pdn, log_obj_size);
+	if (!obj) {
+		rte_errno = ENODATA;
+		DRV_LOG(ERR, "Failed to create conn_track_offload_obj using DevX.");
+		return NULL;
+	}
+	pool = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*pool), 0, SOCKET_ID_ANY);
+	if (!pool) {
+		rte_errno = ENOMEM;
+		claim_zero(mlx5_devx_cmd_destroy(obj));
+		return NULL;
+	}
+	pool->devx_obj = obj;
+	pool->index = mng->next;
+	/* Resize pools array if there is no room for the new pool in it. */
+	if (pool->index == mng->n && flow_dv_aso_ct_pools_resize(dev)) {
+		claim_zero(mlx5_devx_cmd_destroy(obj));
+		mlx5_free(pool);
+		return NULL;
+	}
+	mng->pools[pool->index] = pool;
+	mng->next++;
+	/* Assign the first action in the new pool, the rest go to free list. */
+	*ct_free = &pool->actions[0];
+	/* The caller holds the CT lock, so the list operation is safe here. */
+	for (i = 1; i < MLX5_ASO_CT_ACTIONS_PER_POOL; i++) {
+		/* refcnt is already 0 since the pool memory is zeroed. */
+		pool->actions[i].offset = i;
+		LIST_INSERT_HEAD(&mng->free_cts, &pool->actions[i], next);
+	}
+	return pool;
+}
+
+/*
+ * Allocate an ASO CT action from the free list or from a new pool.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   Index to ASO CT action on success, 0 otherwise and rte_errno is set.
+ */
+static uint32_t
+flow_dv_aso_ct_alloc(struct rte_eth_dev *dev, struct rte_flow_error *error)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	struct mlx5_aso_ct_action *ct = NULL;
+	struct mlx5_aso_ct_pool *pool;
+	uint8_t reg_c;
+	uint32_t ct_idx;
+
+	MLX5_ASSERT(mng);
+	if (!priv->config.devx) {
+		rte_errno = ENOTSUP;
+		return 0;
+	}
+	/* Get a free CT action; if there is none, create a new pool. */
+	rte_spinlock_lock(&mng->ct_sl);
+	ct = LIST_FIRST(&mng->free_cts);
+	if (ct) {
+		LIST_REMOVE(ct, next);
+	} else if (!flow_dv_ct_pool_create(dev, &ct)) {
+		rte_spinlock_unlock(&mng->ct_sl);
+		rte_flow_error_set(error, rte_errno, RTE_FLOW_ERROR_TYPE_ACTION,
+				   NULL, "failed to create ASO CT pool");
+		return 0;
+	}
+	rte_spinlock_unlock(&mng->ct_sl);
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	ct_idx = MLX5_MAKE_CT_IDX(pool->index, ct->offset);
+	/* 0: inactive, 1: created, 2+: used by flows. */
+	__atomic_store_n(&ct->refcnt, 1, __ATOMIC_RELAXED);
+	reg_c = mlx5_flow_get_reg_id(dev, MLX5_ASO_CONNTRACK, 0, error);
+	if (!ct->dr_action_orig) {
+#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
+		ct->dr_action_orig = mlx5_glue->dv_create_flow_action_aso
+			(priv->sh->rx_domain, pool->devx_obj->obj,
+			 ct->offset,
+			 MLX5DV_DR_ACTION_FLAGS_ASO_CT_DIRECTION_INITIATOR,
+			 reg_c - REG_C_0);
+#else
+		RTE_SET_USED(reg_c);
+#endif
+		if (!ct->dr_action_orig) {
+			flow_dv_aso_ct_release(dev, ct_idx);
+			rte_flow_error_set(error, rte_errno,
+					   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					   "failed to create ASO CT action");
+			return 0;
+		}
+	}
+	if (!ct->dr_action_rply) {
+#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
+		ct->dr_action_rply = mlx5_glue->dv_create_flow_action_aso
+			(priv->sh->rx_domain, pool->devx_obj->obj,
+			 ct->offset,
+			 MLX5DV_DR_ACTION_FLAGS_ASO_CT_DIRECTION_RESPONDER,
+			 reg_c - REG_C_0);
+#endif
+		if (!ct->dr_action_rply) {
+			flow_dv_aso_ct_release(dev, ct_idx);
+			rte_flow_error_set(error, rte_errno,
+					   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					   "failed to create ASO CT action");
+			return 0;
+		}
+	}
+	return ct_idx;
+}
+
+/*
+ * Create a conntrack object with context and actions by using ASO mechanism.
+ *
+ * @param[in] dev
+ *   Pointer to rte_eth_dev structure.
+ * @param[in] pro
+ *   Pointer to conntrack information profile.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   Index to conntrack object on success, 0 otherwise and @p error is set.
+ */
+static uint32_t
+flow_dv_translate_create_conntrack(struct rte_eth_dev *dev,
+				   const struct rte_flow_action_conntrack *pro,
+				   struct rte_flow_error *error)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
+	struct mlx5_aso_ct_action *ct;
+	uint32_t idx;
+
+	/*
+	 * The value of rte_flow_error_set() must not be returned from here:
+	 * it is a negative errno, which would become a non-zero uint32_t
+	 * looking like a valid index. All the failures return 0 instead.
+	 */
+	if (!sh->ct_aso_en) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+				   "Connection is not supported");
+		return 0;
+	}
+	idx = flow_dv_aso_ct_alloc(dev, error);
+	if (!idx) {
+		rte_flow_error_set(error, rte_errno,
+				   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+				   "Failed to allocate CT object");
+		return 0;
+	}
+	ct = flow_aso_ct_get_by_idx(dev, idx);
+	if (mlx5_aso_ct_update_by_wqe(sh, ct, pro)) {
+		/* Do not leak the CT object that was just allocated. */
+		flow_dv_aso_ct_release(dev, idx);
+		rte_flow_error_set(error, EBUSY,
+				   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+				   "Failed to update CT");
+		return 0;
+	}
+	return idx;
+}
+
 /**
  * Fill the flow with DV spec, lock free
  * (mutex should be acquired by caller).
@@ -13317,6 +13573,7 @@ flow_dv_action_create(struct rte_eth_dev *dev,
 {
 	uint32_t idx = 0;
 	uint32_t ret = 0;
+	struct mlx5_priv *priv = dev->data->dev_private;
 
 	switch (action->type) {
 	case RTE_FLOW_ACTION_TYPE_RSS:
@@ -13337,6 +13594,12 @@ flow_dv_action_create(struct rte_eth_dev *dev,
 							 (void *)(uintptr_t)idx;
 		}
 		break;
+	case RTE_FLOW_ACTION_TYPE_CONNTRACK:
+		ret = flow_dv_translate_create_conntrack(dev, action->conf,
+							 err);
+		idx = (MLX5_INDIRECT_ACTION_TYPE_CT <<
+		       MLX5_INDIRECT_ACTION_TYPE_OFFSET) | ret;
+		break;
 	default:
 		rte_flow_error_set(err, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
 				   NULL, "action type not supported");
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v3 08/17] net/mlx5: close CT management structure
  2021-05-05  6:40 ` [dpdk-dev] [PATCH v3 " Bing Zhao
                     ` (6 preceding siblings ...)
  2021-05-05  6:40   ` [dpdk-dev] [PATCH v3 07/17] net/mlx5: add actions creating " Bing Zhao
@ 2021-05-05  6:40   ` Bing Zhao
  2021-05-05  6:40   ` [dpdk-dev] [PATCH v3 09/17] net/mlx5: add ASO CT query implementation Bing Zhao
                     ` (7 subsequent siblings)
  15 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  6:40 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

When the IB shared context is freed while stopping a device, the
ASO connection tracking management structure should also be cleaned
up.

All the DR actions created should be destroyed. The structures need
to be freed and the ASO CT QP should be released. Meanwhile, the
memory region allocated and registered for queries should also be
deregistered and then freed.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.c          | 56 ++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_aso.c |  4 +++
 2 files changed, 60 insertions(+)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 86dbe6d573..d563da109a 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -706,6 +706,60 @@ mlx5_flow_aso_ct_mng_init(struct mlx5_dev_ctx_shared *sh)
 	return 0;
 }
 
+/*
+ * Close the ASO connection tracking management structure and release all
+ * its resources: the ASO queue, the DR actions, the DevX objects, the pools.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object to free.
+ */
+static void
+mlx5_flow_aso_ct_mng_close(struct mlx5_dev_ctx_shared *sh)
+{
+	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
+	struct mlx5_aso_ct_pool *ct_pool;
+	struct mlx5_aso_ct_action *ct;
+	uint32_t idx;
+	uint32_t val;
+	uint32_t cnt;
+	int i;
+
+	mlx5_aso_queue_uninit(sh, ASO_OPC_MOD_CONNECTION_TRACKING);
+	idx = mng->next;
+	while (idx--) {
+		cnt = 0;
+		ct_pool = mng->pools[idx];
+		for (i = 0; i < MLX5_ASO_CT_ACTIONS_PER_POOL; i++) {
+			ct = &ct_pool->actions[i];
+			/* refcnt 0 (free) and 1 (created) are both fine. */
+			val = __atomic_load_n(&ct->refcnt, __ATOMIC_RELAXED);
+			MLX5_ASSERT(val <= 1);
+			if (val > 1)
+				cnt++;
+#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
+			if (ct->dr_action_orig)
+				claim_zero(mlx5_glue->destroy_flow_action
+							(ct->dr_action_orig));
+			if (ct->dr_action_rply)
+				claim_zero(mlx5_glue->destroy_flow_action
+							(ct->dr_action_rply));
+#endif
+		}
+		claim_zero(mlx5_devx_cmd_destroy(ct_pool->devx_obj));
+		if (cnt) {
+			DRV_LOG(DEBUG, "%u ASO CT objects are being used in the pool %u",
+				cnt, idx);
+		}
+		mlx5_free(ct_pool);
+		/* in case of failure. */
+		mng->next--;
+	}
+	mlx5_free(mng->pools);
+	mlx5_free(mng);
+	/* Management structure must be cleared to 0s during allocation. */
+	sh->ct_mng = NULL;
+}
+
 /**
  * Initialize the flow resources' indexed mempool.
  *
@@ -1508,6 +1562,8 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	if (priv->mreg_cp_tbl)
 		mlx5_hlist_destroy(priv->mreg_cp_tbl);
 	mlx5_mprq_free_mp(dev);
+	if (priv->sh->ct_mng)
+		mlx5_flow_aso_ct_mng_close(priv->sh);
 	mlx5_os_free_shared_dr(priv);
 	if (priv->rss_conf.rss_key != NULL)
 		mlx5_free(priv->rss_conf.rss_key);
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index 0ff19e6171..3c2350a6b8 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -372,6 +372,10 @@ mlx5_aso_queue_uninit(struct mlx5_dev_ctx_shared *sh,
 	case ASO_OPC_MOD_POLICER:
 		sq = &sh->mtrmng->pools_mng.sq;
 		break;
+	case ASO_OPC_MOD_CONNECTION_TRACKING:
+		mlx5_aso_dereg_mr(sh, &sh->ct_mng->aso_sq.mr);
+		sq = &sh->ct_mng->aso_sq;
+		break;
 	default:
 		DRV_LOG(ERR, "Unknown ASO operation mode");
 		return;
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v3 09/17] net/mlx5: add ASO CT query implementation
  2021-05-05  6:40 ` [dpdk-dev] [PATCH v3 " Bing Zhao
                     ` (7 preceding siblings ...)
  2021-05-05  6:40   ` [dpdk-dev] [PATCH v3 08/17] net/mlx5: close CT management structure Bing Zhao
@ 2021-05-05  6:40   ` Bing Zhao
  2021-05-05  6:40   ` [dpdk-dev] [PATCH v3 10/17] net/mlx5: add ASO CT destroy handling Bing Zhao
                     ` (6 subsequent siblings)
  15 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  6:40 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

After the connection tracking context is created and is being used
by flows, the HW updates the context automatically each time a packet
passes the CT validation. E.g., the ACK, SEQ, window and state of the
CT can be updated by the traffic in both directions.

In order to query the updated contents of this context, a WQE should
be posted to the SQ with a return buffer. The HW fills the data into
the buffer, and the profile is then filled from the returned data.

During the execution of query command, the context may be updated.
The result of the query command may not be the latest one.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.h          |  10 +-
 drivers/net/mlx5/mlx5_flow_aso.c | 245 +++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_dv.c  |  19 +++
 3 files changed, 273 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index de18a59c8e..d2827e78d7 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -490,7 +490,10 @@ struct mlx5_aso_sq_elem {
 			uint16_t burst_size;
 		};
 		struct mlx5_aso_mtr *mtr;
-		struct mlx5_aso_ct_action *ct;
+		struct {
+			struct mlx5_aso_ct_action *ct;
+			char *query_data;
+		};
 	};
 };
 
@@ -1702,5 +1705,10 @@ int mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh,
 int mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
 			      struct mlx5_aso_ct_action *ct,
 			      const struct rte_flow_action_conntrack *profile);
+int mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh,
+			   struct mlx5_aso_ct_action *ct);
+int mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
+			     struct mlx5_aso_ct_action *ct,
+			     struct rte_flow_action_conntrack *profile);
 
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index 3c2350a6b8..3f7ed371bf 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -933,6 +933,7 @@ mlx5_aso_ct_sq_enqueue_single(struct mlx5_aso_ct_pools_mng *mng,
 	/* Fill next WQE. */
 	MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_WAIT);
 	sq->elts[sq->head & mask].ct = ct;
+	sq->elts[sq->head & mask].query_data = NULL;
 	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
 	/* Each WQE will have a single CT object. */
 	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
@@ -1048,9 +1049,95 @@ mlx5_aso_ct_status_update(struct mlx5_aso_sq *sq, uint16_t num)
 		ct = sq->elts[idx].ct;
 		MLX5_ASSERT(ct);
 		MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_READY);
+		if (sq->elts[idx].query_data)
+			rte_memcpy(sq->elts[idx].query_data,
+				   (char *)((uintptr_t)sq->mr.addr + idx * 64),
+				   64);
 	}
 }
 
+/*
+ * Post a WQE to the ASO CT SQ to query the current context.
+ *
+ * @param[in] mng
+ *   Pointer to the CT pools management structure.
+ * @param[in] ct
+ *   Pointer to the generic CT structure related to the context.
+ * @param[in] data
+ *   Pointer to data area to be filled.
+ *
+ * @return
+ *   1 on success (WQE number), 0 if CT or SQ is busy, -1 if the CT is free.
+ */
+static int
+mlx5_aso_ct_sq_query_single(struct mlx5_aso_ct_pools_mng *mng,
+			    struct mlx5_aso_ct_action *ct, char *data)
+{
+	volatile struct mlx5_aso_wqe *wqe = NULL;
+	struct mlx5_aso_sq *sq = &mng->aso_sq;
+	uint16_t size = 1 << sq->log_desc_n;
+	uint16_t mask = size - 1;
+	uint16_t res;
+	uint16_t wqe_idx;
+	struct mlx5_aso_ct_pool *pool;
+	enum mlx5_aso_ct_state state =
+				__atomic_load_n(&ct->state, __ATOMIC_RELAXED);
+
+	if (state == ASO_CONNTRACK_FREE) {
+		DRV_LOG(ERR, "Fail: No context to query");
+		return -1;
+	} else if (state == ASO_CONNTRACK_WAIT) {
+		return 0;
+	}
+	rte_spinlock_lock(&sq->sqsl);
+	res = size - (uint16_t)(sq->head - sq->tail);
+	if (unlikely(!res)) {
+		rte_spinlock_unlock(&sq->sqsl);
+		DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
+		return 0;
+	}
+	MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_QUERY);
+	wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
+	/* Confirm the location and address of the prefetch instruction. */
+	rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
+	/* Fill next WQE. */
+	wqe_idx = sq->head & mask;
+	sq->elts[wqe_idx].ct = ct;
+	sq->elts[wqe_idx].query_data = data;
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	/* Each WQE will have a single CT object. */
+	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
+						  ct->offset);
+	wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
+			(ASO_OPC_MOD_CONNECTION_TRACKING <<
+			 WQE_CSEG_OPC_MOD_OFFSET) |
+			sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
+	/*
+	 * No write request is required for a query.
+	 * ASO_OPER_LOGICAL_AND and ASO_OP_ALWAYS_FALSE are both 0.
+	 * "BYTEWISE_64BYTE" is needed for a whole context.
+	 * Set to 0 directly to reduce an endian swap. (Modify should rewrite.)
+	 * "data_mask" is ignored.
+	 * Buffer address was already filled during initialization.
+	 */
+	wqe->aso_cseg.operand_masks = rte_cpu_to_be_32(BYTEWISE_64BYTE <<
+					ASO_CSEG_DATA_MASK_MODE_OFFSET);
+	wqe->aso_cseg.data_mask = 0;
+	sq->head++;
+	/*
+	 * Each WQE contains 2 WQEBB's, even though
+	 * data segment is not used in this case.
+	 */
+	sq->pi += 2;
+	rte_io_wmb();
+	sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
+	rte_wmb();
+	*sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH. */
+	rte_wmb();
+	rte_spinlock_unlock(&sq->sqsl);
+	return 1;
+}
+
 /*
  * Handle completions from WQEs sent to ASO CT.
  *
@@ -1143,3 +1230,161 @@ mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
 		ct->offset, pool->index);
 	return -1;
 }
+
+/*
+ * The routine is used to wait for WQE completion to continue with queried data.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ * @param[in] ct
+ *   Pointer to connection tracking offload object.
+ *
+ * @return
+ *   0 on success, -1 on failure.
+ */
+int
+mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh,
+		       struct mlx5_aso_ct_action *ct)
+{
+	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
+	uint32_t poll_cqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
+	struct mlx5_aso_ct_pool *pool;
+
+	if (__atomic_load_n(&ct->state, __ATOMIC_RELAXED) ==
+	    ASO_CONNTRACK_READY)
+		return 0;
+	do {
+		mlx5_aso_ct_completion_handle(mng);
+		if (__atomic_load_n(&ct->state, __ATOMIC_RELAXED) ==
+		    ASO_CONNTRACK_READY)
+			return 0;
+		/* Waiting for CQE ready, consider should block or sleep. */
+		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
+	} while (--poll_cqe_times);
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	DRV_LOG(ERR, "Fail to poll CQE for ASO CT %d in pool %d",
+		ct->offset, pool->index);
+	return -1;
+}
+
+/*
+ * Convert the hardware conntrack data format into the profile.
+ *
+ * @param[out] profile
+ *   Pointer to conntrack profile to be filled after query.
+ * @param[in] wdata
+ *   Pointer to data fetched from hardware.
+ */
+static inline void
+mlx5_aso_ct_obj_analyze(struct rte_flow_action_conntrack *profile,
+			char *wdata)
+{
+	void *o_dir = MLX5_ADDR_OF(conn_track_aso, wdata, original_dir);
+	void *r_dir = MLX5_ADDR_OF(conn_track_aso, wdata, reply_dir);
+
+	/* MLX5_GET16 should be taken into consideration. */
+	profile->state = (enum rte_flow_conntrack_state)
+			 MLX5_GET(conn_track_aso, wdata, state);
+	profile->enable = !MLX5_GET(conn_track_aso, wdata, freeze_track);
+	profile->selective_ack = MLX5_GET(conn_track_aso, wdata,
+					  sack_permitted);
+	profile->live_connection = MLX5_GET(conn_track_aso, wdata,
+					    connection_assured);
+	profile->challenge_ack_passed = MLX5_GET(conn_track_aso, wdata,
+						 challenged_acked);
+	profile->max_ack_window = MLX5_GET(conn_track_aso, wdata,
+					   max_ack_window);
+	profile->retransmission_limit = MLX5_GET(conn_track_aso, wdata,
+						 retranmission_limit);
+	profile->last_window = MLX5_GET(conn_track_aso, wdata, last_win);
+	profile->last_direction = MLX5_GET(conn_track_aso, wdata, last_dir);
+	profile->last_index = (enum rte_flow_conntrack_tcp_last_index)
+			      MLX5_GET(conn_track_aso, wdata, last_index);
+	profile->last_seq = MLX5_GET(conn_track_aso, wdata, last_seq);
+	profile->last_ack = MLX5_GET(conn_track_aso, wdata, last_ack);
+	profile->last_end = MLX5_GET(conn_track_aso, wdata, last_end);
+	profile->liberal_mode = MLX5_GET(conn_track_aso, wdata,
+				reply_direction_tcp_liberal_enabled) |
+				MLX5_GET(conn_track_aso, wdata,
+				original_direction_tcp_liberal_enabled);
+	/* No liberal in the RTE structure profile. */
+	profile->reply_dir.scale = MLX5_GET(conn_track_aso, wdata,
+					    reply_direction_tcp_scale);
+	profile->reply_dir.close_initiated = MLX5_GET(conn_track_aso, wdata,
+					reply_direction_tcp_close_initiated);
+	profile->reply_dir.data_unacked = MLX5_GET(conn_track_aso, wdata,
+					reply_direction_tcp_data_unacked);
+	profile->reply_dir.last_ack_seen = MLX5_GET(conn_track_aso, wdata,
+					reply_direction_tcp_max_ack);
+	profile->reply_dir.sent_end = MLX5_GET(tcp_window_params,
+					       r_dir, sent_end);
+	profile->reply_dir.reply_end = MLX5_GET(tcp_window_params,
+						r_dir, reply_end);
+	profile->reply_dir.max_win = MLX5_GET(tcp_window_params,
+					      r_dir, max_win);
+	profile->reply_dir.max_ack = MLX5_GET(tcp_window_params,
+					      r_dir, max_ack);
+	profile->original_dir.scale = MLX5_GET(conn_track_aso, wdata,
+					       original_direction_tcp_scale);
+	profile->original_dir.close_initiated = MLX5_GET(conn_track_aso, wdata,
+					original_direction_tcp_close_initiated);
+	profile->original_dir.data_unacked = MLX5_GET(conn_track_aso, wdata,
+					original_direction_tcp_data_unacked);
+	profile->original_dir.last_ack_seen = MLX5_GET(conn_track_aso, wdata,
+					original_direction_tcp_max_ack);
+	profile->original_dir.sent_end = MLX5_GET(tcp_window_params,
+						  o_dir, sent_end);
+	profile->original_dir.reply_end = MLX5_GET(tcp_window_params,
+						   o_dir, reply_end);
+	profile->original_dir.max_win = MLX5_GET(tcp_window_params,
+						 o_dir, max_win);
+	profile->original_dir.max_ack = MLX5_GET(tcp_window_params,
+						 o_dir, max_ack);
+}
+
+/*
+ * Query connection tracking information parameters by sending a WQE.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ * @param[in] ct
+ *   Pointer to connection tracking offload object.
+ * @param[out] profile
+ *   Pointer to connection tracking TCP information.
+ *
+ * @return
+ *   0 on success, -1 on failure.
+ */
+int
+mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
+			 struct mlx5_aso_ct_action *ct,
+			 struct rte_flow_action_conntrack *profile)
+{
+	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
+	uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
+	struct mlx5_aso_ct_pool *pool;
+	char out_data[64 * 2];
+	int ret;
+
+	MLX5_ASSERT(ct);
+	do {
+		mlx5_aso_ct_completion_handle(mng);
+		ret = mlx5_aso_ct_sq_query_single(mng, ct, out_data);
+		if (ret < 0)
+			return ret;
+		else if (ret > 0)
+			goto data_handle;
+		/* Waiting for wqe resource or state. */
+		else
+			rte_delay_us_sleep(10u);
+	} while (--poll_wqe_times);
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	DRV_LOG(ERR, "Fail to send WQE for ASO CT %d in pool %d",
+		ct->offset, pool->index);
+	return -1;
+data_handle:
+	ret = mlx5_aso_ct_wait_ready(sh, ct);
+	if (!ret)
+		mlx5_aso_ct_obj_analyze(profile, out_data);
+	return ret;
+}
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index c8ff693e4c..84e7f0b3d3 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -13775,6 +13775,8 @@ flow_dv_action_query(struct rte_eth_dev *dev,
 	uint32_t act_idx = (uint32_t)(uintptr_t)handle;
 	uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
 	uint32_t idx = act_idx & ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_action *ct;
 
 	switch (type) {
 	case MLX5_INDIRECT_ACTION_TYPE_AGE:
@@ -13788,6 +13790,23 @@ flow_dv_action_query(struct rte_eth_dev *dev,
 			resp->sec_since_last_hit = __atomic_load_n
 			     (&age_param->sec_since_last_hit, __ATOMIC_RELAXED);
 		return 0;
+	case MLX5_INDIRECT_ACTION_TYPE_CT:
+		ct = flow_aso_ct_get_by_idx(dev, idx);
+		if (!ct->refcnt)
+			return rte_flow_error_set(error, EFAULT,
+					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					NULL,
+					"CT object is inactive");
+		((struct rte_flow_action_conntrack *)data)->peer_port =
+							ct->peer;
+		((struct rte_flow_action_conntrack *)data)->is_original_dir =
+							ct->is_original;
+		if (mlx5_aso_ct_query_by_wqe(priv->sh, ct, data))
+			return rte_flow_error_set(error, EIO,
+					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					NULL,
+					"Failed to query CT context");
+		return 0;
 	default:
 		return rte_flow_error_set(error, ENOTSUP,
 					  RTE_FLOW_ERROR_TYPE_ACTION,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v3 10/17] net/mlx5: add ASO CT destroy handling
  2021-05-05  6:40 ` [dpdk-dev] [PATCH v3 " Bing Zhao
                     ` (8 preceding siblings ...)
  2021-05-05  6:40   ` [dpdk-dev] [PATCH v3 09/17] net/mlx5: add ASO CT query implementation Bing Zhao
@ 2021-05-05  6:40   ` Bing Zhao
  2021-05-05  6:40   ` [dpdk-dev] [PATCH v3 11/17] net/mlx5: add translation of CT action Bing Zhao
                     ` (5 subsequent siblings)
  15 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  6:40 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

When trying to destroy an ASO connection tracking context, the DR
action created on this context should also be destroyed. Before
inserting the related software object into the management free list,
the reference count should be checked.

Right now, the context object will not be freed to the system and
will be reused directly from the free list.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow_dv.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 84e7f0b3d3..0fa0671ace 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -11136,9 +11136,15 @@ flow_dv_aso_ct_release(struct rte_eth_dev *dev, uint32_t idx)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	uint32_t ret;
 	struct mlx5_aso_ct_action *ct = flow_aso_ct_get_by_idx(dev, idx);
-	uint32_t ret = __atomic_sub_fetch(&ct->refcnt, 1, __ATOMIC_RELAXED);
+	enum mlx5_aso_ct_state state =
+			__atomic_load_n(&ct->state, __ATOMIC_RELAXED);
 
+	/* Cannot release when CT is in the ASO SQ. */
+	if (state == ASO_CONNTRACK_WAIT || state == ASO_CONNTRACK_QUERY)
+		return -1;
+	ret = __atomic_sub_fetch(&ct->refcnt, 1, __ATOMIC_RELAXED);
 	if (!ret) {
 		if (ct->dr_action_orig) {
 #ifdef HAVE_MLX5_DR_ACTION_ASO_CT
@@ -11154,6 +11160,8 @@ flow_dv_aso_ct_release(struct rte_eth_dev *dev, uint32_t idx)
 #endif
 			ct->dr_action_rply = NULL;
 		}
+		/* Clear the state to free, no need in 1st allocation. */
+		MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_FREE);
 		rte_spinlock_lock(&mng->ct_sl);
 		LIST_INSERT_HEAD(&mng->free_cts, ct, next);
 		rte_spinlock_unlock(&mng->ct_sl);
@@ -13648,6 +13656,12 @@ flow_dv_action_destroy(struct rte_eth_dev *dev,
 			DRV_LOG(DEBUG, "Indirect age action %" PRIu32 " was"
 				" released with references %d.", idx, ret);
 		return 0;
+	case MLX5_INDIRECT_ACTION_TYPE_CT:
+		ret = flow_dv_aso_ct_release(dev, idx);
+		if (ret)
+			DRV_LOG(DEBUG, "Connection tracking object %u still "
+				"has references %d.", idx, ret);
+		return 0;
 	default:
 		return rte_flow_error_set(error, ENOTSUP,
 					  RTE_FLOW_ERROR_TYPE_ACTION,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v3 11/17] net/mlx5: add translation of CT action
  2021-05-05  6:40 ` [dpdk-dev] [PATCH v3 " Bing Zhao
                     ` (9 preceding siblings ...)
  2021-05-05  6:40   ` [dpdk-dev] [PATCH v3 10/17] net/mlx5: add ASO CT destroy handling Bing Zhao
@ 2021-05-05  6:40   ` Bing Zhao
  2021-05-05  6:40   ` [dpdk-dev] [PATCH v3 12/17] net/mlx5: add translation of CT item Bing Zhao
                     ` (4 subsequent siblings)
  15 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  6:40 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

When creating a flow with this action context for CT, it needs to be
translated in 2 levels.

First, retrieve from action context to rte_flow action.
Second, translate it to the corresponding DR action with traffic
direction that was specified when creating or updating via
rte_flow_action_handle* API.

Before using the DR action in a flow, the CT context should be
available to use in the hardware. A synchronization is done before
inserting the flow rule with CT action to check the HW availability
of this CT context.

In order to release the DR actions and reuse the context of a CT,
the reference count should also be handled when destroying the
flow.

The CT index will be recorded in the rte_flow by reusing the ASO age
index to save memory, since only one ASO action is supported in one
flow rule currently. The action context type should also be saved
for CT. When destroying a flow rule, if the context type is CT and
the index is valid (non-zero), the release process should be
handled. By default, the handling will fall back to try to release
the ASO age if any.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.h          |  2 ++
 drivers/net/mlx5/mlx5_flow.c     |  9 +++++++
 drivers/net/mlx5/mlx5_flow.h     |  7 +++++-
 drivers/net/mlx5/mlx5_flow_aso.c | 41 ++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_dv.c  | 28 +++++++++++++++++++++-
 5 files changed, 85 insertions(+), 2 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index d2827e78d7..d01a10ea54 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1710,5 +1710,7 @@ int mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh,
 int mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
 			     struct mlx5_aso_ct_action *ct,
 			     struct rte_flow_action_conntrack *profile);
+int mlx5_aso_ct_available(struct mlx5_dev_ctx_shared *sh,
+			  struct mlx5_aso_ct_action *ct);
 
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index edad6007a8..f36eeae03f 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -3553,6 +3553,15 @@ flow_action_handles_translate(struct rte_eth_dev *dev,
 				break;
 			}
 			/* Fall-through */
+		case MLX5_INDIRECT_ACTION_TYPE_CT:
+			if (priv->sh->ct_aso_en) {
+				translated[handle->index].type =
+					RTE_FLOW_ACTION_TYPE_CONNTRACK;
+				translated[handle->index].conf =
+							 (void *)(uintptr_t)idx;
+				break;
+			}
+			/* Fall-through */
 		default:
 			mlx5_free(translated);
 			return rte_flow_error_set
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 8f2bc7d2f6..286e3fb6a4 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -226,6 +226,7 @@ enum mlx5_feature_name {
 #define MLX5_FLOW_ACTION_TUNNEL_MATCH (1ull << 38)
 #define MLX5_FLOW_ACTION_MODIFY_FIELD (1ull << 39)
 #define MLX5_FLOW_ACTION_METER_WITH_TERMINATED_POLICY (1ull << 40)
+#define MLX5_FLOW_ACTION_CT (1ull << 41)
 
 #define MLX5_FLOW_FATE_ACTIONS \
 	(MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_QUEUE | \
@@ -969,11 +970,15 @@ struct rte_flow {
 	uint32_t drv_type:2; /**< Driver type. */
 	uint32_t tunnel:1;
 	uint32_t meter:24; /**< Holds flow meter id. */
+	uint32_t indirect_type:2; /**< Indirect action type. */
 	uint32_t rix_mreg_copy;
 	/**< Index to metadata register copy table resource. */
 	uint32_t counter; /**< Holds flow counter. */
 	uint32_t tunnel_id;  /**< Tunnel id */
-	uint32_t age; /**< Holds ASO age bit index. */
+	union {
+		uint32_t age; /**< Holds ASO age bit index. */
+		uint32_t ct; /**< Holds ASO CT index. */
+	};
 	uint32_t geneve_tlv_option; /**< Holds Geneve TLV option id. > */
 } __rte_packed;
 
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index 3f7ed371bf..d0a989e213 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -1388,3 +1388,44 @@ mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
 		mlx5_aso_ct_obj_analyze(profile, out_data);
 	return ret;
 }
+
+/*
+ * Make sure the conntrack context is synchronized with hardware before
+ * creating a flow rule that uses it.
+ *
+ * @param[in] sh
+ *   Pointer to shared device context.
+ * @param[in] ct
+ *   Pointer to connection tracking offload object.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_aso_ct_available(struct mlx5_dev_ctx_shared *sh,
+		      struct mlx5_aso_ct_action *ct)
+{
+	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
+	uint32_t poll_cqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
+	enum mlx5_aso_ct_state state =
+				__atomic_load_n(&ct->state, __ATOMIC_RELAXED);
+
+	if (state == ASO_CONNTRACK_FREE) {
+		rte_errno = ENXIO;
+		return -rte_errno;
+	} else if (state == ASO_CONNTRACK_READY ||
+		   state == ASO_CONNTRACK_QUERY) {
+		return 0;
+	}
+	do {
+		mlx5_aso_ct_completion_handle(mng);
+		state = __atomic_load_n(&ct->state, __ATOMIC_RELAXED);
+		if (state == ASO_CONNTRACK_READY ||
+		    state == ASO_CONNTRACK_QUERY)
+			return 0;
+		/* Waiting for CQE ready, consider should block or sleep. */
+		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
+	} while (--poll_cqe_times);
+	rte_errno = EBUSY;
+	return -rte_errno;
+}
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 0fa0671ace..14af900267 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -11381,6 +11381,7 @@ flow_dv_translate_create_conntrack(struct rte_eth_dev *dev,
 		return rte_flow_error_set(error, EBUSY,
 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
 					  "Failed to update CT");
+	ct->is_original = !!pro->is_original_dir;
 	return idx;
 }
 
@@ -11544,6 +11545,8 @@ flow_dv_translate(struct rte_eth_dev *dev,
 		const struct rte_flow_action *found_action = NULL;
 		uint32_t jump_group = 0;
 		struct mlx5_flow_counter *cnt;
+		uint32_t ct_idx;
+		struct mlx5_aso_ct_action *ct;
 
 		if (!mlx5_flow_os_action_supported(action_type))
 			return rte_flow_error_set(error, ENOTSUP,
@@ -12017,6 +12020,26 @@ flow_dv_translate(struct rte_eth_dev *dev,
 				return -rte_errno;
 			action_flags |= MLX5_FLOW_ACTION_MODIFY_FIELD;
 			break;
+		case RTE_FLOW_ACTION_TYPE_CONNTRACK:
+			ct_idx = (uint32_t)(uintptr_t)action->conf;
+			ct = flow_aso_ct_get_by_idx(dev, ct_idx);
+			if (mlx5_aso_ct_available(priv->sh, ct))
+				return rte_flow_error_set(error, rte_errno,
+						RTE_FLOW_ERROR_TYPE_ACTION,
+						NULL,
+						"CT is unavailable.");
+			if (ct->is_original)
+				dev_flow->dv.actions[actions_n] =
+							ct->dr_action_orig;
+			else
+				dev_flow->dv.actions[actions_n] =
+							ct->dr_action_rply;
+			flow->indirect_type = MLX5_INDIRECT_ACTION_TYPE_CT;
+			flow->ct = ct_idx;
+			__atomic_fetch_add(&ct->refcnt, 1, __ATOMIC_RELAXED);
+			actions_n++;
+			action_flags |= MLX5_FLOW_ACTION_CT;
+			break;
 		case RTE_FLOW_ACTION_TYPE_END:
 			actions_end = true;
 			if (mhdr_res->actions_num) {
@@ -13152,7 +13175,10 @@ flow_dv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
 			mlx5_flow_meter_detach(priv, fm);
 		flow->meter = 0;
 	}
-	if (flow->age)
+	/* Keep the current age handling by default. */
+	if (flow->indirect_type == MLX5_INDIRECT_ACTION_TYPE_CT && flow->ct)
+		flow_dv_aso_ct_release(dev, flow->ct);
+	else if (flow->age)
 		flow_dv_aso_age_release(dev, flow->age);
 	if (flow->geneve_tlv_option) {
 		flow_dv_geneve_tlv_option_resource_release(dev);
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v3 12/17] net/mlx5: add translation of CT item
  2021-05-05  6:40 ` [dpdk-dev] [PATCH v3 " Bing Zhao
                     ` (10 preceding siblings ...)
  2021-05-05  6:40   ` [dpdk-dev] [PATCH v3 11/17] net/mlx5: add translation of CT action Bing Zhao
@ 2021-05-05  6:40   ` Bing Zhao
  2021-05-05  6:41   ` [dpdk-dev] [PATCH v3 13/17] net/mlx5: add CT context update Bing Zhao
                     ` (3 subsequent siblings)
  15 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  6:40 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

The return register of the DR action will be used for matching.
After the ASO CT checking of a TCP packet, the syndrome is filled in
the register. Only the 8 LSB should be used. A conversion from
RTE_FLOW_CONNTRACK_PKT_STATE_* to the syndrome should be done after
checking the spec and mask fields.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow.h    |  7 ++++
 drivers/net/mlx5/mlx5_flow_dv.c | 62 +++++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 286e3fb6a4..eb0bb42161 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -405,6 +405,13 @@ enum mlx5_feature_name {
 /* Maximum number of fields to modify in MODIFY_FIELD */
 #define MLX5_ACT_MAX_MOD_FIELDS 5
 
+/* Syndrome bits definition for connection tracking. */
+#define MLX5_CT_SYNDROME_VALID		(0x0 << 6)
+#define MLX5_CT_SYNDROME_INVALID	(0x1 << 6)
+#define MLX5_CT_SYNDROME_TRAP		(0x2 << 6)
+#define MLX5_CT_SYNDROME_STATE_CHANGE	(0x1 << 1)
+#define MLX5_CT_SYNDROME_BAD_PACKET	(0x1 << 0)
+
 enum mlx5_flow_drv_type {
 	MLX5_FLOW_TYPE_MIN,
 	MLX5_FLOW_TYPE_DV,
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 14af900267..b0858e3df8 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -9379,6 +9379,64 @@ flow_dv_translate_item_ecpri(struct rte_eth_dev *dev, void *matcher,
 	}
 }
 
+/*
+ * Add connection tracking status item to matcher
+ *
+ * @param[in] dev
+ *   The device to configure through.
+ * @param[in, out] matcher
+ *   Flow matcher.
+ * @param[in, out] key
+ *   Flow matcher value.
+ * @param[in] item
+ *   Flow pattern to translate.
+ */
+static void
+flow_dv_translate_item_aso_ct(struct rte_eth_dev *dev,
+			      void *matcher, void *key,
+			      const struct rte_flow_item *item)
+{
+	uint32_t reg_value = 0;
+	int reg_id;
+	/* 8LSB 0b 11/0000/11, middle 4 bits are reserved. */
+	uint32_t reg_mask = 0;
+	const struct rte_flow_item_conntrack *spec = item->spec;
+	const struct rte_flow_item_conntrack *mask = item->mask;
+	uint32_t flags;
+	struct rte_flow_error error;
+
+	if (!mask)
+		mask = &rte_flow_item_conntrack_mask;
+	if (!spec || !mask->flags)
+		return;
+	flags = spec->flags & mask->flags;
+	/* The conflict should be checked in the validation. */
+	if (flags & RTE_FLOW_CONNTRACK_PKT_STATE_VALID)
+		reg_value |= MLX5_CT_SYNDROME_VALID;
+	if (flags & RTE_FLOW_CONNTRACK_PKT_STATE_CHANGED)
+		reg_value |= MLX5_CT_SYNDROME_STATE_CHANGE;
+	if (flags & RTE_FLOW_CONNTRACK_PKT_STATE_INVALID)
+		reg_value |= MLX5_CT_SYNDROME_INVALID;
+	if (flags & RTE_FLOW_CONNTRACK_PKT_STATE_DISABLED)
+		reg_value |= MLX5_CT_SYNDROME_TRAP;
+	if (flags & RTE_FLOW_CONNTRACK_PKT_STATE_BAD)
+		reg_value |= MLX5_CT_SYNDROME_BAD_PACKET;
+	if (mask->flags & (RTE_FLOW_CONNTRACK_PKT_STATE_VALID |
+			   RTE_FLOW_CONNTRACK_PKT_STATE_INVALID |
+			   RTE_FLOW_CONNTRACK_PKT_STATE_DISABLED))
+		reg_mask |= 0xc0;
+	if (mask->flags & RTE_FLOW_CONNTRACK_PKT_STATE_CHANGED)
+		reg_mask |= MLX5_CT_SYNDROME_STATE_CHANGE;
+	if (mask->flags & RTE_FLOW_CONNTRACK_PKT_STATE_BAD)
+		reg_mask |= MLX5_CT_SYNDROME_BAD_PACKET;
+	/* The REG_C_x value could be saved during startup. */
+	reg_id = mlx5_flow_get_reg_id(dev, MLX5_ASO_CONNTRACK, 0, &error);
+	if (reg_id == REG_NON)
+		return;
+	flow_dv_match_meta_reg(matcher, key, (enum modify_reg)reg_id,
+			       reg_value, reg_mask);
+}
+
 static uint32_t matcher_zero[MLX5_ST_SZ_DW(fte_match_param)] = { 0 };
 
 #define HEADER_IS_ZERO(match_criteria, headers)				     \
@@ -12322,6 +12380,10 @@ flow_dv_translate(struct rte_eth_dev *dev,
 			/* No other protocol should follow eCPRI layer. */
 			last_item = MLX5_FLOW_LAYER_ECPRI;
 			break;
+		case RTE_FLOW_ITEM_TYPE_CONNTRACK:
+			flow_dv_translate_item_aso_ct(dev, match_mask,
+						      match_value, items);
+			break;
 		default:
 			break;
 		}
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v3 13/17] net/mlx5: add CT context update
  2021-05-05  6:40 ` [dpdk-dev] [PATCH v3 " Bing Zhao
                     ` (11 preceding siblings ...)
  2021-05-05  6:40   ` [dpdk-dev] [PATCH v3 12/17] net/mlx5: add translation of CT item Bing Zhao
@ 2021-05-05  6:41   ` Bing Zhao
  2021-05-05  6:41   ` [dpdk-dev] [PATCH v3 14/17] net/mlx5: validation of CT action Bing Zhao
                     ` (2 subsequent siblings)
  15 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  6:41 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

When updating a connection tracking context, two separate parts
could be updated.
First, the direction. This will only update the traffic direction
recorded in the software for flow creation.
Second, the TCP parameters. The hardware context will be updated
via the WQE. This update will be blocked until the hardware status
is updated and ready for the next flow creation.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow_dv.c | 56 +++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index b0858e3df8..6afbbbc4bb 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -13824,6 +13824,60 @@ __flow_dv_action_rss_update(struct rte_eth_dev *dev, uint32_t idx,
 	return ret;
 }
 
+/*
+ * Updates in place conntrack context or direction.
+ * Context update should be synchronized.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] idx
+ *   The conntrack object ID to be updated.
+ * @param[in] update
+ *   Pointer to the structure of information to update.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. Initialized in case of
+ *   error only.
+ *
+ * @return
+ *   0 on success, otherwise negative errno value.
+ */
+static int
+__flow_dv_action_ct_update(struct rte_eth_dev *dev, uint32_t idx,
+			   const struct rte_flow_modify_conntrack *update,
+			   struct rte_flow_error *error)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_action *ct;
+	const struct rte_flow_action_conntrack *new_prf;
+	int ret = 0;
+
+	ct = flow_aso_ct_get_by_idx(dev, idx);
+	if (!ct->refcnt)
+		return rte_flow_error_set(error, ENOMEM,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					  NULL,
+					  "CT object is inactive");
+	new_prf = &update->new_ct;
+	if (update->direction)
+		ct->is_original = !!new_prf->is_original_dir;
+	if (update->state) {
+		ret = mlx5_aso_ct_update_by_wqe(priv->sh, ct, new_prf);
+		if (ret)
+			return rte_flow_error_set(error, EIO,
+					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					NULL,
+					"Failed to send CT context update WQE");
+		/* Block until ready or a failure. */
+		ret = mlx5_aso_ct_available(priv->sh, ct);
+		if (ret)
+			rte_flow_error_set(error, rte_errno,
+					   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					   NULL,
+					   "Timeout to get the CT update");
+	}
+	return ret;
+}
+
 /**
  * Updates in place shared action configuration, lock free,
  * (mutex should be acquired by caller).
@@ -13859,6 +13913,8 @@ flow_dv_action_update(struct rte_eth_dev *dev,
 	case MLX5_INDIRECT_ACTION_TYPE_RSS:
 		action_conf = ((const struct rte_flow_action *)update)->conf;
 		return __flow_dv_action_rss_update(dev, idx, action_conf, err);
+	case MLX5_INDIRECT_ACTION_TYPE_CT:
+		return __flow_dv_action_ct_update(dev, idx, update, err);
 	default:
 		return rte_flow_error_set(err, ENOTSUP,
 					  RTE_FLOW_ERROR_TYPE_ACTION,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v3 14/17] net/mlx5: validation of CT action
  2021-05-05  6:40 ` [dpdk-dev] [PATCH v3 " Bing Zhao
                     ` (12 preceding siblings ...)
  2021-05-05  6:41   ` [dpdk-dev] [PATCH v3 13/17] net/mlx5: add CT context update Bing Zhao
@ 2021-05-05  6:41   ` Bing Zhao
  2021-05-05  6:41   ` [dpdk-dev] [PATCH v3 15/17] net/mlx5: validation of CT item Bing Zhao
  2021-05-05  6:41   ` [dpdk-dev] [PATCH v3 16/17] net/mlx5: add support of CT between two ports Bing Zhao
  15 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  6:41 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

The validation of a CT action contains two parts. The first is the
CT action configuration parameters. When creating a CT action
context, some members need to be verified.

The second is that when creating a flow, the DR action of CT should
be validated with other actions and items as well. Currently, only
the TCP protocol supports connection tracking.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.h         |  4 ++
 drivers/net/mlx5/mlx5_flow.c    | 31 +++++++++++++++
 drivers/net/mlx5/mlx5_flow_dv.c | 69 +++++++++++++++++++++++++++++++++
 3 files changed, 104 insertions(+)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index d01a10ea54..36b7f05822 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1611,6 +1611,10 @@ int mlx5_flow_dev_dump(struct rte_eth_dev *dev, struct rte_flow *flow,
 void mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev);
 int mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
 			uint32_t nb_contexts, struct rte_flow_error *error);
+int mlx5_validate_action_ct(struct rte_eth_dev *dev,
+			    const struct rte_flow_action_conntrack *conntrack,
+			    struct rte_flow_error *error);
+
 
 /* mlx5_mp_os.c */
 
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index f36eeae03f..6baaefbaba 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1688,6 +1688,37 @@ mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
 	return 0;
 }
 
+/*
+ * Validate the ASO CT action.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] conntrack
+ *   Pointer to the CT action profile.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_validate_action_ct(struct rte_eth_dev *dev,
+			const struct rte_flow_action_conntrack *conntrack,
+			struct rte_flow_error *error)
+{
+	RTE_SET_USED(dev);
+
+	if (conntrack->state > RTE_FLOW_CONNTRACK_STATE_TIME_WAIT)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "Invalid CT state");
+	if (conntrack->last_index > RTE_FLOW_CONNTRACK_FLAG_RST)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "Invalid last TCP packet flag");
+	return 0;
+}
+
 /**
  * Verify the @p attributes will be correctly understood by the NIC and store
  * them in the @p flow if everything is correct.
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 6afbbbc4bb..f2a2c609e2 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -3400,6 +3400,57 @@ flow_dv_validate_action_raw_encap_decap
 	return 0;
 }
 
+/*
+ * Validate the ASO CT action.
+ *
+ * @param[in] dev
+ *   Pointer to the rte_eth_dev structure.
+ * @param[in] action_flags
+ *   Holds the actions detected until now.
+ * @param[in] item_flags
+ *   The items found in this flow rule.
+ * @param[in] attr
+ *   Pointer to flow attributes.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_validate_action_aso_ct(struct rte_eth_dev *dev,
+			       uint64_t action_flags,
+			       uint64_t item_flags,
+			       const struct rte_flow_attr *attr,
+			       struct rte_flow_error *error)
+{
+	RTE_SET_USED(dev);
+
+	if (attr->group == 0 && !attr->transfer)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					  NULL,
+					  "Only support non-root table");
+	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "CT cannot follow a fate action");
+	if ((action_flags & MLX5_FLOW_ACTION_METER) ||
+	    (action_flags & MLX5_FLOW_ACTION_AGE))
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "Only one ASO action is supported");
+	if (action_flags & MLX5_FLOW_ACTION_ENCAP)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "Encap cannot exist before CT");
+	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP))
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+					  "Not a outer TCP packet");
+	return 0;
+}
+
 /**
  * Match encap_decap resource.
  *
@@ -7205,6 +7256,14 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
 			action_flags |= MLX5_FLOW_ACTION_MODIFY_FIELD;
 			rw_act_num += ret;
 			break;
+		case RTE_FLOW_ACTION_TYPE_CONNTRACK:
+			ret = flow_dv_validate_action_aso_ct(dev, action_flags,
+							     item_flags, attr,
+							     error);
+			if (ret < 0)
+				return ret;
+			action_flags |= MLX5_FLOW_ACTION_CT;
+			break;
 		default:
 			return rte_flow_error_set(error, ENOTSUP,
 						  RTE_FLOW_ERROR_TYPE_ACTION,
@@ -13861,6 +13920,10 @@ __flow_dv_action_ct_update(struct rte_eth_dev *dev, uint32_t idx,
 	if (update->direction)
 		ct->is_original = !!new_prf->is_original_dir;
 	if (update->state) {
+		/* Only validate the profile when it needs to be updated. */
+		ret = mlx5_validate_action_ct(dev, new_prf, error);
+		if (ret)
+			return ret;
 		ret = mlx5_aso_ct_update_by_wqe(priv->sh, ct, new_prf);
 		if (ret)
 			return rte_flow_error_set(error, EIO,
@@ -15732,6 +15795,12 @@ flow_dv_action_validate(struct rte_eth_dev *dev,
 						NULL,
 					     "shared age action not supported");
 		return flow_dv_validate_action_age(0, action, dev, err);
+	case RTE_FLOW_ACTION_TYPE_CONNTRACK:
+		if (!priv->sh->ct_aso_en)
+			return rte_flow_error_set(err, ENOTSUP,
+					RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+					"ASO CT is not supported");
+		return mlx5_validate_action_ct(dev, action->conf, err);
 	default:
 		return rte_flow_error_set(err, ENOTSUP,
 					  RTE_FLOW_ERROR_TYPE_ACTION,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v3 15/17] net/mlx5: validation of CT item
  2021-05-05  6:40 ` [dpdk-dev] [PATCH v3 " Bing Zhao
                     ` (13 preceding siblings ...)
  2021-05-05  6:41   ` [dpdk-dev] [PATCH v3 14/17] net/mlx5: validation of CT action Bing Zhao
@ 2021-05-05  6:41   ` Bing Zhao
  2021-05-05  6:41   ` [dpdk-dev] [PATCH v3 16/17] net/mlx5: add support of CT between two ports Bing Zhao
  15 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  6:41 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

The ASO connection tracking item is translated into a register value
for matching. Validating this item does not depend on other layers,
because a flow that includes it is expected to be reached by a jump
from another group, and all the layer checks were already done in the
previous groups. Only conflicts between the state bits need to be
checked.

It is assumed that the flow with CT item will always work on the
TCP traffic.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow.h    |  3 ++
 drivers/net/mlx5/mlx5_flow_dv.c | 51 +++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index eb0bb42161..238befa2d4 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -147,6 +147,9 @@ enum mlx5_feature_name {
 #define MLX5_FLOW_LAYER_GENEVE_OPT (UINT64_C(1) << 32)
 #define MLX5_FLOW_LAYER_GTP_PSC (UINT64_C(1) << 33)
 
+/* Conntrack item. */
+#define MLX5_FLOW_LAYER_ASO_CT (UINT64_C(1) << 34)
+
 /* Outer Masks. */
 #define MLX5_FLOW_LAYER_OUTER_L3 \
 	(MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index f2a2c609e2..aa0a5acdca 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -2598,6 +2598,51 @@ flow_dv_validate_item_ipv6_frag_ext(const struct rte_flow_item *item,
 				  "specified range not supported");
 }
 
+/*
+ * Validate ASO CT item: one per rule; VALID excludes INVALID/BAD/DISABLED.
+ * NOTE(review): item->spec is dereferenced without a NULL check - confirm.
+ * @param[in] dev
+ *   Pointer to the rte_eth_dev structure (unused).
+ * @param[in] item
+ *   Item specification.
+ * @param[in, out] item_flags
+ *   Items detected until now; MLX5_FLOW_LAYER_ASO_CT is added on success.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_validate_item_aso_ct(struct rte_eth_dev *dev,
+			     const struct rte_flow_item *item,
+			     uint64_t *item_flags,
+			     struct rte_flow_error *error)
+{
+	const struct rte_flow_item_conntrack *spec = item->spec;
+	const struct rte_flow_item_conntrack *mask = item->mask;
+	RTE_SET_USED(dev);
+	uint32_t flags;
+
+	if (*item_flags & MLX5_FLOW_LAYER_ASO_CT)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ITEM, NULL,
+					  "Only one CT is supported");
+	if (!mask)
+		mask = &rte_flow_item_conntrack_mask;
+	flags = spec->flags & mask->flags;
+	if ((flags & RTE_FLOW_CONNTRACK_PKT_STATE_VALID) &&
+	    ((flags & RTE_FLOW_CONNTRACK_PKT_STATE_INVALID) ||
+	     (flags & RTE_FLOW_CONNTRACK_PKT_STATE_BAD) ||
+	     (flags & RTE_FLOW_CONNTRACK_PKT_STATE_DISABLED)))
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ITEM, NULL,
+					  "Conflict status bits");
+	/* State change also needs to be considered. */
+	*item_flags |= MLX5_FLOW_LAYER_ASO_CT;
+	return 0;
+}
+
 /**
  * Validate the pop VLAN action.
  *
@@ -6696,6 +6741,12 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
 				return ret;
 			last_item = MLX5_FLOW_LAYER_ECPRI;
 			break;
+		case RTE_FLOW_ITEM_TYPE_CONNTRACK:
+			ret = flow_dv_validate_item_aso_ct(dev, items,
+							   &item_flags, error);
+			if (ret < 0)
+				return ret;
+			break;
 		default:
 			return rte_flow_error_set(error, ENOTSUP,
 						  RTE_FLOW_ERROR_TYPE_ITEM,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v3 16/17] net/mlx5: add support of CT between two ports
  2021-05-05  6:40 ` [dpdk-dev] [PATCH v3 " Bing Zhao
                     ` (14 preceding siblings ...)
  2021-05-05  6:41   ` [dpdk-dev] [PATCH v3 15/17] net/mlx5: validation of CT item Bing Zhao
@ 2021-05-05  6:41   ` Bing Zhao
  15 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  6:41 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

After a connection tracking context is created, it can be used between
two ports. On each port, a flow is created for one direction of the
traffic.

The context can only be shared between the owner port and the peer
port that was specified when the context was created. In the current
implementation, only the owner port can update or query the context.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow.h    | 57 +++++++++++++++++++++++++-
 drivers/net/mlx5/mlx5_flow_dv.c | 71 +++++++++++++++++++++++++--------
 2 files changed, 110 insertions(+), 18 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 238befa2d4..ddaba40f72 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -48,6 +48,25 @@ enum {
 	MLX5_INDIRECT_ACTION_TYPE_CT,
 };
 
+/* Currently up to 256 ports are supported and the action number is 4M. */
+#define MLX5_INDIRECT_ACT_CT_MAX_PORT 0x100
+
+#define MLX5_INDIRECT_ACT_CT_OWNER_SHIFT 22
+#define MLX5_INDIRECT_ACT_CT_OWNER_MASK (MLX5_INDIRECT_ACT_CT_MAX_PORT - 1)
+
+/* 30-31: type, 22-29: owner port, 0-21: index. */
+#define MLX5_INDIRECT_ACT_CT_GEN_IDX(owner, index) \
+	((MLX5_INDIRECT_ACTION_TYPE_CT << MLX5_INDIRECT_ACTION_TYPE_OFFSET) | \
+	 (((owner) & MLX5_INDIRECT_ACT_CT_OWNER_MASK) << \
+	  MLX5_INDIRECT_ACT_CT_OWNER_SHIFT) | (index))
+
+#define MLX5_INDIRECT_ACT_CT_GET_OWNER(index) \
+	(((index) >> MLX5_INDIRECT_ACT_CT_OWNER_SHIFT) & \
+	 MLX5_INDIRECT_ACT_CT_OWNER_MASK)
+
+#define MLX5_INDIRECT_ACT_CT_GET_IDX(index) \
+	((index) & ((1 << MLX5_INDIRECT_ACT_CT_OWNER_SHIFT) - 1))
+
 /* Matches on selected register. */
 struct mlx5_rte_flow_item_tag {
 	enum modify_reg id;
@@ -1304,7 +1323,7 @@ mlx5_aso_meter_by_idx(struct mlx5_priv *priv, uint32_t idx)
 }
 
 /*
- * Get ASO CT action by index.
+ * Get ASO CT action by device and index.
  *
  * @param[in] dev
  *   Pointer to the Ethernet device structure.
@@ -1315,7 +1334,7 @@ mlx5_aso_meter_by_idx(struct mlx5_priv *priv, uint32_t idx)
  *   The specified ASO CT action pointer.
  */
 static inline struct mlx5_aso_ct_action *
-flow_aso_ct_get_by_idx(struct rte_eth_dev *dev, uint32_t idx)
+flow_aso_ct_get_by_dev_idx(struct rte_eth_dev *dev, uint32_t idx)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
@@ -1330,6 +1349,40 @@ flow_aso_ct_get_by_idx(struct rte_eth_dev *dev, uint32_t idx)
 	return &pool->actions[idx % MLX5_ASO_CT_ACTIONS_PER_POOL];
 }
 
+/*
+ * Get ASO CT action by owner & index.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] own_idx
+ *   Index to the ASO CT action and owner port combination.
+ *
+ * @return
+ *   The ASO CT action pointer, NULL if a non-owner port may not use it.
+ */
+static inline struct mlx5_aso_ct_action *
+flow_aso_ct_get_by_idx(struct rte_eth_dev *dev, uint32_t own_idx)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_action *ct;
+	uint16_t owner = (uint16_t)MLX5_INDIRECT_ACT_CT_GET_OWNER(own_idx);
+	uint32_t idx = MLX5_INDIRECT_ACT_CT_GET_IDX(own_idx);
+
+	if (owner == PORT_ID(priv)) {
+		ct = flow_aso_ct_get_by_dev_idx(dev, idx);
+	} else {
+		struct rte_eth_dev *owndev = &rte_eth_devices[owner];
+
+		MLX5_ASSERT(owner < RTE_MAX_ETHPORTS);
+		if (dev->data->dev_started != 1)
+			return NULL;
+		ct = flow_aso_ct_get_by_dev_idx(owndev, idx);
+		if (ct->peer != PORT_ID(priv))
+			return NULL;
+	}
+	return ct;
+}
+
 int mlx5_flow_group_to_table(struct rte_eth_dev *dev,
 			     const struct mlx5_flow_tunnel *tunnel,
 			     uint32_t group, uint32_t *table,
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index aa0a5acdca..ca55cff48b 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -11289,7 +11289,7 @@ flow_dv_translate_create_aso_age(struct rte_eth_dev *dev,
 }
 
 /*
- * Release an ASO CT action.
+ * Release an ASO CT action by its own device.
  *
  * @param[in] dev
  *   Pointer to the Ethernet device structure.
@@ -11300,12 +11300,12 @@ flow_dv_translate_create_aso_age(struct rte_eth_dev *dev,
  *   0 when CT action was removed, otherwise the number of references.
  */
 static inline int
-flow_dv_aso_ct_release(struct rte_eth_dev *dev, uint32_t idx)
+flow_dv_aso_ct_dev_release(struct rte_eth_dev *dev, uint32_t idx)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
 	uint32_t ret;
-	struct mlx5_aso_ct_action *ct = flow_aso_ct_get_by_idx(dev, idx);
+	struct mlx5_aso_ct_action *ct = flow_aso_ct_get_by_dev_idx(dev, idx);
 	enum mlx5_aso_ct_state state =
 			__atomic_load_n(&ct->state, __ATOMIC_RELAXED);
 
@@ -11334,7 +11334,21 @@ flow_dv_aso_ct_release(struct rte_eth_dev *dev, uint32_t idx)
 		LIST_INSERT_HEAD(&mng->free_cts, ct, next);
 		rte_spinlock_unlock(&mng->ct_sl);
 	}
-	return ret;
+	return (int)ret;
+}
+
+static inline int
+flow_dv_aso_ct_release(struct rte_eth_dev *dev, uint32_t own_idx)
+{
+	uint16_t owner = (uint16_t)MLX5_INDIRECT_ACT_CT_GET_OWNER(own_idx);
+	uint32_t idx = MLX5_INDIRECT_ACT_CT_GET_IDX(own_idx);
+	struct rte_eth_dev *owndev = &rte_eth_devices[owner];
+	RTE_SET_USED(dev);
+
+	MLX5_ASSERT(owner < RTE_MAX_ETHPORTS);
+	if (dev->data->dev_started != 1)
+		return -1;
+	return flow_dv_aso_ct_dev_release(owndev, idx);
 }
 
 /*
@@ -11486,7 +11500,7 @@ flow_dv_aso_ct_alloc(struct rte_eth_dev *dev, struct rte_flow_error *error)
 		RTE_SET_USED(reg_c);
 #endif
 		if (!ct->dr_action_orig) {
-			flow_dv_aso_ct_release(dev, ct_idx);
+			flow_dv_aso_ct_dev_release(dev, ct_idx);
 			rte_flow_error_set(error, rte_errno,
 					   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
 					   "failed to create ASO CT action");
@@ -11502,7 +11516,7 @@ flow_dv_aso_ct_alloc(struct rte_eth_dev *dev, struct rte_flow_error *error)
 			 reg_c - REG_C_0);
 #endif
 		if (!ct->dr_action_rply) {
-			flow_dv_aso_ct_release(dev, ct_idx);
+			flow_dv_aso_ct_dev_release(dev, ct_idx);
 			rte_flow_error_set(error, rte_errno,
 					   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
 					   "failed to create ASO CT action");
@@ -11544,12 +11558,13 @@ flow_dv_translate_create_conntrack(struct rte_eth_dev *dev,
 		return rte_flow_error_set(error, rte_errno,
 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
 					  "Failed to allocate CT object");
-	ct = flow_aso_ct_get_by_idx(dev, idx);
+	ct = flow_aso_ct_get_by_dev_idx(dev, idx);
 	if (mlx5_aso_ct_update_by_wqe(sh, ct, pro))
 		return rte_flow_error_set(error, EBUSY,
 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
 					  "Failed to update CT");
 	ct->is_original = !!pro->is_original_dir;
+	ct->peer = pro->peer_port;
 	return idx;
 }
 
@@ -11713,7 +11728,7 @@ flow_dv_translate(struct rte_eth_dev *dev,
 		const struct rte_flow_action *found_action = NULL;
 		uint32_t jump_group = 0;
 		struct mlx5_flow_counter *cnt;
-		uint32_t ct_idx;
+		uint32_t owner_idx;
 		struct mlx5_aso_ct_action *ct;
 
 		if (!mlx5_flow_os_action_supported(action_type))
@@ -12189,8 +12204,13 @@ flow_dv_translate(struct rte_eth_dev *dev,
 			action_flags |= MLX5_FLOW_ACTION_MODIFY_FIELD;
 			break;
 		case RTE_FLOW_ACTION_TYPE_CONNTRACK:
-			ct_idx = (uint32_t)(uintptr_t)action->conf;
-			ct = flow_aso_ct_get_by_idx(dev, ct_idx);
+			owner_idx = (uint32_t)(uintptr_t)action->conf;
+			ct = flow_aso_ct_get_by_idx(dev, owner_idx);
+			if (!ct)
+				return rte_flow_error_set(error, EINVAL,
+						RTE_FLOW_ERROR_TYPE_ACTION,
+						NULL,
+						"Failed to get CT object.");
 			if (mlx5_aso_ct_available(priv->sh, ct))
 				return rte_flow_error_set(error, rte_errno,
 						RTE_FLOW_ERROR_TYPE_ACTION,
@@ -12203,7 +12223,7 @@ flow_dv_translate(struct rte_eth_dev *dev,
 				dev_flow->dv.actions[actions_n] =
 							ct->dr_action_rply;
 			flow->indirect_type = MLX5_INDIRECT_ACTION_TYPE_CT;
-			flow->ct = ct_idx;
+			flow->ct = owner_idx;
 			__atomic_fetch_add(&ct->refcnt, 1, __ATOMIC_RELAXED);
 			actions_n++;
 			action_flags |= MLX5_FLOW_ACTION_CT;
@@ -13803,8 +13823,7 @@ flow_dv_action_create(struct rte_eth_dev *dev,
 	case RTE_FLOW_ACTION_TYPE_CONNTRACK:
 		ret = flow_dv_translate_create_conntrack(dev, action->conf,
 							 err);
-		idx = (MLX5_INDIRECT_ACTION_TYPE_CT <<
-		       MLX5_INDIRECT_ACTION_TYPE_OFFSET) | ret;
+		idx = MLX5_INDIRECT_ACT_CT_GEN_IDX(PORT_ID(priv), ret);
 		break;
 	default:
 		rte_flow_error_set(err, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
@@ -13856,7 +13875,9 @@ flow_dv_action_destroy(struct rte_eth_dev *dev,
 		return 0;
 	case MLX5_INDIRECT_ACTION_TYPE_CT:
 		ret = flow_dv_aso_ct_release(dev, idx);
-		if (ret)
+		if (ret < 0)
+			return ret;
+		if (ret > 0)
 			DRV_LOG(DEBUG, "Connection tracking object %u still "
 				"has references %d.", idx, ret);
 		return 0;
@@ -13960,8 +13981,16 @@ __flow_dv_action_ct_update(struct rte_eth_dev *dev, uint32_t idx,
 	struct mlx5_aso_ct_action *ct;
 	const struct rte_flow_action_conntrack *new_prf;
 	int ret = 0;
+	uint16_t owner = (uint16_t)MLX5_INDIRECT_ACT_CT_GET_OWNER(idx);
+	uint32_t dev_idx;
 
-	ct = flow_aso_ct_get_by_idx(dev, idx);
+	if (PORT_ID(priv) != owner)
+		return rte_flow_error_set(error, EACCES,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					  NULL,
+					  "CT object owned by another port");
+	dev_idx = MLX5_INDIRECT_ACT_CT_GET_IDX(idx);
+	ct = flow_aso_ct_get_by_dev_idx(dev, dev_idx);
 	if (!ct->refcnt)
 		return rte_flow_error_set(error, ENOMEM,
 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
@@ -14049,6 +14078,8 @@ flow_dv_action_query(struct rte_eth_dev *dev,
 	uint32_t idx = act_idx & ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_aso_ct_action *ct;
+	uint16_t owner;
+	uint32_t dev_idx;
 
 	switch (type) {
 	case MLX5_INDIRECT_ACTION_TYPE_AGE:
@@ -14063,7 +14094,15 @@ flow_dv_action_query(struct rte_eth_dev *dev,
 			     (&age_param->sec_since_last_hit, __ATOMIC_RELAXED);
 		return 0;
 	case MLX5_INDIRECT_ACTION_TYPE_CT:
-		ct = flow_aso_ct_get_by_idx(dev, idx);
+		owner = (uint16_t)MLX5_INDIRECT_ACT_CT_GET_OWNER(idx);
+		if (owner != PORT_ID(priv))
+			return rte_flow_error_set(error, EACCES,
+					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					NULL,
+					"CT object owned by another port");
+		dev_idx = MLX5_INDIRECT_ACT_CT_GET_IDX(idx);
+		ct = flow_aso_ct_get_by_dev_idx(dev, dev_idx);
+		MLX5_ASSERT(ct);
 		if (!ct->refcnt)
 			return rte_flow_error_set(error, EFAULT,
 					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v3 00/17] conntrack support in mlx5 PMD
  2021-04-27 15:37 [dpdk-dev] [PATCH 00/17] conntrack support in mlx5 PMD Bing Zhao
                   ` (18 preceding siblings ...)
  2021-05-05  6:40 ` [dpdk-dev] [PATCH v3 " Bing Zhao
@ 2021-05-05  6:49 ` Bing Zhao
  2021-05-05  6:49   ` [dpdk-dev] [PATCH v3 01/17] common/mlx5: add connection tracking object definition Bing Zhao
                     ` (16 more replies)
  2021-05-05  7:19 ` [dpdk-dev] [PATCH v4 00/17] conntrack support in mlx5 PMD Bing Zhao
                   ` (3 subsequent siblings)
  23 siblings, 17 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  6:49 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

This patch set adds the connection tracking offload support in the
mlx5 driver, as well as the documentation update.
 
---
v2: code bug fixes, commits clean up and doc update.
v3: fix error input pointer for CT MR registering
---

Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>

Bing Zhao (17):
  common/mlx5: add connection tracking object definition
  common/mlx5: add CT offload capability checking
  net/mlx5: use meter color reg for CT
  net/mlx5: initialization of CT management
  common/mlx5: add Dexv CT objects creation
  net/mlx5: add modify support for CT
  net/mlx5: add actions creating for CT
  net/mlx5: close CT management structure
  net/mlx5: add ASO CT query implementation
  net/mlx5: add ASO CT destroy handling
  net/mlx5: add translation of CT action
  net/mlx5: add translation of CT item
  net/mlx5: add CT context update
  net/mlx5: validation of CT action
  net/mlx5: validation of CT item
  net/mlx5: add support of CT between two ports
  doc: update mlx5 support for conntrack

 doc/guides/nics/features/default.ini   |   1 +
 doc/guides/nics/features/mlx5.ini      |   1 +
 doc/guides/nics/mlx5.rst               |  14 +
 doc/guides/rel_notes/release_21_05.rst |   2 +
 drivers/common/mlx5/linux/meson.build  |   2 +
 drivers/common/mlx5/mlx5_devx_cmds.c   |  53 +++
 drivers/common/mlx5/mlx5_devx_cmds.h   |   5 +
 drivers/common/mlx5/mlx5_prm.h         |  88 ++++
 drivers/common/mlx5/version.map        |   1 +
 drivers/net/mlx5/linux/mlx5_os.c       |  13 +
 drivers/net/mlx5/mlx5.c                |  92 ++++
 drivers/net/mlx5/mlx5.h                |  76 ++++
 drivers/net/mlx5/mlx5_flow.c           |  44 +-
 drivers/net/mlx5/mlx5_flow.h           | 101 ++++-
 drivers/net/mlx5/mlx5_flow_aso.c       | 592 ++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_dv.c        | 601 ++++++++++++++++++++++++-
 16 files changed, 1683 insertions(+), 3 deletions(-)

-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v3 01/17] common/mlx5: add connection tracking object definition
  2021-05-05  6:49 ` [dpdk-dev] [PATCH v3 00/17] conntrack support in mlx5 PMD Bing Zhao
@ 2021-05-05  6:49   ` Bing Zhao
  2021-05-05  6:49   ` [dpdk-dev] [PATCH v3 02/17] common/mlx5: add CT offload capability checking Bing Zhao
                     ` (15 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  6:49 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

The structures of ASO connection tracking offload object are added
based on the definitions in the PRM. One CT object context will be
loaded into the cache completely in a reversed order of dwords. The
valid bit should be the MSB of the last dword. This is used for the
conntrack context creation and update, as well as for the query.

The capabilities 2 (HCA_CAP_2) layout is also added. The connection
tracking related capabilities could be queried via the HCA_CAP_2.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/common/mlx5/mlx5_prm.h | 85 ++++++++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)

diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index efa5ae67bf..4da89d3379 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -1119,6 +1119,7 @@ enum {
 	MLX5_GET_HCA_CAP_OP_MOD_ROCE = 0x4 << 1,
 	MLX5_GET_HCA_CAP_OP_MOD_NIC_FLOW_TABLE = 0x7 << 1,
 	MLX5_GET_HCA_CAP_OP_MOD_VDPA_EMULATION = 0x13 << 1,
+	MLX5_GET_HCA_CAP_OP_MOD_GENERAL_DEVICE_2 = 0x20 << 1,
 };
 
 #define MLX5_GENERAL_OBJ_TYPES_CAP_VIRTQ_NET_Q \
@@ -1661,6 +1662,29 @@ struct mlx5_ifc_flow_table_nic_cap_bits {
 	struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties;
 };
 
+struct mlx5_ifc_cmd_hca_cap_2_bits {
+	u8 reserved_at_0[0x80]; /* End of DW4. */
+	u8 reserved_at_80[0xb];
+	u8 log_max_num_reserved_qpn[0x5];
+	u8 reserved_at_90[0x3];
+	u8 log_reserved_qpn_granularity[0x5];
+	u8 reserved_at_98[0x3];
+	u8 log_reserved_qpn_max_alloc[0x5]; /* End of DW5. */
+	u8 max_reformat_insert_size[0x8];
+	u8 max_reformat_insert_offset[0x8];
+	u8 max_reformat_remove_size[0x8];
+	u8 max_reformat_remove_offset[0x8]; /* End of DW6. */
+	u8 aso_conntrack_reg_id[0x8];
+	u8 reserved_at_c8[0x3];
+	u8 log_conn_track_granularity[0x5];
+	u8 reserved_at_d0[0x3];
+	u8 log_conn_track_max_alloc[0x5];
+	u8 reserved_at_d8[0x3];
+	u8 log_max_conn_track_offload[0x5]; /* End of DW7. */
+	u8 reserved_at_e0[0x20]; /* End of DW8. */
+	u8 reserved_at_100[0x700];
+};
+
 union mlx5_ifc_hca_cap_union_bits {
 	struct mlx5_ifc_cmd_hca_cap_bits cmd_hca_cap;
 	struct mlx5_ifc_per_protocol_networking_offload_caps_bits
@@ -2599,6 +2623,67 @@ struct mlx5_ifc_create_flow_meter_aso_in_bits {
 	struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr;
 	struct mlx5_ifc_flow_meter_aso_bits flow_meter_aso;
 };
+
+struct mlx5_ifc_tcp_window_params_bits {
+	u8 max_ack[0x20];
+	u8 max_win[0x20];
+	u8 reply_end[0x20];
+	u8 sent_end[0x20];
+};
+
+struct mlx5_ifc_conn_track_aso_bits {
+	struct mlx5_ifc_tcp_window_params_bits reply_dir; /* End of DW3. */
+	struct mlx5_ifc_tcp_window_params_bits original_dir; /* End of DW7. */
+	u8 last_end[0x20]; /* End of DW8. */
+	u8 last_ack[0x20]; /* End of DW9. */
+	u8 last_seq[0x20]; /* End of DW10. */
+	u8 last_win[0x10];
+	u8 reserved_at_170[0xa];
+	u8 last_dir[0x1];
+	u8 last_index[0x5]; /* End of DW11. */
+	u8 reserved_at_180[0x40]; /* End of DW13. */
+	u8 reply_direction_tcp_scale[0x4];
+	u8 reply_direction_tcp_close_initiated[0x1];
+	u8 reply_direction_tcp_liberal_enabled[0x1];
+	u8 reply_direction_tcp_data_unacked[0x1];
+	u8 reply_direction_tcp_max_ack[0x1];
+	u8 reserved_at_1c8[0x8];
+	u8 original_direction_tcp_scale[0x4];
+	u8 original_direction_tcp_close_initiated[0x1];
+	u8 original_direction_tcp_liberal_enabled[0x1];
+	u8 original_direction_tcp_data_unacked[0x1];
+	u8 original_direction_tcp_max_ack[0x1];
+	u8 reserved_at_1d8[0x8]; /* End of DW14. */
+	u8 valid[0x1]; /* MSB of the last dword. */
+	u8 state[0x3];
+	u8 freeze_track[0x1];
+	u8 reserved_at_1e5[0xb];
+	u8 reserved_at_1f0[0x1];
+	u8 connection_assured[0x1];
+	u8 sack_permitted[0x1];
+	u8 challenged_acked[0x1];
+	u8 heartbeat[0x1];
+	u8 max_ack_window[0x3];
+	u8 reserved_at_1f8[0x1];
+	u8 retransmission_counter[0x3];
+	u8 retranmission_limit_exceeded[0x1];
+	u8 retranmission_limit[0x3]; /* End of DW15. */
+};
+
+struct mlx5_ifc_conn_track_offload_bits {
+	u8 modify_field_select[0x40];
+	u8 reserved_at_40[0x40];
+	u8 reserved_at_80[0x8];
+	u8 conn_track_aso_access_pd[0x18];
+	u8 reserved_at_a0[0x160];
+	struct mlx5_ifc_conn_track_aso_bits conn_track_aso;
+};
+
+struct mlx5_ifc_create_conn_track_aso_in_bits {
+	struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr;
+	struct mlx5_ifc_conn_track_offload_bits conn_track_offload;
+};
+
 enum mlx5_access_aso_opc_mod {
 	ASO_OPC_MOD_IPSEC = 0x0,
 	ASO_OPC_MOD_CONNECTION_TRACKING = 0x1,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v3 02/17] common/mlx5: add CT offload capability checking
  2021-05-05  6:49 ` [dpdk-dev] [PATCH v3 00/17] conntrack support in mlx5 PMD Bing Zhao
  2021-05-05  6:49   ` [dpdk-dev] [PATCH v3 01/17] common/mlx5: add connection tracking object definition Bing Zhao
@ 2021-05-05  6:49   ` Bing Zhao
  2021-05-05  6:49   ` [dpdk-dev] [PATCH v3 03/17] net/mlx5: use meter color reg for CT Bing Zhao
                     ` (14 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  6:49 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

During startup, the ASO connection tracking offload capability is
queried via the HCA_CAP_QUERY command. If the HW doesn't support ASO
CT, the value is 0 by default. In that case the subsequent
initialization should be skipped and the creation of a CT object
should return a failure directly.

The CT object creation should also check this capability. With an old
driver, a preprocessor macro is used so that the code still compiles.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/common/mlx5/linux/meson.build | 2 ++
 drivers/common/mlx5/mlx5_devx_cmds.c  | 3 +++
 drivers/common/mlx5/mlx5_devx_cmds.h  | 1 +
 drivers/common/mlx5/mlx5_prm.h        | 3 +++
 4 files changed, 9 insertions(+)

diff --git a/drivers/common/mlx5/linux/meson.build b/drivers/common/mlx5/linux/meson.build
index 3334bd5cb2..007834a49b 100644
--- a/drivers/common/mlx5/linux/meson.build
+++ b/drivers/common/mlx5/linux/meson.build
@@ -189,6 +189,8 @@ has_sym_args = [
             'MLX5_WQE_UMR_CTRL_FLAG_INLINE' ],
         [ 'HAVE_MLX5_DR_FLOW_DUMP_RULE', 'infiniband/mlx5dv.h',
             'mlx5dv_dump_dr_rule' ],
+        [ 'HAVE_MLX5_DR_ACTION_ASO_CT', 'infiniband/mlx5dv.h',
+            'MLX5DV_DR_ACTION_FLAGS_ASO_CT_DIRECTION_INITIATOR' ],
 ]
 config = configuration_data()
 foreach arg:has_sym_args
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index 79fff6457c..ad67883fde 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -760,6 +760,9 @@ mlx5_devx_cmd_query_hca_attr(void *ctx,
 		MLX5_GET(cmd_hca_cap, hcattr, umr_indirect_mkey_disabled);
 	attr->umr_modify_entity_size_disabled =
 		MLX5_GET(cmd_hca_cap, hcattr, umr_modify_entity_size_disabled);
+	attr->ct_offload = !!(MLX5_GET64(cmd_hca_cap, hcattr,
+					 general_obj_types) &
+			      MLX5_GENERAL_OBJ_TYPES_CAP_CONN_TRACK_OFFLOAD);
 	if (attr->qos.sup) {
 		MLX5_SET(query_hca_cap_in, in, op_mod,
 			 MLX5_GET_HCA_CAP_OP_MOD_QOS_CAP |
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h
index 870bdb6b30..746320cf04 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -137,6 +137,7 @@ struct mlx5_hca_attr {
 	uint32_t qp_ts_format:2;
 	uint32_t regex:1;
 	uint32_t reg_c_preserve:1;
+	uint32_t ct_offload:1; /* General obj type ASO CT offload supported. */
 	uint32_t regexp_num_of_engines;
 	uint32_t log_max_ft_sampler_num:8;
 	uint32_t geneve_tlv_opt;
diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 4da89d3379..71bdf43668 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -1134,6 +1134,8 @@ enum {
 			(1ULL << MLX5_GENERAL_OBJ_TYPE_FLOW_METER_ASO)
 #define MLX5_GENERAL_OBJ_TYPES_CAP_GENEVE_TLV_OPT \
 			(1ULL << MLX5_OBJ_TYPE_GENEVE_TLV_OPT)
+#define MLX5_GENERAL_OBJ_TYPES_CAP_CONN_TRACK_OFFLOAD \
+			(1ULL << MLX5_GENERAL_OBJ_TYPE_CONN_TRACK_OFFLOAD)
 
 enum {
 	MLX5_HCA_CAP_OPMOD_GET_MAX   = 0,
@@ -2456,6 +2458,7 @@ enum {
 	MLX5_GENERAL_OBJ_TYPE_FLEX_PARSE_GRAPH = 0x0022,
 	MLX5_GENERAL_OBJ_TYPE_FLOW_METER_ASO = 0x0024,
 	MLX5_GENERAL_OBJ_TYPE_FLOW_HIT_ASO = 0x0025,
+	MLX5_GENERAL_OBJ_TYPE_CONN_TRACK_OFFLOAD = 0x0031,
 };
 
 struct mlx5_ifc_general_obj_in_cmd_hdr_bits {
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v3 03/17] net/mlx5: use meter color reg for CT
  2021-05-05  6:49 ` [dpdk-dev] [PATCH v3 00/17] conntrack support in mlx5 PMD Bing Zhao
  2021-05-05  6:49   ` [dpdk-dev] [PATCH v3 01/17] common/mlx5: add connection tracking object definition Bing Zhao
  2021-05-05  6:49   ` [dpdk-dev] [PATCH v3 02/17] common/mlx5: add CT offload capability checking Bing Zhao
@ 2021-05-05  6:49   ` Bing Zhao
  2021-05-05  6:49   ` [dpdk-dev] [PATCH v3 04/17] net/mlx5: initialization of CT management Bing Zhao
                     ` (13 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  6:49 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

Based on the device capability, 3 registers could be used. Due to the
current register allocation, only REG_C_3, the one used for the meter
color, can be reused right now.

As a result, no more than one ASO action can be supported in the same
flow.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow.c | 4 +++-
 drivers/net/mlx5/mlx5_flow.h | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index a7ceafe221..edad6007a8 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -760,7 +760,9 @@ mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
 			return priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
 			       REG_C_3;
 	case MLX5_MTR_COLOR:
-	case MLX5_ASO_FLOW_HIT: /* Both features use the same REG_C. */
+	case MLX5_ASO_FLOW_HIT:
+	case MLX5_ASO_CONNTRACK:
+		/* All features use the same REG_C. */
 		MLX5_ASSERT(priv->mtr_color_reg != REG_NON);
 		return priv->mtr_color_reg;
 	case MLX5_COPY_MARK:
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index cc3e79d088..964e13a869 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -84,6 +84,7 @@ enum mlx5_feature_name {
 	MLX5_MTR_COLOR,
 	MLX5_MTR_ID,
 	MLX5_ASO_FLOW_HIT,
+	MLX5_ASO_CONNTRACK,
 };
 
 /* Default queue number. */
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v3 04/17] net/mlx5: initialization of CT management
  2021-05-05  6:49 ` [dpdk-dev] [PATCH v3 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (2 preceding siblings ...)
  2021-05-05  6:49   ` [dpdk-dev] [PATCH v3 03/17] net/mlx5: use meter color reg for CT Bing Zhao
@ 2021-05-05  6:49   ` Bing Zhao
  2021-05-05  6:49   ` [dpdk-dev] [PATCH v3 05/17] common/mlx5: add Dexv CT objects creation Bing Zhao
                     ` (12 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  6:49 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

The definitions of ASO connection tracking objects management
structures are added.

For performance reasons, ASO CT objects should be allocated in bulk.
The maximal number per bulk and the granularity can be fetched from
HCA capabilities 2. Right now, a fixed number of 64 objects per bulk
is used to simplify the management.

The ASO QP for CT is initialized; its SQ will be used for both modify
and query commands.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/linux/mlx5_os.c | 13 +++++++++
 drivers/net/mlx5/mlx5.c          | 36 +++++++++++++++++++++++
 drivers/net/mlx5/mlx5.h          | 50 ++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_aso.c | 50 ++++++++++++++++++++++++++++++++
 4 files changed, 149 insertions(+)

diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 479ee7d8d1..5ac787106d 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1323,6 +1323,19 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 			DRV_LOG(DEBUG, "Flow Hit ASO is supported.");
 		}
 #endif /* HAVE_MLX5_DR_CREATE_ACTION_ASO */
+#if defined(HAVE_MLX5_DR_CREATE_ACTION_ASO) && \
+	defined(HAVE_MLX5_DR_ACTION_ASO_CT)
+		if (config->hca_attr.ct_offload &&
+		    priv->mtr_color_reg == REG_C_3) {
+			err = mlx5_flow_aso_ct_mng_init(sh);
+			if (err) {
+				err = -err;
+				goto error;
+			}
+			DRV_LOG(DEBUG, "CT ASO is supported.");
+			sh->ct_aso_en = 1;
+		}
+#endif /* HAVE_MLX5_DR_CREATE_ACTION_ASO && HAVE_MLX5_DR_ACTION_ASO_CT */
 #if defined(HAVE_MLX5DV_DR) && defined(HAVE_MLX5_DR_CREATE_ACTION_FLOW_SAMPLE)
 		if (config->hca_attr.log_max_ft_sampler_num > 0  &&
 		    config->dv_flow_en) {
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 8cd6f1eaee..86dbe6d573 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -670,6 +670,42 @@ mlx5_age_event_prepare(struct mlx5_dev_ctx_shared *sh)
 	}
 }
 
+/**
+ * Initialize the ASO connection tracking structure.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_aso_ct_mng_init(struct mlx5_dev_ctx_shared *sh)
+{
+	int err;
+
+	if (sh->ct_mng)
+		return 0;
+	sh->ct_mng = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*sh->ct_mng),
+				 RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+	if (!sh->ct_mng) {
+		DRV_LOG(ERR, "ASO CT management allocation failed.");
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+	err = mlx5_aso_queue_init(sh, ASO_OPC_MOD_CONNECTION_TRACKING);
+	if (err) {
+		mlx5_free(sh->ct_mng);
+		sh->ct_mng = NULL; /* No dangling pointer for a later retry. */
+		rte_errno = EINVAL; /* Should be extracted from the failure. */
+		return -rte_errno;
+	}
+	rte_spinlock_init(&sh->ct_mng->ct_sl);
+	rte_rwlock_init(&sh->ct_mng->resize_rwl);
+	LIST_INIT(&sh->ct_mng->free_cts);
+	return 0;
+}
+
 /**
  * Initialize the flow resources' indexed mempool.
  *
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index c62977613a..1a5c78fa3a 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -983,6 +983,52 @@ struct mlx5_bond_info {
 	} ports[MLX5_BOND_MAX_PORTS];
 };
 
+/* Number of connection tracking objects per pool: must be a power of 2. */
+#define MLX5_ASO_CT_ACTIONS_PER_POOL 64
+
+/* ASO Conntrack state. */
+enum mlx5_aso_ct_state {
+	ASO_CONNTRACK_FREE, /* Inactive, in the free list. */
+	ASO_CONNTRACK_WAIT, /* WQE sent in the SQ. */
+	ASO_CONNTRACK_READY, /* CQE received w/o error. */
+	ASO_CONNTRACK_QUERY, /* WQE for query sent. */
+	ASO_CONNTRACK_MAX, /* Guard. */
+};
+
+/* Generic ASO connection tracking structure. */
+struct mlx5_aso_ct_action {
+	LIST_ENTRY(mlx5_aso_ct_action) next; /* Pointer to the next ASO CT. */
+	void *dr_action_orig; /* General action object for original dir. */
+	void *dr_action_rply; /* General action object for reply dir. */
+	uint32_t refcnt; /* Action used count in device flows. */
+	uint16_t offset; /* Offset of ASO CT in DevX objects bulk. */
+	uint16_t peer; /* The only peer port index could also use this CT. */
+	enum mlx5_aso_ct_state state; /* ASO CT state. */
+	bool is_original; /* The direction of the DR action to be used. */
+};
+
+/* ASO connection tracking software pool definition. */
+struct mlx5_aso_ct_pool {
+	uint16_t index; /* Pool index in pools array. */
+	struct mlx5_devx_obj *devx_obj;
+	/* The first devx object in the bulk, used for freeing (not yet). */
+	struct mlx5_aso_ct_action actions[MLX5_ASO_CT_ACTIONS_PER_POOL];
+	/* CT action structures bulk. */
+};
+
+LIST_HEAD(aso_ct_list, mlx5_aso_ct_action);
+
+/* Pools management structure for ASO connection tracking pools. */
+struct mlx5_aso_ct_pools_mng {
+	struct mlx5_aso_ct_pool **pools;
+	uint16_t n; /* Total number of pools. */
+	uint16_t next; /* Number of pools in use, index of next free pool. */
+	rte_spinlock_t ct_sl; /* The ASO CT free list lock. */
+	rte_rwlock_t resize_rwl; /* The ASO CT pool resize lock. */
+	struct aso_ct_list free_cts; /* Free ASO CT objects list. */
+	struct mlx5_aso_sq aso_sq; /* ASO queue objects. */
+};
+
 /*
  * Shared Infiniband device context for Master/Representors
  * which belong to same IB device with multiple IB ports.
@@ -996,6 +1042,7 @@ struct mlx5_dev_ctx_shared {
 	uint32_t sq_ts_format:2; /* SQ timestamp formats supported. */
 	uint32_t qp_ts_format:2; /* QP timestamp formats supported. */
 	uint32_t meter_aso_en:1; /* Flow Meter ASO is supported. */
+	uint32_t ct_aso_en:1; /* Connection Tracking ASO is supported. */
 	uint32_t max_port; /* Maximal IB device port index. */
 	struct mlx5_bond_info bond; /* Bonding information. */
 	void *ctx; /* Verbs/DV/DevX context. */
@@ -1058,6 +1105,8 @@ struct mlx5_dev_ctx_shared {
 	rte_spinlock_t geneve_tlv_opt_sl; /* Lock for geneve tlv resource */
 	struct mlx5_flow_mtr_mng *mtrmng;
 	/* Meter management structure. */
+	struct mlx5_aso_ct_pools_mng *ct_mng;
+	/* Management data for ASO connection tracking. */
 	struct mlx5_dev_shared_port port[]; /* per device port data array. */
 };
 
@@ -1355,6 +1404,7 @@ bool mlx5_flex_parser_ecpri_exist(struct rte_eth_dev *dev);
 int mlx5_flex_parser_ecpri_alloc(struct rte_eth_dev *dev);
 int mlx5_flow_aso_age_mng_init(struct mlx5_dev_ctx_shared *sh);
 int mlx5_aso_flow_mtrs_mng_init(struct mlx5_dev_ctx_shared *sh);
+int mlx5_flow_aso_ct_mng_init(struct mlx5_dev_ctx_shared *sh);
 
 /* mlx5_ethdev.c */
 
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index 300987d0e9..c24d865284 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -186,6 +186,43 @@ mlx5_aso_mtr_init_sq(struct mlx5_aso_sq *sq)
 	}
 }
 
+/**
+ * Initialize Send Queue used for ASO connection tracking.
+ *
+ * @param[in] sq
+ *   ASO SQ to initialize.
+ */
+static void
+mlx5_aso_ct_init_sq(struct mlx5_aso_sq *sq)
+{
+	volatile struct mlx5_aso_wqe *restrict wqe;
+	int i;
+	int size = 1 << sq->log_desc_n;
+	uint64_t addr;
+
+	/* The fields set in this loop are expected to stay constant later. */
+	for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
+		wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
+							  (sizeof(*wqe) >> 4));
+		/* One unique MR for the query data. */
+		wqe->aso_cseg.lkey = rte_cpu_to_be_32(sq->mr.mkey->id);
+		/* Magic number 64 represents the length of an ASO CT obj. */
+		addr = (uint64_t)((uintptr_t)sq->mr.addr + i * 64);
+		wqe->aso_cseg.va_h = rte_cpu_to_be_32((uint32_t)(addr >> 32));
+		wqe->aso_cseg.va_l_r = rte_cpu_to_be_32((uint32_t)addr | 1u);
+		/*
+		 * The values of operand_masks are different for modify
+		 * and query.
+		 * And data_mask may be different for each modification. In
+		 * query, it could be zero and ignored.
+		 * CQE generation is always needed, in order to decide when
+		 * it is available to create the flow or read the data.
+		 */
+		wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
+						   MLX5_COMP_MODE_OFFSET);
+	}
+}
+
 /**
  * Create Send Queue used for ASO access.
  *
@@ -293,6 +330,19 @@ mlx5_aso_queue_init(struct mlx5_dev_ctx_shared *sh,
 			return -1;
 		mlx5_aso_mtr_init_sq(&sh->mtrmng->pools_mng.sq);
 		break;
+	case ASO_OPC_MOD_CONNECTION_TRACKING:
+		/* 64B per object for query. */
+		if (mlx5_aso_reg_mr(sh, 64 * sq_desc_n,
+				    &sh->ct_mng->aso_sq.mr, 0))
+			return -1;
+		if (mlx5_aso_sq_create(sh->ctx, &sh->ct_mng->aso_sq, 0,
+				sh->tx_uar, sh->pdn, MLX5_ASO_QUEUE_LOG_DESC,
+				sh->sq_ts_format)) {
+			mlx5_aso_dereg_mr(sh, &sh->ct_mng->aso_sq.mr);
+			return -1;
+		}
+		mlx5_aso_ct_init_sq(&sh->ct_mng->aso_sq);
+		break;
 	default:
 		DRV_LOG(ERR, "Unknown ASO operation mode");
 		return -1;
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v3 05/17] common/mlx5: add Dexv CT objects creation
  2021-05-05  6:49 ` [dpdk-dev] [PATCH v3 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (3 preceding siblings ...)
  2021-05-05  6:49   ` [dpdk-dev] [PATCH v3 04/17] net/mlx5: initialization of CT management Bing Zhao
@ 2021-05-05  6:49   ` Bing Zhao
  2021-05-05  6:49   ` [dpdk-dev] [PATCH v3 06/17] net/mlx5: add modify support for CT Bing Zhao
                     ` (11 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  6:49 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

Add support for creating connection tracking ASO objects via a DevX
command. Right now only bulk creation is supported.

By default, the objects are created with zeroed contents. Before a
single object can be used, it must be modified by posting a WQE to
the ASO CT SQ.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/common/mlx5/mlx5_devx_cmds.c | 50 ++++++++++++++++++++++++++++
 drivers/common/mlx5/mlx5_devx_cmds.h |  4 +++
 drivers/common/mlx5/version.map      |  1 +
 3 files changed, 55 insertions(+)

diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index ad67883fde..dc01266642 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -2232,6 +2232,56 @@ mlx5_devx_cmd_create_flow_meter_aso_obj(void *ctx, uint32_t pd,
 	return flow_meter_aso_obj;
 }
 
+/**
+ * Create general object of type CONN_TRACK_OFFLOAD using DevX API.
+ *
+ * @param[in] ctx
+ *   Context returned from mlx5 open_device() glue function.
+ * @param[in] pd
+ *   PD value to associate the CONN_TRACK_OFFLOAD ASO object with.
+ * @param[in] log_obj_size
+ *   Log2 of the number of objects to allocate
+ *   in one CONN_TRACK_OFFLOAD bulk allocation.
+ *
+ * @return
+ *   The DevX object created, NULL otherwise and rte_errno is set.
+ */
+struct mlx5_devx_obj *
+mlx5_devx_cmd_create_conn_track_offload_obj(void *ctx, uint32_t pd,
+					    uint32_t log_obj_size)
+{
+	uint32_t in[MLX5_ST_SZ_DW(create_conn_track_aso_in)] = {0};
+	uint32_t out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+	struct mlx5_devx_obj *ct_aso_obj;
+	void *ptr;
+
+	ct_aso_obj = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*ct_aso_obj),
+				 0, SOCKET_ID_ANY);
+	if (!ct_aso_obj) {
+		DRV_LOG(ERR, "Failed to allocate CONN_TRACK_OFFLOAD object.");
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+	ptr = MLX5_ADDR_OF(create_conn_track_aso_in, in, hdr);
+	MLX5_SET(general_obj_in_cmd_hdr, ptr, opcode,
+		 MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, ptr, obj_type,
+		 MLX5_GENERAL_OBJ_TYPE_CONN_TRACK_OFFLOAD);
+	MLX5_SET(general_obj_in_cmd_hdr, ptr, log_obj_range, log_obj_size);
+	ptr = MLX5_ADDR_OF(create_conn_track_aso_in, in, conn_track_offload);
+	MLX5_SET(conn_track_offload, ptr, conn_track_aso_access_pd, pd);
+	ct_aso_obj->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in),
+						     out, sizeof(out));
+	if (!ct_aso_obj->obj) {
+		rte_errno = errno;
+		DRV_LOG(ERR, "Failed to create CONN_TRACK_OFFLOAD obj by using DevX.");
+		mlx5_free(ct_aso_obj);
+		return NULL;
+	}
+	ct_aso_obj->id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+	return ct_aso_obj;
+}
+
 /**
  * Create general object of type GENEVE TLV option using DevX API.
  *
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h
index 746320cf04..e67cea506d 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -569,6 +569,10 @@ struct mlx5_devx_obj *mlx5_devx_cmd_queue_counter_alloc(void *ctx);
 __rte_internal
 int mlx5_devx_cmd_queue_counter_query(struct mlx5_devx_obj *dcs, int clear,
 				      uint32_t *out_of_buffers);
+__rte_internal
+struct mlx5_devx_obj *mlx5_devx_cmd_create_conn_track_offload_obj(void *ctx,
+					uint32_t pd, uint32_t log_obj_size);
+
 /**
  * Create general object of type FLOW_METER_ASO using DevX API..
  *
diff --git a/drivers/common/mlx5/version.map b/drivers/common/mlx5/version.map
index 18dc96276d..4bbcba5b8e 100644
--- a/drivers/common/mlx5/version.map
+++ b/drivers/common/mlx5/version.map
@@ -13,6 +13,7 @@ INTERNAL {
 	mlx5_dev_to_pci_addr; # WINDOWS_NO_EXPORT
 
 	mlx5_devx_cmd_alloc_pd;
+	mlx5_devx_cmd_create_conn_track_offload_obj;
 	mlx5_devx_cmd_create_cq;
 	mlx5_devx_cmd_create_flex_parser;
 	mlx5_devx_cmd_create_qp;
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v3 06/17] net/mlx5: add modify support for CT
  2021-05-05  6:49 ` [dpdk-dev] [PATCH v3 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (4 preceding siblings ...)
  2021-05-05  6:49   ` [dpdk-dev] [PATCH v3 05/17] common/mlx5: add Dexv CT objects creation Bing Zhao
@ 2021-05-05  6:49   ` Bing Zhao
  2021-05-05  6:49   ` [dpdk-dev] [PATCH v3 07/17] net/mlx5: add actions creating " Bing Zhao
                     ` (10 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  6:49 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

After the connection tracking object bulk is allocated, all the
objects' contents are filled with zero by default. Every newly
allocated object must be modified via a WQE operation before it is
used.

In order to reduce the latency for the flow creation, an asynchronous
way is used instead of busy waiting for the CQE to be generated.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.h          |   8 +
 drivers/net/mlx5/mlx5_flow.h     |   3 +
 drivers/net/mlx5/mlx5_flow_aso.c | 252 +++++++++++++++++++++++++++++++
 3 files changed, 263 insertions(+)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 1a5c78fa3a..1898a0401f 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -490,6 +490,7 @@ struct mlx5_aso_sq_elem {
 			uint16_t burst_size;
 		};
 		struct mlx5_aso_mtr *mtr;
+		struct mlx5_aso_ct_action *ct;
 	};
 };
 
@@ -1007,6 +1008,10 @@ struct mlx5_aso_ct_action {
 	bool is_original; /* The direction of the DR action to be used. */
 };
 
+/* CT action object state update. */
+#define MLX5_ASO_CT_UPDATE_STATE(c, s) \
+	__atomic_store_n(&((c)->state), (s), __ATOMIC_RELAXED)
+
 /* ASO connection tracking software pool definition. */
 struct mlx5_aso_ct_pool {
 	uint16_t index; /* Pool index in pools array. */
@@ -1690,5 +1695,8 @@ int mlx5_aso_meter_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
 		struct mlx5_aso_mtr *mtr);
 int mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh,
 		struct mlx5_aso_mtr *mtr);
+int mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
+			      struct mlx5_aso_ct_action *ct,
+			      const struct rte_flow_action_conntrack *profile);
 
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 964e13a869..eb5b53ac6a 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -45,6 +45,7 @@ enum mlx5_rte_flow_action_type {
 enum {
 	MLX5_INDIRECT_ACTION_TYPE_RSS,
 	MLX5_INDIRECT_ACTION_TYPE_AGE,
+	MLX5_INDIRECT_ACTION_TYPE_AGE,
 };
 
 /* Matches on selected register. */
@@ -839,6 +840,8 @@ struct mlx5_flow {
 #define MLX5_ASO_WQE_CQE_RESPONSE_DELAY 10u
 #define MLX5_MTR_POLL_WQE_CQE_TIMES 100000u
 
+#define MLX5_CT_POLL_WQE_CQE_TIMES MLX5_MTR_POLL_WQE_CQE_TIMES
+
 #define MLX5_MAN_WIDTH 8
 /* Legacy Meter parameter structure. */
 struct mlx5_legacy_flow_meter {
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index c24d865284..0ff19e6171 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -887,3 +887,255 @@ mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh,
 			mtr->offset);
 	return -1;
 }
+
+/**
+ * Post a WQE to the ASO CT SQ to modify the context.
+ *
+ * @param[in] mng
+ *   Pointer to the CT pools management structure.
+ * @param[in] ct
+ *   Pointer to the generic CT structure related to the context.
+ * @param[in] profile
+ *   Pointer to configuration profile.
+ *
+ * @return
+ *   1 on success (WQE number), 0 on failure.
+ */
+static uint16_t
+mlx5_aso_ct_sq_enqueue_single(struct mlx5_aso_ct_pools_mng *mng,
+			      struct mlx5_aso_ct_action *ct,
+			      const struct rte_flow_action_conntrack *profile)
+{
+	volatile struct mlx5_aso_wqe *wqe = NULL;
+	struct mlx5_aso_sq *sq = &mng->aso_sq;
+	uint16_t size = 1 << sq->log_desc_n;
+	uint16_t mask = size - 1;
+	uint16_t res;
+	struct mlx5_aso_ct_pool *pool;
+	void *desg;
+	void *orig_dir;
+	void *reply_dir;
+
+	rte_spinlock_lock(&sq->sqsl);
+	/* The SQ lock prevents other threads from updating the indexes. */
+	res = size - (uint16_t)(sq->head - sq->tail);
+	if (unlikely(!res)) {
+		rte_spinlock_unlock(&sq->sqsl);
+		DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
+		return 0;
+	}
+	wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
+	rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
+	/* Fill next WQE. */
+	MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_WAIT);
+	sq->elts[sq->head & mask].ct = ct;
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	/* Each WQE will have a single CT object. */
+	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
+						  ct->offset);
+	wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
+			(ASO_OPC_MOD_CONNECTION_TRACKING <<
+			 WQE_CSEG_OPC_MOD_OFFSET) |
+			sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
+	wqe->aso_cseg.operand_masks = rte_cpu_to_be_32
+			(0u |
+			 (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
+			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
+			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
+			 (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
+	wqe->aso_cseg.data_mask = UINT64_MAX;
+	/* Drop the volatile qualifier to make the compiler happy. */
+	desg = (void *)(uintptr_t)wqe->aso_dseg.data;
+	MLX5_SET(conn_track_aso, desg, valid, 1);
+	MLX5_SET(conn_track_aso, desg, state, profile->state);
+	MLX5_SET(conn_track_aso, desg, freeze_track, !profile->enable);
+	MLX5_SET(conn_track_aso, desg, connection_assured,
+		 profile->live_connection);
+	MLX5_SET(conn_track_aso, desg, sack_permitted, profile->selective_ack);
+	MLX5_SET(conn_track_aso, desg, challenged_acked,
+		 profile->challenge_ack_passed);
+	/* Heartbeat, retransmission_counter, retranmission_limit_exceeded: 0 */
+	MLX5_SET(conn_track_aso, desg, heartbeat, 0);
+	MLX5_SET(conn_track_aso, desg, max_ack_window,
+		 profile->max_ack_window);
+	MLX5_SET(conn_track_aso, desg, retransmission_counter, 0);
+	MLX5_SET(conn_track_aso, desg, retranmission_limit_exceeded, 0);
+	MLX5_SET(conn_track_aso, desg, retranmission_limit,
+		 profile->retransmission_limit);
+	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_scale,
+		 profile->reply_dir.scale);
+	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_close_initiated,
+		 profile->reply_dir.close_initiated);
+	/* Both directions will use the same liberal mode. */
+	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_liberal_enabled,
+		 profile->liberal_mode);
+	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_data_unacked,
+		 profile->reply_dir.data_unacked);
+	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_max_ack,
+		 profile->reply_dir.last_ack_seen);
+	MLX5_SET(conn_track_aso, desg, original_direction_tcp_scale,
+		 profile->original_dir.scale);
+	MLX5_SET(conn_track_aso, desg, original_direction_tcp_close_initiated,
+		 profile->original_dir.close_initiated);
+	MLX5_SET(conn_track_aso, desg, original_direction_tcp_liberal_enabled,
+		 profile->liberal_mode);
+	MLX5_SET(conn_track_aso, desg, original_direction_tcp_data_unacked,
+		 profile->original_dir.data_unacked);
+	MLX5_SET(conn_track_aso, desg, original_direction_tcp_max_ack,
+		 profile->original_dir.last_ack_seen);
+	MLX5_SET(conn_track_aso, desg, last_win, profile->last_window);
+	MLX5_SET(conn_track_aso, desg, last_dir, profile->last_direction);
+	MLX5_SET(conn_track_aso, desg, last_index, profile->last_index);
+	MLX5_SET(conn_track_aso, desg, last_seq, profile->last_seq);
+	MLX5_SET(conn_track_aso, desg, last_ack, profile->last_ack);
+	MLX5_SET(conn_track_aso, desg, last_end, profile->last_end);
+	orig_dir = MLX5_ADDR_OF(conn_track_aso, desg, original_dir);
+	MLX5_SET(tcp_window_params, orig_dir, sent_end,
+		 profile->original_dir.sent_end);
+	MLX5_SET(tcp_window_params, orig_dir, reply_end,
+		 profile->original_dir.reply_end);
+	MLX5_SET(tcp_window_params, orig_dir, max_win,
+		 profile->original_dir.max_win);
+	MLX5_SET(tcp_window_params, orig_dir, max_ack,
+		 profile->original_dir.max_ack);
+	reply_dir = MLX5_ADDR_OF(conn_track_aso, desg, reply_dir);
+	MLX5_SET(tcp_window_params, reply_dir, sent_end,
+		 profile->reply_dir.sent_end);
+	MLX5_SET(tcp_window_params, reply_dir, reply_end,
+		 profile->reply_dir.reply_end);
+	MLX5_SET(tcp_window_params, reply_dir, max_win,
+		 profile->reply_dir.max_win);
+	MLX5_SET(tcp_window_params, reply_dir, max_ack,
+		 profile->reply_dir.max_ack);
+	sq->head++;
+	sq->pi += 2; /* Each WQE contains 2 WQEBB's. */
+	rte_io_wmb();
+	sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
+	rte_wmb();
+	*sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH. */
+	rte_wmb();
+	rte_spinlock_unlock(&sq->sqsl);
+	return 1;
+}
+
+/**
+ * Update the status field of CTs to indicate ready to be used by flows.
+ * The CTs are the @p num consecutive SQ elements starting at the SQ tail.
+ *
+ * @param[in] sq
+ *   Pointer to ASO CT SQ.
+ * @param[in] num
+ *   Number of CT structures to be updated.
+ *
+ * @note
+ *   No value is returned, every CT in the range is marked as ready.
+ */
+static void
+mlx5_aso_ct_status_update(struct mlx5_aso_sq *sq, uint16_t num)
+{
+	uint16_t size = 1 << sq->log_desc_n;
+	uint16_t mask = size - 1;
+	uint16_t i;
+	struct mlx5_aso_ct_action *ct = NULL;
+	uint16_t idx;
+
+	for (i = 0; i < num; i++) {
+		idx = (uint16_t)((sq->tail + i) & mask);
+		ct = sq->elts[idx].ct;
+		MLX5_ASSERT(ct);
+		MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_READY);
+	}
+}
+
+/**
+ * Handle completions from WQEs sent to ASO CT, marking the CTs as ready.
+ *
+ * @param[in] mng
+ *   Pointer to the CT pools management structure.
+ */
+static void
+mlx5_aso_ct_completion_handle(struct mlx5_aso_ct_pools_mng *mng)
+{
+	struct mlx5_aso_sq *sq = &mng->aso_sq;
+	struct mlx5_aso_cq *cq = &sq->cq;
+	volatile struct mlx5_cqe *restrict cqe;
+	const uint32_t cq_size = 1 << cq->log_desc_n;
+	const uint32_t mask = cq_size - 1;
+	uint32_t idx;
+	uint32_t next_idx;
+	uint16_t max;
+	uint16_t n = 0;
+	int ret;
+
+	rte_spinlock_lock(&sq->sqsl);
+	max = (uint16_t)(sq->head - sq->tail);
+	if (unlikely(!max)) {
+		rte_spinlock_unlock(&sq->sqsl);
+		return;
+	}
+	next_idx = cq->cq_ci & mask;
+	do {
+		idx = next_idx;
+		next_idx = (cq->cq_ci + 1) & mask;
+		/* Need to confirm the position of the prefetch. */
+		rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
+		cqe = &cq->cq_obj.cqes[idx];
+		ret = check_cqe(cqe, cq_size, cq->cq_ci);
+		/*
+		 * Be sure owner read is done before any other cookie field or
+		 * opaque field.
+		 */
+		rte_io_rmb();
+		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
+			if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
+				break;
+			mlx5_aso_cqe_err_handle(sq);
+		} else {
+			n++;
+		}
+		cq->cq_ci++;
+	} while (1);
+	if (likely(n)) {
+		mlx5_aso_ct_status_update(sq, n);
+		sq->tail += n;
+		rte_io_wmb();
+		cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
+	}
+	rte_spinlock_unlock(&sq->sqsl);
+}
+
+/**
+ * Update connection tracking ASO context by sending WQE.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ * @param[in] ct
+ *   Pointer to connection tracking offload object.
+ * @param[in] profile
+ *   Pointer to connection tracking TCP parameter.
+ *
+ * @return
+ *   0 once the WQE is posted (completion is not awaited), -1 on failure.
+ */
+int
+mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
+			  struct mlx5_aso_ct_action *ct,
+			  const struct rte_flow_action_conntrack *profile)
+{
+	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
+	uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
+	struct mlx5_aso_ct_pool *pool;
+
+	MLX5_ASSERT(ct);
+	do {
+		mlx5_aso_ct_completion_handle(mng);
+		if (mlx5_aso_ct_sq_enqueue_single(mng, ct, profile))
+			return 0;
+		/* The SQ is full, wait for a free WQE before retrying. */
+		rte_delay_us_sleep(10u);
+	} while (--poll_wqe_times);
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	DRV_LOG(ERR, "Fail to send WQE for ASO CT %d in pool %d",
+		ct->offset, pool->index);
+	return -1;
+}
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v3 07/17] net/mlx5: add actions creating for CT
  2021-05-05  6:49 ` [dpdk-dev] [PATCH v3 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (5 preceding siblings ...)
  2021-05-05  6:49   ` [dpdk-dev] [PATCH v3 06/17] net/mlx5: add modify support for CT Bing Zhao
@ 2021-05-05  6:49   ` Bing Zhao
  2021-05-05  6:49   ` [dpdk-dev] [PATCH v3 08/17] net/mlx5: close CT management structure Bing Zhao
                     ` (9 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  6:49 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

Allocate a CT from the management pools and create the DR actions
for both directions by default.

If there is no available connection tracking action, a new pool will
be created with a fixed size bulk allocation. Right now, all the
resources are controlled by the linked list.

The ASO connection tracking context associated with these actions
needs to be updated via WQE before being used for steering.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.h         |   4 +
 drivers/net/mlx5/mlx5_flow.h    |  29 +++-
 drivers/net/mlx5/mlx5_flow_dv.c | 263 ++++++++++++++++++++++++++++++++
 3 files changed, 295 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 1898a0401f..de18a59c8e 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -987,6 +987,10 @@ struct mlx5_bond_info {
 /* Number of connection tracking objects per pool: must be a power of 2. */
 #define MLX5_ASO_CT_ACTIONS_PER_POOL 64
 
+/* Generate incremental and unique CT index from pool and offset. */
+#define MLX5_MAKE_CT_IDX(pool, offset) \
+	((pool) * MLX5_ASO_CT_ACTIONS_PER_POOL + (offset) + 1)
+
 /* ASO Conntrack state. */
 enum mlx5_aso_ct_state {
 	ASO_CONNTRACK_FREE, /* Inactive, in the free list. */
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index eb5b53ac6a..8f2bc7d2f6 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -45,7 +45,7 @@ enum mlx5_rte_flow_action_type {
 enum {
 	MLX5_INDIRECT_ACTION_TYPE_RSS,
 	MLX5_INDIRECT_ACTION_TYPE_AGE,
-	MLX5_INDIRECT_ACTION_TYPE_AGE,
+	MLX5_INDIRECT_ACTION_TYPE_CT,
 };
 
 /* Matches on selected register. */
@@ -1288,6 +1288,33 @@ mlx5_aso_meter_by_idx(struct mlx5_priv *priv, uint32_t idx)
 	return &pool->mtrs[idx % MLX5_ASO_MTRS_PER_POOL];
 }
 
+/**
+ * Get ASO CT action by index.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] idx
+ *   1-based index to the ASO CT action.
+ *
+ * @return
+ *   The specified ASO CT action pointer.
+ */
+static inline struct mlx5_aso_ct_action *
+flow_aso_ct_get_by_idx(struct rte_eth_dev *dev, uint32_t idx)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	struct mlx5_aso_ct_pool *pool;
+
+	idx--; /* Convert the 1-based index to a 0-based one. */
+	MLX5_ASSERT((idx / MLX5_ASO_CT_ACTIONS_PER_POOL) < mng->n);
+	/* Bit operation AND could be used. */
+	rte_rwlock_read_lock(&mng->resize_rwl);
+	pool = mng->pools[idx / MLX5_ASO_CT_ACTIONS_PER_POOL];
+	rte_rwlock_read_unlock(&mng->resize_rwl);
+	return &pool->actions[idx % MLX5_ASO_CT_ACTIONS_PER_POOL];
+}
+
 int mlx5_flow_group_to_table(struct rte_eth_dev *dev,
 			     const struct mlx5_flow_tunnel *tunnel,
 			     uint32_t group, uint32_t *table,
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 0d022dff3f..c8ff693e4c 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -11120,6 +11120,262 @@ flow_dv_translate_create_aso_age(struct rte_eth_dev *dev,
 	return age_idx;
 }
 
+/**
+ * Release an ASO CT action.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] idx
+ *   Index of ASO CT action to release.
+ *
+ * @return
+ *   0 when CT action was removed, otherwise the number of references.
+ */
+static inline int
+flow_dv_aso_ct_release(struct rte_eth_dev *dev, uint32_t idx)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	struct mlx5_aso_ct_action *ct = flow_aso_ct_get_by_idx(dev, idx);
+	uint32_t ret = __atomic_sub_fetch(&ct->refcnt, 1, __ATOMIC_RELAXED);
+
+	if (!ret) { /* Last reference: destroy the actions, recycle the CT. */
+		if (ct->dr_action_orig) {
+#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
+			claim_zero(mlx5_glue->destroy_flow_action
+					(ct->dr_action_orig));
+#endif
+			ct->dr_action_orig = NULL;
+		}
+		if (ct->dr_action_rply) {
+#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
+			claim_zero(mlx5_glue->destroy_flow_action
+					(ct->dr_action_rply));
+#endif
+			ct->dr_action_rply = NULL;
+		}
+		rte_spinlock_lock(&mng->ct_sl);
+		LIST_INSERT_HEAD(&mng->free_cts, ct, next);
+		rte_spinlock_unlock(&mng->ct_sl);
+	}
+	return ret;
+}
+
+/**
+ * Resize the ASO CT pools array by 64 pools.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ *
+ * @return
+ *   0 on success, otherwise negative errno value and rte_errno is set.
+ */
+static int
+flow_dv_aso_ct_pools_resize(struct rte_eth_dev *dev)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	void *old_pools = mng->pools;
+	/* Magic number now, need a macro. */
+	uint32_t resize = mng->n + 64;
+	uint32_t mem_size = sizeof(struct mlx5_aso_ct_pool *) * resize;
+	void *pools = mlx5_malloc(MLX5_MEM_ZERO, mem_size, 0, SOCKET_ID_ANY);
+
+	if (!pools) {
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+	rte_rwlock_write_lock(&mng->resize_rwl);
+	/* ASO SQ/QP was already initialized in the startup. */
+	if (old_pools) {
+		/* Realloc could be an alternative choice. */
+		rte_memcpy(pools, old_pools,
+			   mng->n * sizeof(struct mlx5_aso_ct_pool *));
+		mlx5_free(old_pools);
+	}
+	mng->n = resize;
+	mng->pools = pools;
+	rte_rwlock_write_unlock(&mng->resize_rwl);
+	return 0;
+}
+
+/**
+ * Create and initialize a new ASO CT pool.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[out] ct_free
+ *   Where to put the pointer of a new CT action.
+ *
+ * @return
+ *   The CT actions pool pointer and @p ct_free is set on success,
+ *   NULL otherwise and rte_errno is set.
+ */
+static struct mlx5_aso_ct_pool *
+flow_dv_ct_pool_create(struct rte_eth_dev *dev,
+		       struct mlx5_aso_ct_action **ct_free)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	struct mlx5_aso_ct_pool *pool = NULL;
+	struct mlx5_devx_obj *obj = NULL;
+	uint32_t i;
+	uint32_t log_obj_size = rte_log2_u32(MLX5_ASO_CT_ACTIONS_PER_POOL);
+
+	obj = mlx5_devx_cmd_create_conn_track_offload_obj(priv->sh->ctx,
+						priv->sh->pdn, log_obj_size);
+	if (!obj) {
+		rte_errno = ENODATA;
+		DRV_LOG(ERR, "Failed to create conn_track_offload_obj using DevX.");
+		return NULL;
+	}
+	pool = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*pool), 0, SOCKET_ID_ANY);
+	if (!pool) {
+		rte_errno = ENOMEM;
+		claim_zero(mlx5_devx_cmd_destroy(obj));
+		return NULL;
+	}
+	pool->devx_obj = obj;
+	pool->index = mng->next;
+	/* Resize pools array if there is no room for the new pool in it. */
+	if (pool->index == mng->n && flow_dv_aso_ct_pools_resize(dev)) {
+		claim_zero(mlx5_devx_cmd_destroy(obj));
+		mlx5_free(pool);
+		return NULL;
+	}
+	mng->pools[pool->index] = pool;
+	mng->next++;
+	/* Assign the first action in the new pool, the rest go to free list. */
+	*ct_free = &pool->actions[0];
+	/* The caller holds the free list lock, the list operation is safe. */
+	for (i = 1; i < MLX5_ASO_CT_ACTIONS_PER_POOL; i++) {
+		/* refcnt is 0 when allocating the memory. */
+		pool->actions[i].offset = i;
+		LIST_INSERT_HEAD(&mng->free_cts, &pool->actions[i], next);
+	}
+	return pool;
+}
+
+/**
+ * Allocate an ASO CT action from free list.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   Index to ASO CT action on success, 0 otherwise and rte_errno is set.
+ */
+static uint32_t
+flow_dv_aso_ct_alloc(struct rte_eth_dev *dev, struct rte_flow_error *error)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	struct mlx5_aso_ct_action *ct = NULL;
+	struct mlx5_aso_ct_pool *pool;
+	uint8_t reg_c;
+	uint32_t ct_idx;
+
+	MLX5_ASSERT(mng);
+	if (!priv->config.devx) {
+		rte_errno = ENOTSUP;
+		return 0;
+	}
+	/* Get a free CT action, if no, a new pool will be created. */
+	rte_spinlock_lock(&mng->ct_sl);
+	ct = LIST_FIRST(&mng->free_cts);
+	if (ct) {
+		LIST_REMOVE(ct, next);
+	} else if (!flow_dv_ct_pool_create(dev, &ct)) {
+		rte_spinlock_unlock(&mng->ct_sl);
+		rte_flow_error_set(error, rte_errno, RTE_FLOW_ERROR_TYPE_ACTION,
+				   NULL, "failed to create ASO CT pool");
+		return 0;
+	}
+	rte_spinlock_unlock(&mng->ct_sl);
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	ct_idx = MLX5_MAKE_CT_IDX(pool->index, ct->offset);
+	/* 0: inactive, 1: created, 2+: used by flows. */
+	__atomic_store_n(&ct->refcnt, 1, __ATOMIC_RELAXED);
+	reg_c = mlx5_flow_get_reg_id(dev, MLX5_ASO_CONNTRACK, 0, error);
+	if (!ct->dr_action_orig) {
+#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
+		ct->dr_action_orig = mlx5_glue->dv_create_flow_action_aso
+			(priv->sh->rx_domain, pool->devx_obj->obj,
+			 ct->offset,
+			 MLX5DV_DR_ACTION_FLAGS_ASO_CT_DIRECTION_INITIATOR,
+			 reg_c - REG_C_0);
+#else
+		RTE_SET_USED(reg_c);
+#endif
+		if (!ct->dr_action_orig) {
+			flow_dv_aso_ct_release(dev, ct_idx);
+			rte_flow_error_set(error, rte_errno,
+					   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					   "failed to create ASO CT action");
+			return 0;
+		}
+	}
+	if (!ct->dr_action_rply) {
+#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
+		ct->dr_action_rply = mlx5_glue->dv_create_flow_action_aso
+			(priv->sh->rx_domain, pool->devx_obj->obj,
+			 ct->offset,
+			 MLX5DV_DR_ACTION_FLAGS_ASO_CT_DIRECTION_RESPONDER,
+			 reg_c - REG_C_0);
+#endif
+		if (!ct->dr_action_rply) {
+			flow_dv_aso_ct_release(dev, ct_idx);
+			rte_flow_error_set(error, rte_errno,
+					   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					   "failed to create ASO CT action");
+			return 0;
+		}
+	}
+	return ct_idx;
+}
+
+/**
+ * Create a conntrack object with context and actions by using ASO mechanism.
+ *
+ * @param[in] dev
+ *   Pointer to rte_eth_dev structure.
+ * @param[in] pro
+ *   Pointer to conntrack information profile.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   Index to conntrack object on success, 0 otherwise and @p error is set.
+ */
+static uint32_t
+flow_dv_translate_create_conntrack(struct rte_eth_dev *dev,
+				   const struct rte_flow_action_conntrack *pro,
+				   struct rte_flow_error *error)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
+	struct mlx5_aso_ct_action *ct;
+	uint32_t idx;
+
+	/*
+	 * The negative value of rte_flow_error_set() must not be returned
+	 * from here: it would be converted to a non-zero (valid) index.
+	 */
+	if (!sh->ct_aso_en) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+				   "Connection is not supported");
+		return 0;
+	}
+	idx = flow_dv_aso_ct_alloc(dev, error);
+	if (!idx) {
+		rte_flow_error_set(error, rte_errno,
+				   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+				   "Failed to allocate CT object");
+		return 0;
+	}
+	ct = flow_aso_ct_get_by_idx(dev, idx);
+	if (mlx5_aso_ct_update_by_wqe(sh, ct, pro)) {
+		/* No WQE was posted: give the CT back instead of leaking it. */
+		flow_dv_aso_ct_release(dev, idx);
+		rte_flow_error_set(error, EBUSY,
+				   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+				   "Failed to update CT");
+		return 0;
+	}
+	return idx;
+}
+
 /**
  * Fill the flow with DV spec, lock free
  * (mutex should be acquired by caller).
@@ -13317,6 +13573,7 @@ flow_dv_action_create(struct rte_eth_dev *dev,
 {
 	uint32_t idx = 0;
 	uint32_t ret = 0;
+	struct mlx5_priv *priv = dev->data->dev_private;
 
 	switch (action->type) {
 	case RTE_FLOW_ACTION_TYPE_RSS:
@@ -13337,6 +13594,12 @@ flow_dv_action_create(struct rte_eth_dev *dev,
 							 (void *)(uintptr_t)idx;
 		}
 		break;
+	case RTE_FLOW_ACTION_TYPE_CONNTRACK:
+		ret = flow_dv_translate_create_conntrack(dev, action->conf,
+							 err);
+		idx = (MLX5_INDIRECT_ACTION_TYPE_CT <<
+		       MLX5_INDIRECT_ACTION_TYPE_OFFSET) | ret;
+		break;
 	default:
 		rte_flow_error_set(err, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
 				   NULL, "action type not supported");
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v3 08/17] net/mlx5: close CT management structure
  2021-05-05  6:49 ` [dpdk-dev] [PATCH v3 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (6 preceding siblings ...)
  2021-05-05  6:49   ` [dpdk-dev] [PATCH v3 07/17] net/mlx5: add actions creating " Bing Zhao
@ 2021-05-05  6:49   ` Bing Zhao
  2021-05-05  6:50   ` [dpdk-dev] [PATCH v3 09/17] net/mlx5: add ASO CT query implementation Bing Zhao
                     ` (8 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  6:49 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

When freeing the IB shared context while closing a device, the
ASO connection tracking management structure should also be cleaned
up.

All the DR actions created should be destroyed. The structures need
to be freed and ASO CT QP should be released. In the meanwhile, the
allocated and registered memory region for query should also be
deregistered and then freed.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.c          | 56 ++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_aso.c |  4 +++
 2 files changed, 60 insertions(+)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 86dbe6d573..d563da109a 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -706,6 +706,60 @@ mlx5_flow_aso_ct_mng_init(struct mlx5_dev_ctx_shared *sh)
 	return 0;
 }
 
+/*
+ * Release the ASO CT management structure: the ASO SQ, every pool with
+ * its DR actions and DevX object, then the pools array and the manager.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object to free.
+ */
+static void
+mlx5_flow_aso_ct_mng_close(struct mlx5_dev_ctx_shared *sh)
+{
+	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
+	struct mlx5_aso_ct_pool *ct_pool;
+	struct mlx5_aso_ct_action *ct;
+	uint32_t idx;
+	uint32_t val;
+	uint32_t cnt;
+	int i;
+
+	mlx5_aso_queue_uninit(sh, ASO_OPC_MOD_CONNECTION_TRACKING);
+	idx = mng->next;
+	while (idx--) {
+		cnt = 0;
+		ct_pool = mng->pools[idx];
+		for (i = 0; i < MLX5_ASO_CT_ACTIONS_PER_POOL; i++) {
+			ct = &ct_pool->actions[i];
+			val = __atomic_fetch_sub(&ct->refcnt, 1,
+						 __ATOMIC_RELAXED);
+			MLX5_ASSERT(val == 1);
+			if (val > 1)
+				cnt++;
+#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
+			if (ct->dr_action_orig)
+				claim_zero(mlx5_glue->destroy_flow_action
+							(ct->dr_action_orig));
+			if (ct->dr_action_rply)
+				claim_zero(mlx5_glue->destroy_flow_action
+							(ct->dr_action_rply));
+#endif
+		}
+		claim_zero(mlx5_devx_cmd_destroy(ct_pool->devx_obj));
+		/* Log the pool index; "i" is only the per-pool action counter. */
+		if (cnt)
+			DRV_LOG(DEBUG, "%u ASO CT objects are being used in the pool %u",
+				cnt, idx);
+		mlx5_free(ct_pool);
+		/* Drop the freed pool from the count. */
+		mng->next--;
+	}
+	mlx5_free(mng->pools);
+	mlx5_free(mng);
+	/* Management structure must be cleared to 0s during allocation. */
+	sh->ct_mng = NULL;
+}
+
 /**
  * Initialize the flow resources' indexed mempool.
  *
@@ -1508,6 +1562,8 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	if (priv->mreg_cp_tbl)
 		mlx5_hlist_destroy(priv->mreg_cp_tbl);
 	mlx5_mprq_free_mp(dev);
+	if (priv->sh->ct_mng)
+		mlx5_flow_aso_ct_mng_close(priv->sh);
 	mlx5_os_free_shared_dr(priv);
 	if (priv->rss_conf.rss_key != NULL)
 		mlx5_free(priv->rss_conf.rss_key);
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index 0ff19e6171..3c2350a6b8 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -372,6 +372,10 @@ mlx5_aso_queue_uninit(struct mlx5_dev_ctx_shared *sh,
 	case ASO_OPC_MOD_POLICER:
 		sq = &sh->mtrmng->pools_mng.sq;
 		break;
+	case ASO_OPC_MOD_CONNECTION_TRACKING:
+		mlx5_aso_dereg_mr(sh, &sh->ct_mng->aso_sq.mr);
+		sq = &sh->ct_mng->aso_sq;
+		break;
 	default:
 		DRV_LOG(ERR, "Unknown ASO operation mode");
 		return;
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v3 09/17] net/mlx5: add ASO CT query implementation
  2021-05-05  6:49 ` [dpdk-dev] [PATCH v3 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (7 preceding siblings ...)
  2021-05-05  6:49   ` [dpdk-dev] [PATCH v3 08/17] net/mlx5: close CT management structure Bing Zhao
@ 2021-05-05  6:50   ` Bing Zhao
  2021-05-05  6:50   ` [dpdk-dev] [PATCH v3 10/17] net/mlx5: add ASO CT destroy handling Bing Zhao
                     ` (7 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  6:50 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

After the connection tracking context is created and is being used by
the flows, the context will be updated by the HW automatically after
a packet passes the CT validation. E.g., the ACK, SEQ, window and
state of CT can be updated by traffic in both directions.

In order to query the updated contents of this context, a WQE should
be posted to the SQ with a return buffer. The data will be filled
into the buffer and then converted into the conntrack profile.

During the execution of query command, the context may be updated.
The result of the query command may not be the latest one.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.h          |  10 +-
 drivers/net/mlx5/mlx5_flow_aso.c | 245 +++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_dv.c  |  19 +++
 3 files changed, 273 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index de18a59c8e..d2827e78d7 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -490,7 +490,10 @@ struct mlx5_aso_sq_elem {
 			uint16_t burst_size;
 		};
 		struct mlx5_aso_mtr *mtr;
-		struct mlx5_aso_ct_action *ct;
+		struct {
+			struct mlx5_aso_ct_action *ct;
+			char *query_data;
+		};
 	};
 };
 
@@ -1702,5 +1705,10 @@ int mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh,
 int mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
 			      struct mlx5_aso_ct_action *ct,
 			      const struct rte_flow_action_conntrack *profile);
+int mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh,
+			   struct mlx5_aso_ct_action *ct);
+int mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
+			     struct mlx5_aso_ct_action *ct,
+			     struct rte_flow_action_conntrack *profile);
 
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index 3c2350a6b8..3f7ed371bf 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -933,6 +933,7 @@ mlx5_aso_ct_sq_enqueue_single(struct mlx5_aso_ct_pools_mng *mng,
 	/* Fill next WQE. */
 	MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_WAIT);
 	sq->elts[sq->head & mask].ct = ct;
+	sq->elts[sq->head & mask].query_data = NULL;
 	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
 	/* Each WQE will have a single CT object. */
 	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
@@ -1048,9 +1049,95 @@ mlx5_aso_ct_status_update(struct mlx5_aso_sq *sq, uint16_t num)
 		ct = sq->elts[idx].ct;
 		MLX5_ASSERT(ct);
 		MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_READY);
+		if (sq->elts[idx].query_data)
+			rte_memcpy(sq->elts[idx].query_data,
+				   (char *)((uintptr_t)sq->mr.addr + idx * 64),
+				   64);
 	}
 }
 
+/*
+ * Post a WQE to the ASO CT SQ to query the current context.
+ *
+ * @param[in] mng
+ *   Pointer to the CT pools management structure.
+ * @param[in] ct
+ *   Pointer to the generic CT structure related to the context.
+ * @param[in] data
+ *   Pointer to data area to be filled.
+ *
+ * @return
+ *   1 if a WQE was posted, 0 if the caller should retry, -1 if CT is free.
+ */
+static int
+mlx5_aso_ct_sq_query_single(struct mlx5_aso_ct_pools_mng *mng,
+			    struct mlx5_aso_ct_action *ct, char *data)
+{
+	volatile struct mlx5_aso_wqe *wqe = NULL;
+	struct mlx5_aso_sq *sq = &mng->aso_sq;
+	uint16_t size = 1 << sq->log_desc_n;
+	uint16_t mask = size - 1;
+	uint16_t res;
+	uint16_t wqe_idx;
+	struct mlx5_aso_ct_pool *pool;
+	enum mlx5_aso_ct_state state =
+				__atomic_load_n(&ct->state, __ATOMIC_RELAXED);
+
+	if (state == ASO_CONNTRACK_FREE) {
+		DRV_LOG(ERR, "Fail: No context to query");
+		return -1;
+	} else if (state == ASO_CONNTRACK_WAIT) {
+		return 0;
+	}
+	rte_spinlock_lock(&sq->sqsl);
+	res = size - (uint16_t)(sq->head - sq->tail);
+	if (unlikely(!res)) {
+		rte_spinlock_unlock(&sq->sqsl);
+		DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
+		return 0;
+	}
+	MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_QUERY);
+	wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
+	/* Prefetch the WQE slot that follows the one being filled. */
+	rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
+	/* Record the CT and the output buffer in the SQ element of this WQE. */
+	wqe_idx = sq->head & mask;
+	sq->elts[wqe_idx].ct = ct;
+	sq->elts[wqe_idx].query_data = data;
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	/* Each WQE will have a single CT object. */
+	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
+						  ct->offset);
+	wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
+			(ASO_OPC_MOD_CONNECTION_TRACKING <<
+			 WQE_CSEG_OPC_MOD_OFFSET) |
+			sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
+	/*
+	 * No write request is required for a query.
+	 * ASO_OPER_LOGICAL_AND and ASO_OP_ALWAYS_FALSE are both 0.
+	 * "BYTEWISE_64BYTE" is needed for a whole context.
+	 * Set to 0 directly to reduce an endian swap. (Modify should rewrite.)
+	 * "data_mask" is ignored.
+	 * Buffer address was already filled during initialization.
+	 */
+	wqe->aso_cseg.operand_masks = rte_cpu_to_be_32(BYTEWISE_64BYTE <<
+					ASO_CSEG_DATA_MASK_MODE_OFFSET);
+	wqe->aso_cseg.data_mask = 0;
+	sq->head++;
+	/*
+	 * Each WQE contains 2 WQEBB's, even though
+	 * data segment is not used in this case.
+	 */
+	sq->pi += 2;
+	rte_io_wmb();
+	sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
+	rte_wmb();
+	*sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH. */
+	rte_wmb();
+	rte_spinlock_unlock(&sq->sqsl);
+	return 1;
+}
+
 /*
  * Handle completions from WQEs sent to ASO CT.
  *
@@ -1143,3 +1230,161 @@ mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
 		ct->offset, pool->index);
 	return -1;
 }
+
+/*
+ * Poll the ASO CT completions until the CT object reaches the READY state.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ * @param[in] ct
+ *   Pointer to connection tracking offload object.
+ *
+ * @return
+ *   0 once the CT is ready, -1 if it is still not ready after all polls.
+ */
+int
+mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh,
+		       struct mlx5_aso_ct_action *ct)
+{
+	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
+	uint32_t poll_cqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
+	struct mlx5_aso_ct_pool *pool;
+
+	if (__atomic_load_n(&ct->state, __ATOMIC_RELAXED) ==
+	    ASO_CONNTRACK_READY)
+		return 0;
+	do {
+		mlx5_aso_ct_completion_handle(mng);
+		if (__atomic_load_n(&ct->state, __ATOMIC_RELAXED) ==
+		    ASO_CONNTRACK_READY)
+			return 0;
+		/* CQE is not ready yet, sleep before the next poll. */
+		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
+	} while (--poll_cqe_times);
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	DRV_LOG(ERR, "Fail to poll CQE for ASO CT %d in pool %d",
+		ct->offset, pool->index);
+	return -1;
+}
+
+/*
+ * Convert the hardware conntrack data format into the profile.
+ *
+ * @param[out] profile
+ *   Pointer to conntrack profile to be filled after query.
+ * @param[in] wdata
+ *   Pointer to data fetched from hardware.
+ */
+static inline void
+mlx5_aso_ct_obj_analyze(struct rte_flow_action_conntrack *profile,
+			char *wdata)
+{
+	void *o_dir = MLX5_ADDR_OF(conn_track_aso, wdata, original_dir);
+	void *r_dir = MLX5_ADDR_OF(conn_track_aso, wdata, reply_dir);
+
+	/* MLX5_GET16 should be taken into consideration. */
+	profile->state = (enum rte_flow_conntrack_state)
+			 MLX5_GET(conn_track_aso, wdata, state);
+	profile->enable = !MLX5_GET(conn_track_aso, wdata, freeze_track);
+	profile->selective_ack = MLX5_GET(conn_track_aso, wdata,
+					  sack_permitted);
+	profile->live_connection = MLX5_GET(conn_track_aso, wdata,
+					    connection_assured);
+	profile->challenge_ack_passed = MLX5_GET(conn_track_aso, wdata,
+						 challenged_acked);
+	profile->max_ack_window = MLX5_GET(conn_track_aso, wdata,
+					   max_ack_window);
+	profile->retransmission_limit = MLX5_GET(conn_track_aso, wdata,
+						 retranmission_limit);
+	profile->last_window = MLX5_GET(conn_track_aso, wdata, last_win);
+	profile->last_direction = MLX5_GET(conn_track_aso, wdata, last_dir);
+	profile->last_index = (enum rte_flow_conntrack_tcp_last_index)
+			      MLX5_GET(conn_track_aso, wdata, last_index);
+	profile->last_seq = MLX5_GET(conn_track_aso, wdata, last_seq);
+	profile->last_ack = MLX5_GET(conn_track_aso, wdata, last_ack);
+	profile->last_end = MLX5_GET(conn_track_aso, wdata, last_end);
+	profile->liberal_mode = MLX5_GET(conn_track_aso, wdata,
+				reply_direction_tcp_liberal_enabled) |
+				MLX5_GET(conn_track_aso, wdata,
+				original_direction_tcp_liberal_enabled);
+	/* No per-direction liberal flag in the profile, merged above. */
+	profile->reply_dir.scale = MLX5_GET(conn_track_aso, wdata,
+					    reply_direction_tcp_scale);
+	profile->reply_dir.close_initiated = MLX5_GET(conn_track_aso, wdata,
+					reply_direction_tcp_close_initiated);
+	profile->reply_dir.data_unacked = MLX5_GET(conn_track_aso, wdata,
+					reply_direction_tcp_data_unacked);
+	profile->reply_dir.last_ack_seen = MLX5_GET(conn_track_aso, wdata,
+					reply_direction_tcp_max_ack);
+	profile->reply_dir.sent_end = MLX5_GET(tcp_window_params,
+					       r_dir, sent_end);
+	profile->reply_dir.reply_end = MLX5_GET(tcp_window_params,
+						r_dir, reply_end);
+	profile->reply_dir.max_win = MLX5_GET(tcp_window_params,
+					      r_dir, max_win);
+	profile->reply_dir.max_ack = MLX5_GET(tcp_window_params,
+					      r_dir, max_ack);
+	profile->original_dir.scale = MLX5_GET(conn_track_aso, wdata,
+					       original_direction_tcp_scale);
+	profile->original_dir.close_initiated = MLX5_GET(conn_track_aso, wdata,
+					original_direction_tcp_close_initiated);
+	profile->original_dir.data_unacked = MLX5_GET(conn_track_aso, wdata,
+					original_direction_tcp_data_unacked);
+	profile->original_dir.last_ack_seen = MLX5_GET(conn_track_aso, wdata,
+					original_direction_tcp_max_ack);
+	profile->original_dir.sent_end = MLX5_GET(tcp_window_params,
+						  o_dir, sent_end);
+	profile->original_dir.reply_end = MLX5_GET(tcp_window_params,
+						   o_dir, reply_end);
+	profile->original_dir.max_win = MLX5_GET(tcp_window_params,
+						 o_dir, max_win);
+	profile->original_dir.max_ack = MLX5_GET(tcp_window_params,
+						 o_dir, max_ack);
+}
+
+/*
+ * Query the connection tracking context by sending a WQE.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ * @param[in] ct
+ *   Pointer to connection tracking offload object.
+ * @param[out] profile
+ *   Pointer to connection tracking TCP information.
+ *
+ * @return
+ *   0 on success, -1 on failure.
+ */
+int
+mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
+			 struct mlx5_aso_ct_action *ct,
+			 struct rte_flow_action_conntrack *profile)
+{
+	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
+	uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
+	struct mlx5_aso_ct_pool *pool;
+	char out_data[64 * 2]; /* NOTE(review): stays queued if wait times out. */
+	int ret;
+
+	MLX5_ASSERT(ct);
+	do {
+		mlx5_aso_ct_completion_handle(mng);
+		ret = mlx5_aso_ct_sq_query_single(mng, ct, out_data);
+		if (ret < 0)
+			return ret;
+		else if (ret > 0)
+			goto data_handle;
+		/* Nothing was posted (0 returned): back off and retry. */
+		else
+			rte_delay_us_sleep(10u);
+	} while (--poll_wqe_times);
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	DRV_LOG(ERR, "Fail to send WQE for ASO CT %d in pool %d",
+		ct->offset, pool->index);
+	return -1;
+data_handle:
+	ret = mlx5_aso_ct_wait_ready(sh, ct);
+	if (!ret)
+		mlx5_aso_ct_obj_analyze(profile, out_data);
+	return ret;
+}
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index c8ff693e4c..84e7f0b3d3 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -13775,6 +13775,8 @@ flow_dv_action_query(struct rte_eth_dev *dev,
 	uint32_t act_idx = (uint32_t)(uintptr_t)handle;
 	uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
 	uint32_t idx = act_idx & ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_action *ct;
 
 	switch (type) {
 	case MLX5_INDIRECT_ACTION_TYPE_AGE:
@@ -13788,6 +13790,23 @@ flow_dv_action_query(struct rte_eth_dev *dev,
 			resp->sec_since_last_hit = __atomic_load_n
 			     (&age_param->sec_since_last_hit, __ATOMIC_RELAXED);
 		return 0;
+	case MLX5_INDIRECT_ACTION_TYPE_CT:
+		ct = flow_aso_ct_get_by_idx(dev, idx);
+		if (!ct->refcnt)
+			return rte_flow_error_set(error, EFAULT,
+					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					NULL,
+					"CT object is inactive");
+		((struct rte_flow_action_conntrack *)data)->peer_port =
+							ct->peer;
+		((struct rte_flow_action_conntrack *)data)->is_original_dir =
+							ct->is_original;
+		if (mlx5_aso_ct_query_by_wqe(priv->sh, ct, data))
+			return rte_flow_error_set(error, EIO,
+					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					NULL,
+					"Failed to query CT context");
+		return 0;
 	default:
 		return rte_flow_error_set(error, ENOTSUP,
 					  RTE_FLOW_ERROR_TYPE_ACTION,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v3 10/17] net/mlx5: add ASO CT destroy handling
  2021-05-05  6:49 ` [dpdk-dev] [PATCH v3 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (8 preceding siblings ...)
  2021-05-05  6:50   ` [dpdk-dev] [PATCH v3 09/17] net/mlx5: add ASO CT query implementation Bing Zhao
@ 2021-05-05  6:50   ` Bing Zhao
  2021-05-05  6:50   ` [dpdk-dev] [PATCH v3 11/17] net/mlx5: add translation of CT action Bing Zhao
                     ` (6 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  6:50 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

When trying to destroy an ASO connection tracking context, the DR
action created on this context should also be destroyed. Before
inserting the related software object into the management free list,
the reference count should be checked.

Right now, the context object will not be freed to the system and
will be reused directly from the free list.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow_dv.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 84e7f0b3d3..0fa0671ace 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -11136,9 +11136,15 @@ flow_dv_aso_ct_release(struct rte_eth_dev *dev, uint32_t idx)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	uint32_t ret;
 	struct mlx5_aso_ct_action *ct = flow_aso_ct_get_by_idx(dev, idx);
-	uint32_t ret = __atomic_sub_fetch(&ct->refcnt, 1, __ATOMIC_RELAXED);
+	enum mlx5_aso_ct_state state =
+			__atomic_load_n(&ct->state, __ATOMIC_RELAXED);
 
+	/* Cannot release when CT is in the ASO SQ. */
+	if (state == ASO_CONNTRACK_WAIT || state == ASO_CONNTRACK_QUERY)
+		return -1;
+	ret = __atomic_sub_fetch(&ct->refcnt, 1, __ATOMIC_RELAXED);
 	if (!ret) {
 		if (ct->dr_action_orig) {
 #ifdef HAVE_MLX5_DR_ACTION_ASO_CT
@@ -11154,6 +11160,8 @@ flow_dv_aso_ct_release(struct rte_eth_dev *dev, uint32_t idx)
 #endif
 			ct->dr_action_rply = NULL;
 		}
+		/* Clear the state to free, no need in 1st allocation. */
+		MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_FREE);
 		rte_spinlock_lock(&mng->ct_sl);
 		LIST_INSERT_HEAD(&mng->free_cts, ct, next);
 		rte_spinlock_unlock(&mng->ct_sl);
@@ -13648,6 +13656,12 @@ flow_dv_action_destroy(struct rte_eth_dev *dev,
 			DRV_LOG(DEBUG, "Indirect age action %" PRIu32 " was"
 				" released with references %d.", idx, ret);
 		return 0;
+	case MLX5_INDIRECT_ACTION_TYPE_CT:
+		ret = flow_dv_aso_ct_release(dev, idx);
+		if (ret)
+			DRV_LOG(DEBUG, "Connection tracking object %u still "
+				"has references %d.", idx, ret);
+		return 0;
 	default:
 		return rte_flow_error_set(error, ENOTSUP,
 					  RTE_FLOW_ERROR_TYPE_ACTION,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v3 11/17] net/mlx5: add translation of CT action
  2021-05-05  6:49 ` [dpdk-dev] [PATCH v3 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (9 preceding siblings ...)
  2021-05-05  6:50   ` [dpdk-dev] [PATCH v3 10/17] net/mlx5: add ASO CT destroy handling Bing Zhao
@ 2021-05-05  6:50   ` Bing Zhao
  2021-05-05  6:50   ` [dpdk-dev] [PATCH v3 12/17] net/mlx5: add translation of CT item Bing Zhao
                     ` (5 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  6:50 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

When creating a flow with this action context for CT, it needs to be
translated in 2 levels.

First, retrieve from action context to rte_flow action.
Second, translate it to the corresponding DR action with traffic
direction that was specified when creating or updating via
rte_flow_action_handle* API.

Before using the DR action in a flow, the CT context should be
available to use in the hardware. A synchronization is done before
inserting the flow rule with CT action to check the HW availability
of this CT context.

In order to release the DR actions and reuse the context of a CT,
the reference count should also be handled in the flow rule
destroying.

The CT index will be recorded in the rte_flow by reusing the ASO age
index to save memory, since only one ASO action is supported in one
flow rule currently. The action context type should also be saved
for CT. When destroying a flow rule, if the context type is CT and
the index is valid (non-zero), the release process should be
handled. By default, the handling will fall back to try to release
the ASO age if any.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.h          |  2 ++
 drivers/net/mlx5/mlx5_flow.c     |  9 +++++++
 drivers/net/mlx5/mlx5_flow.h     |  7 +++++-
 drivers/net/mlx5/mlx5_flow_aso.c | 41 ++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_dv.c  | 28 +++++++++++++++++++++-
 5 files changed, 85 insertions(+), 2 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index d2827e78d7..d01a10ea54 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1710,5 +1710,7 @@ int mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh,
 int mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
 			     struct mlx5_aso_ct_action *ct,
 			     struct rte_flow_action_conntrack *profile);
+int mlx5_aso_ct_available(struct mlx5_dev_ctx_shared *sh,
+			  struct mlx5_aso_ct_action *ct);
 
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index edad6007a8..f36eeae03f 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -3553,6 +3553,15 @@ flow_action_handles_translate(struct rte_eth_dev *dev,
 				break;
 			}
 			/* Fall-through */
+		case MLX5_INDIRECT_ACTION_TYPE_CT:
+			if (priv->sh->ct_aso_en) {
+				translated[handle->index].type =
+					RTE_FLOW_ACTION_TYPE_CONNTRACK;
+				translated[handle->index].conf =
+							 (void *)(uintptr_t)idx;
+				break;
+			}
+			/* Fall-through */
 		default:
 			mlx5_free(translated);
 			return rte_flow_error_set
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 8f2bc7d2f6..286e3fb6a4 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -226,6 +226,7 @@ enum mlx5_feature_name {
 #define MLX5_FLOW_ACTION_TUNNEL_MATCH (1ull << 38)
 #define MLX5_FLOW_ACTION_MODIFY_FIELD (1ull << 39)
 #define MLX5_FLOW_ACTION_METER_WITH_TERMINATED_POLICY (1ull << 40)
+#define MLX5_FLOW_ACTION_CT (1ull << 41)
 
 #define MLX5_FLOW_FATE_ACTIONS \
 	(MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_QUEUE | \
@@ -969,11 +970,15 @@ struct rte_flow {
 	uint32_t drv_type:2; /**< Driver type. */
 	uint32_t tunnel:1;
 	uint32_t meter:24; /**< Holds flow meter id. */
+	uint32_t indirect_type:2; /**< Indirect action type. */
 	uint32_t rix_mreg_copy;
 	/**< Index to metadata register copy table resource. */
 	uint32_t counter; /**< Holds flow counter. */
 	uint32_t tunnel_id;  /**< Tunnel id */
-	uint32_t age; /**< Holds ASO age bit index. */
+	union {
+		uint32_t age; /**< Holds ASO age bit index. */
+		uint32_t ct; /**< Holds ASO CT index. */
+	};
 	uint32_t geneve_tlv_option; /**< Holds Geneve TLV option id. > */
 } __rte_packed;
 
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index 3f7ed371bf..d0a989e213 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -1388,3 +1388,44 @@ mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
 		mlx5_aso_ct_obj_analyze(profile, out_data);
 	return ret;
 }
+
+/*
+ * Make sure the conntrack context is synchronized with hardware before
+ * creating a flow rule that uses it.
+ *
+ * @param[in] sh
+ *   Pointer to shared device context.
+ * @param[in] ct
+ *   Pointer to connection tracking offload object.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_aso_ct_available(struct mlx5_dev_ctx_shared *sh,
+		      struct mlx5_aso_ct_action *ct)
+{
+	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
+	uint32_t poll_cqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
+	enum mlx5_aso_ct_state state =
+				__atomic_load_n(&ct->state, __ATOMIC_RELAXED);
+
+	if (state == ASO_CONNTRACK_FREE) {
+		rte_errno = ENXIO;
+		return -rte_errno;
+	} else if (state == ASO_CONNTRACK_READY ||
+		   state == ASO_CONNTRACK_QUERY) {
+		return 0;
+	}
+	do {
+		mlx5_aso_ct_completion_handle(mng);
+		state = __atomic_load_n(&ct->state, __ATOMIC_RELAXED);
+		if (state == ASO_CONNTRACK_READY ||
+		    state == ASO_CONNTRACK_QUERY)
+			return 0;
+		/* CQE is not ready yet, sleep before the next poll. */
+		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
+	} while (--poll_cqe_times);
+	rte_errno = EBUSY;
+	return -rte_errno;
+}
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 0fa0671ace..14af900267 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -11381,6 +11381,7 @@ flow_dv_translate_create_conntrack(struct rte_eth_dev *dev,
 		return rte_flow_error_set(error, EBUSY,
 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
 					  "Failed to update CT");
+	ct->is_original = !!pro->is_original_dir;
 	return idx;
 }
 
@@ -11544,6 +11545,8 @@ flow_dv_translate(struct rte_eth_dev *dev,
 		const struct rte_flow_action *found_action = NULL;
 		uint32_t jump_group = 0;
 		struct mlx5_flow_counter *cnt;
+		uint32_t ct_idx;
+		struct mlx5_aso_ct_action *ct;
 
 		if (!mlx5_flow_os_action_supported(action_type))
 			return rte_flow_error_set(error, ENOTSUP,
@@ -12017,6 +12020,26 @@ flow_dv_translate(struct rte_eth_dev *dev,
 				return -rte_errno;
 			action_flags |= MLX5_FLOW_ACTION_MODIFY_FIELD;
 			break;
+		case RTE_FLOW_ACTION_TYPE_CONNTRACK:
+			ct_idx = (uint32_t)(uintptr_t)action->conf;
+			ct = flow_aso_ct_get_by_idx(dev, ct_idx);
+			if (mlx5_aso_ct_available(priv->sh, ct))
+				return rte_flow_error_set(error, rte_errno,
+						RTE_FLOW_ERROR_TYPE_ACTION,
+						NULL,
+						"CT is unavailable.");
+			if (ct->is_original)
+				dev_flow->dv.actions[actions_n] =
+							ct->dr_action_orig;
+			else
+				dev_flow->dv.actions[actions_n] =
+							ct->dr_action_rply;
+			flow->indirect_type = MLX5_INDIRECT_ACTION_TYPE_CT;
+			flow->ct = ct_idx;
+			__atomic_fetch_add(&ct->refcnt, 1, __ATOMIC_RELAXED);
+			actions_n++;
+			action_flags |= MLX5_FLOW_ACTION_CT;
+			break;
 		case RTE_FLOW_ACTION_TYPE_END:
 			actions_end = true;
 			if (mhdr_res->actions_num) {
@@ -13152,7 +13175,10 @@ flow_dv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
 			mlx5_flow_meter_detach(priv, fm);
 		flow->meter = 0;
 	}
-	if (flow->age)
+	/* Keep the current age handling by default. */
+	if (flow->indirect_type == MLX5_INDIRECT_ACTION_TYPE_CT && flow->ct)
+		flow_dv_aso_ct_release(dev, flow->ct);
+	else if (flow->age)
 		flow_dv_aso_age_release(dev, flow->age);
 	if (flow->geneve_tlv_option) {
 		flow_dv_geneve_tlv_option_resource_release(dev);
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v3 12/17] net/mlx5: add translation of CT item
  2021-05-05  6:49 ` [dpdk-dev] [PATCH v3 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (10 preceding siblings ...)
  2021-05-05  6:50   ` [dpdk-dev] [PATCH v3 11/17] net/mlx5: add translation of CT action Bing Zhao
@ 2021-05-05  6:50   ` Bing Zhao
  2021-05-05  6:50   ` [dpdk-dev] [PATCH v3 13/17] net/mlx5: add CT context update Bing Zhao
                     ` (4 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  6:50 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

The return register of the DR action will be used for matching.
After the ASO CT checking of a TCP packet, the syndrome is filled in
the register. Only the 8 LSBs should be used. A conversion from
RTE_FLOW_CONNTRACK_PKT_STATE_* to the syndrome should be done after
checking the spec and mask fields.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow.h    |  7 ++++
 drivers/net/mlx5/mlx5_flow_dv.c | 62 +++++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 286e3fb6a4..eb0bb42161 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -405,6 +405,13 @@ enum mlx5_feature_name {
 /* Maximum number of fields to modify in MODIFY_FIELD */
 #define MLX5_ACT_MAX_MOD_FIELDS 5
 
+/* Syndrome bits definition for connection tracking. */
+#define MLX5_CT_SYNDROME_VALID		(0x0 << 6)
+#define MLX5_CT_SYNDROME_INVALID	(0x1 << 6)
+#define MLX5_CT_SYNDROME_TRAP		(0x2 << 6)
+#define MLX5_CT_SYNDROME_STATE_CHANGE	(0x1 << 1)
+#define MLX5_CT_SYNDROME_BAD_PACKET	(0x1 << 0)
+
 enum mlx5_flow_drv_type {
 	MLX5_FLOW_TYPE_MIN,
 	MLX5_FLOW_TYPE_DV,
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 14af900267..b0858e3df8 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -9379,6 +9379,64 @@ flow_dv_translate_item_ecpri(struct rte_eth_dev *dev, void *matcher,
 	}
 }
 
+/*
+ * Add connection tracking status item to matcher
+ *
+ * @param[in] dev
+ *   The device to configure through.
+ * @param[in, out] matcher
+ *   Flow matcher.
+ * @param[in, out] key
+ *   Flow matcher value.
+ * @param[in] item
+ *   Flow pattern to translate.
+ */
+static void
+flow_dv_translate_item_aso_ct(struct rte_eth_dev *dev,
+			      void *matcher, void *key,
+			      const struct rte_flow_item *item)
+{
+	uint32_t reg_value = 0;
+	int reg_id;
+	/* 8LSB 0b 11/0000/11, middle 4 bits are reserved. */
+	uint32_t reg_mask = 0;
+	const struct rte_flow_item_conntrack *spec = item->spec;
+	const struct rte_flow_item_conntrack *mask = item->mask;
+	uint32_t flags;
+	struct rte_flow_error error;
+
+	if (!mask)
+		mask = &rte_flow_item_conntrack_mask;
+	if (!spec || !mask->flags)
+		return;
+	flags = spec->flags & mask->flags;
+	/* The conflict should be checked in the validation. */
+	if (flags & RTE_FLOW_CONNTRACK_PKT_STATE_VALID)
+		reg_value |= MLX5_CT_SYNDROME_VALID;
+	if (flags & RTE_FLOW_CONNTRACK_PKT_STATE_CHANGED)
+		reg_value |= MLX5_CT_SYNDROME_STATE_CHANGE;
+	if (flags & RTE_FLOW_CONNTRACK_PKT_STATE_INVALID)
+		reg_value |= MLX5_CT_SYNDROME_INVALID;
+	if (flags & RTE_FLOW_CONNTRACK_PKT_STATE_DISABLED)
+		reg_value |= MLX5_CT_SYNDROME_TRAP;
+	if (flags & RTE_FLOW_CONNTRACK_PKT_STATE_BAD)
+		reg_value |= MLX5_CT_SYNDROME_BAD_PACKET;
+	if (mask->flags & (RTE_FLOW_CONNTRACK_PKT_STATE_VALID |
+			   RTE_FLOW_CONNTRACK_PKT_STATE_INVALID |
+			   RTE_FLOW_CONNTRACK_PKT_STATE_DISABLED))
+		reg_mask |= 0xc0;
+	if (mask->flags & RTE_FLOW_CONNTRACK_PKT_STATE_CHANGED)
+		reg_mask |= MLX5_CT_SYNDROME_STATE_CHANGE;
+	if (mask->flags & RTE_FLOW_CONNTRACK_PKT_STATE_BAD)
+		reg_mask |= MLX5_CT_SYNDROME_BAD_PACKET;
+	/* The REG_C_x value could be saved during startup. */
+	reg_id = mlx5_flow_get_reg_id(dev, MLX5_ASO_CONNTRACK, 0, &error);
+	if (reg_id == REG_NON)
+		return;
+	flow_dv_match_meta_reg(matcher, key, (enum modify_reg)reg_id,
+			       reg_value, reg_mask);
+}
+
 static uint32_t matcher_zero[MLX5_ST_SZ_DW(fte_match_param)] = { 0 };
 
 #define HEADER_IS_ZERO(match_criteria, headers)				     \
@@ -12322,6 +12380,10 @@ flow_dv_translate(struct rte_eth_dev *dev,
 			/* No other protocol should follow eCPRI layer. */
 			last_item = MLX5_FLOW_LAYER_ECPRI;
 			break;
+		case RTE_FLOW_ITEM_TYPE_CONNTRACK:
+			flow_dv_translate_item_aso_ct(dev, match_mask,
+						      match_value, items);
+			break;
 		default:
 			break;
 		}
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v3 13/17] net/mlx5: add CT context update
  2021-05-05  6:49 ` [dpdk-dev] [PATCH v3 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (11 preceding siblings ...)
  2021-05-05  6:50   ` [dpdk-dev] [PATCH v3 12/17] net/mlx5: add translation of CT item Bing Zhao
@ 2021-05-05  6:50   ` Bing Zhao
  2021-05-05  6:50   ` [dpdk-dev] [PATCH v3 14/17] net/mlx5: validation of CT action Bing Zhao
                     ` (3 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  6:50 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

When updating a connection tracking context, two separate parts
could be updated.
First, the direction. This will only update the traffic direction
recorded in the software for flow creation.
Second, the TCP parameters. The hardware context will be updated
via the WQE. This update will be blocked until the hardware status
is updated and ready for the next flow creation.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow_dv.c | 56 +++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index b0858e3df8..6afbbbc4bb 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -13824,6 +13824,60 @@ __flow_dv_action_rss_update(struct rte_eth_dev *dev, uint32_t idx,
 	return ret;
 }
 
+/*
+ * Updates in place conntrack context or direction.
+ * Context update should be synchronized.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] idx
+ *   The conntrack object ID to be updated.
+ * @param[in] update
+ *   Pointer to the structure of information to update.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. Initialized in case of
+ *   error only.
+ *
+ * @return
+ *   0 on success, otherwise negative errno value.
+ */
+static int
+__flow_dv_action_ct_update(struct rte_eth_dev *dev, uint32_t idx,
+			   const struct rte_flow_modify_conntrack *update,
+			   struct rte_flow_error *error)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_action *ct;
+	const struct rte_flow_action_conntrack *new_prf;
+	int ret = 0;
+
+	ct = flow_aso_ct_get_by_idx(dev, idx);
+	if (!ct->refcnt)
+		return rte_flow_error_set(error, ENOMEM,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					  NULL,
+					  "CT object is inactive");
+	new_prf = &update->new_ct;
+	if (update->direction)
+		ct->is_original = !!new_prf->is_original_dir;
+	if (update->state) {
+		ret = mlx5_aso_ct_update_by_wqe(priv->sh, ct, new_prf);
+		if (ret)
+			return rte_flow_error_set(error, EIO,
+					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					NULL,
+					"Failed to send CT context update WQE");
+		/* Block until ready or a failure. */
+		ret = mlx5_aso_ct_available(priv->sh, ct);
+		if (ret)
+			rte_flow_error_set(error, rte_errno,
+					   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					   NULL,
+					   "Timeout to get the CT update");
+	}
+	return ret;
+}
+
 /**
  * Updates in place shared action configuration, lock free,
  * (mutex should be acquired by caller).
@@ -13859,6 +13913,8 @@ flow_dv_action_update(struct rte_eth_dev *dev,
 	case MLX5_INDIRECT_ACTION_TYPE_RSS:
 		action_conf = ((const struct rte_flow_action *)update)->conf;
 		return __flow_dv_action_rss_update(dev, idx, action_conf, err);
+	case MLX5_INDIRECT_ACTION_TYPE_CT:
+		return __flow_dv_action_ct_update(dev, idx, update, err);
 	default:
 		return rte_flow_error_set(err, ENOTSUP,
 					  RTE_FLOW_ERROR_TYPE_ACTION,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v3 14/17] net/mlx5: validation of CT action
  2021-05-05  6:49 ` [dpdk-dev] [PATCH v3 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (12 preceding siblings ...)
  2021-05-05  6:50   ` [dpdk-dev] [PATCH v3 13/17] net/mlx5: add CT context update Bing Zhao
@ 2021-05-05  6:50   ` Bing Zhao
  2021-05-05  6:50   ` [dpdk-dev] [PATCH v3 15/17] net/mlx5: validation of CT item Bing Zhao
                     ` (2 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  6:50 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

The validation of a CT action contains two parts. The first is the
CT action configuration parameters. When creating a CT action
context, some members need to be verified.

The second is that when creating a flow, the DR action of CT should
be validated with other actions and items as well. Currently, only
the TCP protocol supports connection tracking.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.h         |  4 ++
 drivers/net/mlx5/mlx5_flow.c    | 31 +++++++++++++++
 drivers/net/mlx5/mlx5_flow_dv.c | 69 +++++++++++++++++++++++++++++++++
 3 files changed, 104 insertions(+)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index d01a10ea54..36b7f05822 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1611,6 +1611,10 @@ int mlx5_flow_dev_dump(struct rte_eth_dev *dev, struct rte_flow *flow,
 void mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev);
 int mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
 			uint32_t nb_contexts, struct rte_flow_error *error);
+int mlx5_validate_action_ct(struct rte_eth_dev *dev,
+			    const struct rte_flow_action_conntrack *conntrack,
+			    struct rte_flow_error *error);
+
 
 /* mlx5_mp_os.c */
 
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index f36eeae03f..6baaefbaba 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1688,6 +1688,37 @@ mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
 	return 0;
 }
 
+/*
+ * Validate the ASO CT action.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] conntrack
+ *   Pointer to the CT action profile.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_validate_action_ct(struct rte_eth_dev *dev,
+			const struct rte_flow_action_conntrack *conntrack,
+			struct rte_flow_error *error)
+{
+	RTE_SET_USED(dev);
+
+	if (conntrack->state > RTE_FLOW_CONNTRACK_STATE_TIME_WAIT)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "Invalid CT state");
+	if (conntrack->last_index > RTE_FLOW_CONNTRACK_FLAG_RST)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "Invalid last TCP packet flag");
+	return 0;
+}
+
 /**
  * Verify the @p attributes will be correctly understood by the NIC and store
  * them in the @p flow if everything is correct.
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 6afbbbc4bb..f2a2c609e2 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -3400,6 +3400,57 @@ flow_dv_validate_action_raw_encap_decap
 	return 0;
 }
 
+/*
+ * Validate the ASO CT action.
+ *
+ * @param[in] dev
+ *   Pointer to the rte_eth_dev structure.
+ * @param[in] action_flags
+ *   Holds the actions detected until now.
+ * @param[in] item_flags
+ *   The items found in this flow rule.
+ * @param[in] attr
+ *   Pointer to flow attributes.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_validate_action_aso_ct(struct rte_eth_dev *dev,
+			       uint64_t action_flags,
+			       uint64_t item_flags,
+			       const struct rte_flow_attr *attr,
+			       struct rte_flow_error *error)
+{
+	RTE_SET_USED(dev);
+
+	if (attr->group == 0 && !attr->transfer)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					  NULL,
+					  "Only support non-root table");
+	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "CT cannot follow a fate action");
+	if ((action_flags & MLX5_FLOW_ACTION_METER) ||
+	    (action_flags & MLX5_FLOW_ACTION_AGE))
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "Only one ASO action is supported");
+	if (action_flags & MLX5_FLOW_ACTION_ENCAP)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "Encap cannot exist before CT");
+	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP))
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+					  "Not a outer TCP packet");
+	return 0;
+}
+
 /**
  * Match encap_decap resource.
  *
@@ -7205,6 +7256,14 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
 			action_flags |= MLX5_FLOW_ACTION_MODIFY_FIELD;
 			rw_act_num += ret;
 			break;
+		case RTE_FLOW_ACTION_TYPE_CONNTRACK:
+			ret = flow_dv_validate_action_aso_ct(dev, action_flags,
+							     item_flags, attr,
+							     error);
+			if (ret < 0)
+				return ret;
+			action_flags |= MLX5_FLOW_ACTION_CT;
+			break;
 		default:
 			return rte_flow_error_set(error, ENOTSUP,
 						  RTE_FLOW_ERROR_TYPE_ACTION,
@@ -13861,6 +13920,10 @@ __flow_dv_action_ct_update(struct rte_eth_dev *dev, uint32_t idx,
 	if (update->direction)
 		ct->is_original = !!new_prf->is_original_dir;
 	if (update->state) {
+		/* Only validate the profile when it needs to be updated. */
+		ret = mlx5_validate_action_ct(dev, new_prf, error);
+		if (ret)
+			return ret;
 		ret = mlx5_aso_ct_update_by_wqe(priv->sh, ct, new_prf);
 		if (ret)
 			return rte_flow_error_set(error, EIO,
@@ -15732,6 +15795,12 @@ flow_dv_action_validate(struct rte_eth_dev *dev,
 						NULL,
 					     "shared age action not supported");
 		return flow_dv_validate_action_age(0, action, dev, err);
+	case RTE_FLOW_ACTION_TYPE_CONNTRACK:
+		if (!priv->sh->ct_aso_en)
+			return rte_flow_error_set(err, ENOTSUP,
+					RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+					"ASO CT is not supported");
+		return mlx5_validate_action_ct(dev, action->conf, err);
 	default:
 		return rte_flow_error_set(err, ENOTSUP,
 					  RTE_FLOW_ERROR_TYPE_ACTION,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v3 15/17] net/mlx5: validation of CT item
  2021-05-05  6:49 ` [dpdk-dev] [PATCH v3 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (13 preceding siblings ...)
  2021-05-05  6:50   ` [dpdk-dev] [PATCH v3 14/17] net/mlx5: validation of CT action Bing Zhao
@ 2021-05-05  6:50   ` Bing Zhao
  2021-05-05  6:50   ` [dpdk-dev] [PATCH v3 16/17] net/mlx5: add support of CT between two ports Bing Zhao
  2021-05-05  6:50   ` [dpdk-dev] [PATCH v3 17/17] doc: update mlx5 support for conntrack Bing Zhao
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  6:50 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

The item of ASO connection tracking will be translated into the
register value when matching. The validation of this item has no
dependency on other layers, since a flow including this item
should be jumped to from another group. All the layer checks were
already done in the previous groups. Only conflicts between the
state bits should be checked.

It is assumed that the flow with CT item will always work on the
TCP traffic.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow.h    |  3 ++
 drivers/net/mlx5/mlx5_flow_dv.c | 51 +++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index eb0bb42161..238befa2d4 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -147,6 +147,9 @@ enum mlx5_feature_name {
 #define MLX5_FLOW_LAYER_GENEVE_OPT (UINT64_C(1) << 32)
 #define MLX5_FLOW_LAYER_GTP_PSC (UINT64_C(1) << 33)
 
+/* Conntrack item. */
+#define MLX5_FLOW_LAYER_ASO_CT (UINT64_C(1) << 34)
+
 /* Outer Masks. */
 #define MLX5_FLOW_LAYER_OUTER_L3 \
 	(MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index f2a2c609e2..aa0a5acdca 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -2598,6 +2598,51 @@ flow_dv_validate_item_ipv6_frag_ext(const struct rte_flow_item *item,
 				  "specified range not supported");
 }
 
+/*
+ * Validate ASO CT item.
+ *
+ * @param[in] dev
+ *   Pointer to the rte_eth_dev structure.
+ * @param[in] item
+ *   Item specification.
+ * @param[in] item_flags
+ *   Pointer to bit-fields that holds the items detected until now.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_validate_item_aso_ct(struct rte_eth_dev *dev,
+			     const struct rte_flow_item *item,
+			     uint64_t *item_flags,
+			     struct rte_flow_error *error)
+{
+	const struct rte_flow_item_conntrack *spec = item->spec;
+	const struct rte_flow_item_conntrack *mask = item->mask;
+	RTE_SET_USED(dev);
+	uint32_t flags;
+
+	if (*item_flags & MLX5_FLOW_LAYER_ASO_CT)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ITEM, NULL,
+					  "Only one CT is supported");
+	if (!mask)
+		mask = &rte_flow_item_conntrack_mask;
+	flags = spec->flags & mask->flags;
+	if ((flags & RTE_FLOW_CONNTRACK_PKT_STATE_VALID) &&
+	    ((flags & RTE_FLOW_CONNTRACK_PKT_STATE_INVALID) ||
+	     (flags & RTE_FLOW_CONNTRACK_PKT_STATE_BAD) ||
+	     (flags & RTE_FLOW_CONNTRACK_PKT_STATE_DISABLED)))
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ITEM, NULL,
+					  "Conflict status bits");
+	/* State change also needs to be considered. */
+	*item_flags |= MLX5_FLOW_LAYER_ASO_CT;
+	return 0;
+}
+
 /**
  * Validate the pop VLAN action.
  *
@@ -6696,6 +6741,12 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
 				return ret;
 			last_item = MLX5_FLOW_LAYER_ECPRI;
 			break;
+		case RTE_FLOW_ITEM_TYPE_CONNTRACK:
+			ret = flow_dv_validate_item_aso_ct(dev, items,
+							   &item_flags, error);
+			if (ret < 0)
+				return ret;
+			break;
 		default:
 			return rte_flow_error_set(error, ENOTSUP,
 						  RTE_FLOW_ERROR_TYPE_ITEM,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v3 16/17] net/mlx5: add support of CT between two ports
  2021-05-05  6:49 ` [dpdk-dev] [PATCH v3 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (14 preceding siblings ...)
  2021-05-05  6:50   ` [dpdk-dev] [PATCH v3 15/17] net/mlx5: validation of CT item Bing Zhao
@ 2021-05-05  6:50   ` Bing Zhao
  2021-05-05  6:50   ` [dpdk-dev] [PATCH v3 17/17] doc: update mlx5 support for conntrack Bing Zhao
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  6:50 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

After creating a connection tracking context, it can be used between
two ports. For each port, the flow for one direction of traffic will
be created.

The context can only be shared between the owner port and the peer
port that was specified when it was created. Only the owner port
can update or query the context in the current implementation.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow.h    | 57 +++++++++++++++++++++++++-
 drivers/net/mlx5/mlx5_flow_dv.c | 71 +++++++++++++++++++++++++--------
 2 files changed, 110 insertions(+), 18 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 238befa2d4..ddaba40f72 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -48,6 +48,25 @@ enum {
 	MLX5_INDIRECT_ACTION_TYPE_CT,
 };
 
+/* Currently, at most 256 ports and 4M actions are supported. */
+#define MLX5_INDIRECT_ACT_CT_MAX_PORT 0x100
+
+#define MLX5_INDIRECT_ACT_CT_OWNER_SHIFT 22
+#define MLX5_INDIRECT_ACT_CT_OWNER_MASK (MLX5_INDIRECT_ACT_CT_MAX_PORT - 1)
+
+/* 30-31: type, 22-29: owner port, 0-21: index. */
+#define MLX5_INDIRECT_ACT_CT_GEN_IDX(owner, index) \
+	((MLX5_INDIRECT_ACTION_TYPE_CT << MLX5_INDIRECT_ACTION_TYPE_OFFSET) | \
+	 (((owner) & MLX5_INDIRECT_ACT_CT_OWNER_MASK) << \
+	  MLX5_INDIRECT_ACT_CT_OWNER_SHIFT) | (index))
+
+#define MLX5_INDIRECT_ACT_CT_GET_OWNER(index) \
+	(((index) >> MLX5_INDIRECT_ACT_CT_OWNER_SHIFT) & \
+	 MLX5_INDIRECT_ACT_CT_OWNER_MASK)
+
+#define MLX5_INDIRECT_ACT_CT_GET_IDX(index) \
+	((index) & ((1 << MLX5_INDIRECT_ACT_CT_OWNER_SHIFT) - 1))
+
 /* Matches on selected register. */
 struct mlx5_rte_flow_item_tag {
 	enum modify_reg id;
@@ -1304,7 +1323,7 @@ mlx5_aso_meter_by_idx(struct mlx5_priv *priv, uint32_t idx)
 }
 
 /*
- * Get ASO CT action by index.
+ * Get ASO CT action by device and index.
  *
  * @param[in] dev
  *   Pointer to the Ethernet device structure.
@@ -1315,7 +1334,7 @@ mlx5_aso_meter_by_idx(struct mlx5_priv *priv, uint32_t idx)
  *   The specified ASO CT action pointer.
  */
 static inline struct mlx5_aso_ct_action *
-flow_aso_ct_get_by_idx(struct rte_eth_dev *dev, uint32_t idx)
+flow_aso_ct_get_by_dev_idx(struct rte_eth_dev *dev, uint32_t idx)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
@@ -1330,6 +1349,40 @@ flow_aso_ct_get_by_idx(struct rte_eth_dev *dev, uint32_t idx)
 	return &pool->actions[idx % MLX5_ASO_CT_ACTIONS_PER_POOL];
 }
 
+/*
+ * Get ASO CT action by owner & index.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] idx
+ *   Index to the ASO CT action and owner port combination.
+ *
+ * @return
+ *   The specified ASO CT action pointer.
+ */
+static inline struct mlx5_aso_ct_action *
+flow_aso_ct_get_by_idx(struct rte_eth_dev *dev, uint32_t own_idx)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_action *ct;
+	uint16_t owner = (uint16_t)MLX5_INDIRECT_ACT_CT_GET_OWNER(own_idx);
+	uint32_t idx = MLX5_INDIRECT_ACT_CT_GET_IDX(own_idx);
+
+	if (owner == PORT_ID(priv)) {
+		ct = flow_aso_ct_get_by_dev_idx(dev, idx);
+	} else {
+		struct rte_eth_dev *owndev = &rte_eth_devices[owner];
+
+		MLX5_ASSERT(owner < RTE_MAX_ETHPORTS);
+		if (dev->data->dev_started != 1)
+			return NULL;
+		ct = flow_aso_ct_get_by_dev_idx(owndev, idx);
+		if (ct->peer != PORT_ID(priv))
+			return NULL;
+	}
+	return ct;
+}
+
 int mlx5_flow_group_to_table(struct rte_eth_dev *dev,
 			     const struct mlx5_flow_tunnel *tunnel,
 			     uint32_t group, uint32_t *table,
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index aa0a5acdca..ca55cff48b 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -11289,7 +11289,7 @@ flow_dv_translate_create_aso_age(struct rte_eth_dev *dev,
 }
 
 /*
- * Release an ASO CT action.
+ * Release an ASO CT action by its own device.
  *
  * @param[in] dev
  *   Pointer to the Ethernet device structure.
@@ -11300,12 +11300,12 @@ flow_dv_translate_create_aso_age(struct rte_eth_dev *dev,
  *   0 when CT action was removed, otherwise the number of references.
  */
 static inline int
-flow_dv_aso_ct_release(struct rte_eth_dev *dev, uint32_t idx)
+flow_dv_aso_ct_dev_release(struct rte_eth_dev *dev, uint32_t idx)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
 	uint32_t ret;
-	struct mlx5_aso_ct_action *ct = flow_aso_ct_get_by_idx(dev, idx);
+	struct mlx5_aso_ct_action *ct = flow_aso_ct_get_by_dev_idx(dev, idx);
 	enum mlx5_aso_ct_state state =
 			__atomic_load_n(&ct->state, __ATOMIC_RELAXED);
 
@@ -11334,7 +11334,21 @@ flow_dv_aso_ct_release(struct rte_eth_dev *dev, uint32_t idx)
 		LIST_INSERT_HEAD(&mng->free_cts, ct, next);
 		rte_spinlock_unlock(&mng->ct_sl);
 	}
-	return ret;
+	return (int)ret;
+}
+
+static inline int
+flow_dv_aso_ct_release(struct rte_eth_dev *dev, uint32_t own_idx)
+{
+	uint16_t owner = (uint16_t)MLX5_INDIRECT_ACT_CT_GET_OWNER(own_idx);
+	uint32_t idx = MLX5_INDIRECT_ACT_CT_GET_IDX(own_idx);
+	struct rte_eth_dev *owndev = &rte_eth_devices[owner];
+	RTE_SET_USED(dev);
+
+	MLX5_ASSERT(owner < RTE_MAX_ETHPORTS);
+	if (dev->data->dev_started != 1)
+		return -1;
+	return flow_dv_aso_ct_dev_release(owndev, idx);
 }
 
 /*
@@ -11486,7 +11500,7 @@ flow_dv_aso_ct_alloc(struct rte_eth_dev *dev, struct rte_flow_error *error)
 		RTE_SET_USED(reg_c);
 #endif
 		if (!ct->dr_action_orig) {
-			flow_dv_aso_ct_release(dev, ct_idx);
+			flow_dv_aso_ct_dev_release(dev, ct_idx);
 			rte_flow_error_set(error, rte_errno,
 					   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
 					   "failed to create ASO CT action");
@@ -11502,7 +11516,7 @@ flow_dv_aso_ct_alloc(struct rte_eth_dev *dev, struct rte_flow_error *error)
 			 reg_c - REG_C_0);
 #endif
 		if (!ct->dr_action_rply) {
-			flow_dv_aso_ct_release(dev, ct_idx);
+			flow_dv_aso_ct_dev_release(dev, ct_idx);
 			rte_flow_error_set(error, rte_errno,
 					   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
 					   "failed to create ASO CT action");
@@ -11544,12 +11558,13 @@ flow_dv_translate_create_conntrack(struct rte_eth_dev *dev,
 		return rte_flow_error_set(error, rte_errno,
 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
 					  "Failed to allocate CT object");
-	ct = flow_aso_ct_get_by_idx(dev, idx);
+	ct = flow_aso_ct_get_by_dev_idx(dev, idx);
 	if (mlx5_aso_ct_update_by_wqe(sh, ct, pro))
 		return rte_flow_error_set(error, EBUSY,
 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
 					  "Failed to update CT");
 	ct->is_original = !!pro->is_original_dir;
+	ct->peer = pro->peer_port;
 	return idx;
 }
 
@@ -11713,7 +11728,7 @@ flow_dv_translate(struct rte_eth_dev *dev,
 		const struct rte_flow_action *found_action = NULL;
 		uint32_t jump_group = 0;
 		struct mlx5_flow_counter *cnt;
-		uint32_t ct_idx;
+		uint32_t owner_idx;
 		struct mlx5_aso_ct_action *ct;
 
 		if (!mlx5_flow_os_action_supported(action_type))
@@ -12189,8 +12204,13 @@ flow_dv_translate(struct rte_eth_dev *dev,
 			action_flags |= MLX5_FLOW_ACTION_MODIFY_FIELD;
 			break;
 		case RTE_FLOW_ACTION_TYPE_CONNTRACK:
-			ct_idx = (uint32_t)(uintptr_t)action->conf;
-			ct = flow_aso_ct_get_by_idx(dev, ct_idx);
+			owner_idx = (uint32_t)(uintptr_t)action->conf;
+			ct = flow_aso_ct_get_by_idx(dev, owner_idx);
+			if (!ct)
+				return rte_flow_error_set(error, EINVAL,
+						RTE_FLOW_ERROR_TYPE_ACTION,
+						NULL,
+						"Failed to get CT object.");
 			if (mlx5_aso_ct_available(priv->sh, ct))
 				return rte_flow_error_set(error, rte_errno,
 						RTE_FLOW_ERROR_TYPE_ACTION,
@@ -12203,7 +12223,7 @@ flow_dv_translate(struct rte_eth_dev *dev,
 				dev_flow->dv.actions[actions_n] =
 							ct->dr_action_rply;
 			flow->indirect_type = MLX5_INDIRECT_ACTION_TYPE_CT;
-			flow->ct = ct_idx;
+			flow->ct = owner_idx;
 			__atomic_fetch_add(&ct->refcnt, 1, __ATOMIC_RELAXED);
 			actions_n++;
 			action_flags |= MLX5_FLOW_ACTION_CT;
@@ -13803,8 +13823,7 @@ flow_dv_action_create(struct rte_eth_dev *dev,
 	case RTE_FLOW_ACTION_TYPE_CONNTRACK:
 		ret = flow_dv_translate_create_conntrack(dev, action->conf,
 							 err);
-		idx = (MLX5_INDIRECT_ACTION_TYPE_CT <<
-		       MLX5_INDIRECT_ACTION_TYPE_OFFSET) | ret;
+		idx = MLX5_INDIRECT_ACT_CT_GEN_IDX(PORT_ID(priv), ret);
 		break;
 	default:
 		rte_flow_error_set(err, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
@@ -13856,7 +13875,9 @@ flow_dv_action_destroy(struct rte_eth_dev *dev,
 		return 0;
 	case MLX5_INDIRECT_ACTION_TYPE_CT:
 		ret = flow_dv_aso_ct_release(dev, idx);
-		if (ret)
+		if (ret < 0)
+			return ret;
+		if (ret > 0)
 			DRV_LOG(DEBUG, "Connection tracking object %u still "
 				"has references %d.", idx, ret);
 		return 0;
@@ -13960,8 +13981,16 @@ __flow_dv_action_ct_update(struct rte_eth_dev *dev, uint32_t idx,
 	struct mlx5_aso_ct_action *ct;
 	const struct rte_flow_action_conntrack *new_prf;
 	int ret = 0;
+	uint16_t owner = (uint16_t)MLX5_INDIRECT_ACT_CT_GET_OWNER(idx);
+	uint32_t dev_idx;
 
-	ct = flow_aso_ct_get_by_idx(dev, idx);
+	if (PORT_ID(priv) != owner)
+		return rte_flow_error_set(error, EACCES,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					  NULL,
+					  "CT object owned by another port");
+	dev_idx = MLX5_INDIRECT_ACT_CT_GET_IDX(idx);
+	ct = flow_aso_ct_get_by_dev_idx(dev, dev_idx);
 	if (!ct->refcnt)
 		return rte_flow_error_set(error, ENOMEM,
 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
@@ -14049,6 +14078,8 @@ flow_dv_action_query(struct rte_eth_dev *dev,
 	uint32_t idx = act_idx & ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_aso_ct_action *ct;
+	uint16_t owner;
+	uint32_t dev_idx;
 
 	switch (type) {
 	case MLX5_INDIRECT_ACTION_TYPE_AGE:
@@ -14063,7 +14094,15 @@ flow_dv_action_query(struct rte_eth_dev *dev,
 			     (&age_param->sec_since_last_hit, __ATOMIC_RELAXED);
 		return 0;
 	case MLX5_INDIRECT_ACTION_TYPE_CT:
-		ct = flow_aso_ct_get_by_idx(dev, idx);
+		owner = (uint16_t)MLX5_INDIRECT_ACT_CT_GET_OWNER(idx);
+		if (owner != PORT_ID(priv))
+			return rte_flow_error_set(error, EACCES,
+					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					NULL,
+					"CT object owned by another port");
+		dev_idx = MLX5_INDIRECT_ACT_CT_GET_IDX(idx);
+		ct = flow_aso_ct_get_by_dev_idx(dev, dev_idx);
+		MLX5_ASSERT(ct);
 		if (!ct->refcnt)
 			return rte_flow_error_set(error, EFAULT,
 					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v3 17/17] doc: update mlx5 support for conntrack
  2021-05-05  6:49 ` [dpdk-dev] [PATCH v3 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (15 preceding siblings ...)
  2021-05-05  6:50   ` [dpdk-dev] [PATCH v3 16/17] net/mlx5: add support of CT between two ports Bing Zhao
@ 2021-05-05  6:50   ` Bing Zhao
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  6:50 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

The support and limitations of connection tracking are added to the
release notes and the mlx5 NIC guide.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 doc/guides/nics/features/default.ini   |  1 +
 doc/guides/nics/features/mlx5.ini      |  1 +
 doc/guides/nics/mlx5.rst               | 14 ++++++++++++++
 doc/guides/rel_notes/release_21_05.rst |  2 ++
 4 files changed, 18 insertions(+)

diff --git a/doc/guides/nics/features/default.ini b/doc/guides/nics/features/default.ini
index 8046bd121e..0deb4ef547 100644
--- a/doc/guides/nics/features/default.ini
+++ b/doc/guides/nics/features/default.ini
@@ -66,6 +66,7 @@ Module EEPROM dump   =
 Registers dump       =
 LED                  =
 Multiprocess aware   =
+Connection tracking  =
 FreeBSD              =
 Linux                =
 Windows              =
diff --git a/doc/guides/nics/features/mlx5.ini b/doc/guides/nics/features/mlx5.ini
index ddd131da16..45dbe75d07 100644
--- a/doc/guides/nics/features/mlx5.ini
+++ b/doc/guides/nics/features/mlx5.ini
@@ -45,6 +45,7 @@ Stats per queue      = Y
 FW version           = Y
 Module EEPROM dump   = Y
 Multiprocess aware   = Y
+Connection tracking  = Y
 Linux                = Y
 Windows              = P
 ARMv8                = Y
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 2bb4f18a08..238da94118 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -107,6 +107,7 @@ Features
 - 21844 flow priorities for ingress or egress flow groups greater than 0 and for any transfer
   flow group.
 - Flow metering, including meter policy API.
+- Connection tracking.
 
 Limitations
 -----------
@@ -418,6 +419,14 @@ Limitations
      - RED: must be DROP.
   - meter profile packet mode is supported.
 
+- Connection tracking:
+
+  - connection tracking (conntrack) cannot co-exist with ASO meter or ASO age actions in a single flow rule.
+  - Flow rules insertion rate and memory consumption.
+  - software limitation:
+     - ports: a maximal number of 256.
+     - conntrack: a maximal number of 4M.
+
 Statistics
 ----------
 
@@ -1680,6 +1689,11 @@ Supported hardware offloads
    |                       | | rdma-core 35  | | rdma-core 35  |
    |                       | | ConnectX-5    | | ConnectX-5    |
    +-----------------------+-----------------+-----------------+
+   | Connection tracking   | |               | | DPDK 21.05    |
+   |                       | |     N/A       | | OFED 5.3      |
+   |                       | |               | | rdma-core 35  |
+   |                       | |               | | ConnectX-6 Dx |
+   +-----------------------+-----------------+-----------------+
 
 .. table:: Minimal SW/HW versions for shared action offload
    :name: sact
diff --git a/doc/guides/rel_notes/release_21_05.rst b/doc/guides/rel_notes/release_21_05.rst
index efd68e8c7c..4c4c37ef87 100644
--- a/doc/guides/rel_notes/release_21_05.rst
+++ b/doc/guides/rel_notes/release_21_05.rst
@@ -166,6 +166,8 @@ New Features
   * Added support for ASO (Advanced Steering Operation) meter.
   * Added support for ASO metering by PPS (packet per second).
   * Added support for the monitor policy of Power Management API.
+  * Added support for connection tracking action and item as well as context create,
+    destroy, update and query.
 
 * **Updated NXP DPAA driver.**
 
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v4 00/17] conntrack support in mlx5 PMD
  2021-04-27 15:37 [dpdk-dev] [PATCH 00/17] conntrack support in mlx5 PMD Bing Zhao
                   ` (19 preceding siblings ...)
  2021-05-05  6:49 ` [dpdk-dev] [PATCH v3 00/17] conntrack support in mlx5 PMD Bing Zhao
@ 2021-05-05  7:19 ` Bing Zhao
  2021-05-05  7:19   ` [dpdk-dev] [PATCH v4 01/17] common/mlx5: add connection tracking object definition Bing Zhao
                     ` (16 more replies)
  2021-05-05  8:05 ` [dpdk-dev] [PATCH v5 00/17] conntrack support in mlx5 PMD Bing Zhao
                   ` (2 subsequent siblings)
  23 siblings, 17 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  7:19 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

This patch set adds the connection tracking offload support in the
mlx5 driver, as well as the documents update.
 
---
v2: code bug fixes, commits clean up and doc update.
v3: fix error input pointer for CT MR registering
v4: fix typo in commit message of patch 11
---

Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>

Bing Zhao (17):
  common/mlx5: add connection tracking object definition
  common/mlx5: add CT offload capability checking
  net/mlx5: use meter color reg for CT
  net/mlx5: initialization of CT management
  common/mlx5: add Dexv CT objects creation
  net/mlx5: add modify support for CT
  net/mlx5: add actions creating for CT
  net/mlx5: close CT management structure
  net/mlx5: add ASO CT query implementation
  net/mlx5: add ASO CT destroy handling
  net/mlx5: add translation of CT action
  net/mlx5: add translation of CT item
  net/mlx5: add CT context update
  net/mlx5: validation of CT action
  net/mlx5: validation of CT item
  net/mlx5: add support of CT between two ports
  doc: update mlx5 support for conntrack

 doc/guides/nics/features/default.ini   |   1 +
 doc/guides/nics/features/mlx5.ini      |   1 +
 doc/guides/nics/mlx5.rst               |  14 +
 doc/guides/rel_notes/release_21_05.rst |   2 +
 drivers/common/mlx5/linux/meson.build  |   2 +
 drivers/common/mlx5/mlx5_devx_cmds.c   |  53 +++
 drivers/common/mlx5/mlx5_devx_cmds.h   |   5 +
 drivers/common/mlx5/mlx5_prm.h         |  88 ++++
 drivers/common/mlx5/version.map        |   1 +
 drivers/net/mlx5/linux/mlx5_os.c       |  13 +
 drivers/net/mlx5/mlx5.c                |  92 ++++
 drivers/net/mlx5/mlx5.h                |  76 ++++
 drivers/net/mlx5/mlx5_flow.c           |  44 +-
 drivers/net/mlx5/mlx5_flow.h           | 101 ++++-
 drivers/net/mlx5/mlx5_flow_aso.c       | 592 ++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_dv.c        | 601 ++++++++++++++++++++++++-
 16 files changed, 1683 insertions(+), 3 deletions(-)

-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v4 01/17] common/mlx5: add connection tracking object definition
  2021-05-05  7:19 ` [dpdk-dev] [PATCH v4 00/17] conntrack support in mlx5 PMD Bing Zhao
@ 2021-05-05  7:19   ` Bing Zhao
  2021-05-05  7:19   ` [dpdk-dev] [PATCH v4 02/17] common/mlx5: add CT offload capability checking Bing Zhao
                     ` (15 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  7:19 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

The structures of ASO connection tracking offload object are added
based on the definitions in the PRM. One CT object context will be
loaded into the cache completely in a reversed order of dwords. The
valid bit should be the MSB of the last dword. This is used for the
conntrack context creation and update, as well as for the query.

The capabilities 2 (HCA_CAP_2) layout is also added. The connection
tracking related capabilities could be queried via the HCA_CAP_2.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/common/mlx5/mlx5_prm.h | 85 ++++++++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)

diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index efa5ae67bf..4da89d3379 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -1119,6 +1119,7 @@ enum {
 	MLX5_GET_HCA_CAP_OP_MOD_ROCE = 0x4 << 1,
 	MLX5_GET_HCA_CAP_OP_MOD_NIC_FLOW_TABLE = 0x7 << 1,
 	MLX5_GET_HCA_CAP_OP_MOD_VDPA_EMULATION = 0x13 << 1,
+	MLX5_GET_HCA_CAP_OP_MOD_GENERAL_DEVICE_2 = 0x20 << 1,
 };
 
 #define MLX5_GENERAL_OBJ_TYPES_CAP_VIRTQ_NET_Q \
@@ -1661,6 +1662,29 @@ struct mlx5_ifc_flow_table_nic_cap_bits {
 	struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties;
 };
 
+struct mlx5_ifc_cmd_hca_cap_2_bits {
+	u8 reserved_at_0[0x80]; /* End of DW4. */
+	u8 reserved_at_80[0xb];
+	u8 log_max_num_reserved_qpn[0x5];
+	u8 reserved_at_90[0x3];
+	u8 log_reserved_qpn_granularity[0x5];
+	u8 reserved_at_98[0x3];
+	u8 log_reserved_qpn_max_alloc[0x5]; /* End of DW5. */
+	u8 max_reformat_insert_size[0x8];
+	u8 max_reformat_insert_offset[0x8];
+	u8 max_reformat_remove_size[0x8];
+	u8 max_reformat_remove_offset[0x8]; /* End of DW6. */
+	u8 aso_conntrack_reg_id[0x8];
+	u8 reserved_at_c8[0x3];
+	u8 log_conn_track_granularity[0x5];
+	u8 reserved_at_d0[0x3];
+	u8 log_conn_track_max_alloc[0x5];
+	u8 reserved_at_d8[0x3];
+	u8 log_max_conn_track_offload[0x5];
+	u8 reserved_at_e0[0x20]; /* End of DW7. */
+	u8 reserved_at_100[0x700];
+};
+
 union mlx5_ifc_hca_cap_union_bits {
 	struct mlx5_ifc_cmd_hca_cap_bits cmd_hca_cap;
 	struct mlx5_ifc_per_protocol_networking_offload_caps_bits
@@ -2599,6 +2623,67 @@ struct mlx5_ifc_create_flow_meter_aso_in_bits {
 	struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr;
 	struct mlx5_ifc_flow_meter_aso_bits flow_meter_aso;
 };
+
+struct mlx5_ifc_tcp_window_params_bits {
+	u8 max_ack[0x20];
+	u8 max_win[0x20];
+	u8 reply_end[0x20];
+	u8 sent_end[0x20];
+};
+
+struct mlx5_ifc_conn_track_aso_bits {
+	struct mlx5_ifc_tcp_window_params_bits reply_dir; /* End of DW3. */
+	struct mlx5_ifc_tcp_window_params_bits original_dir; /* End of DW7. */
+	u8 last_end[0x20]; /* End of DW8. */
+	u8 last_ack[0x20]; /* End of DW9. */
+	u8 last_seq[0x20]; /* End of DW10. */
+	u8 last_win[0x10];
+	u8 reserved_at_170[0xa];
+	u8 last_dir[0x1];
+	u8 last_index[0x5]; /* End of DW11. */
+	u8 reserved_at_180[0x40]; /* End of DW13. */
+	u8 reply_direction_tcp_scale[0x4];
+	u8 reply_direction_tcp_close_initiated[0x1];
+	u8 reply_direction_tcp_liberal_enabled[0x1];
+	u8 reply_direction_tcp_data_unacked[0x1];
+	u8 reply_direction_tcp_max_ack[0x1];
+	u8 reserved_at_1c8[0x8];
+	u8 original_direction_tcp_scale[0x4];
+	u8 original_direction_tcp_close_initiated[0x1];
+	u8 original_direction_tcp_liberal_enabled[0x1];
+	u8 original_direction_tcp_data_unacked[0x1];
+	u8 original_direction_tcp_max_ack[0x1];
+	u8 reserved_at_1d8[0x8]; /* End of DW14. */
+	u8 valid[0x1];
+	u8 state[0x3];
+	u8 freeze_track[0x1];
+	u8 reserved_at_1e5[0xb];
+	u8 reserved_at_1f0[0x1];
+	u8 connection_assured[0x1];
+	u8 sack_permitted[0x1];
+	u8 challenged_acked[0x1];
+	u8 heartbeat[0x1];
+	u8 max_ack_window[0x3];
+	u8 reserved_at_1f8[0x1];
+	u8 retransmission_counter[0x3];
+	u8 retranmission_limit_exceeded[0x1];
+	u8 retranmission_limit[0x3]; /* End of DW15. */
+};
+
+struct mlx5_ifc_conn_track_offload_bits {
+	u8 modify_field_select[0x40];
+	u8 reserved_at_40[0x40];
+	u8 reserved_at_80[0x8];
+	u8 conn_track_aso_access_pd[0x18];
+	u8 reserved_at_a0[0x160];
+	struct mlx5_ifc_conn_track_aso_bits conn_track_aso;
+};
+
+struct mlx5_ifc_create_conn_track_aso_in_bits {
+	struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr;
+	struct mlx5_ifc_conn_track_offload_bits conn_track_offload;
+};
+
 enum mlx5_access_aso_opc_mod {
 	ASO_OPC_MOD_IPSEC = 0x0,
 	ASO_OPC_MOD_CONNECTION_TRACKING = 0x1,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v4 02/17] common/mlx5: add CT offload capability checking
  2021-05-05  7:19 ` [dpdk-dev] [PATCH v4 00/17] conntrack support in mlx5 PMD Bing Zhao
  2021-05-05  7:19   ` [dpdk-dev] [PATCH v4 01/17] common/mlx5: add connection tracking object definition Bing Zhao
@ 2021-05-05  7:19   ` Bing Zhao
  2021-05-05  7:19   ` [dpdk-dev] [PATCH v4 03/17] net/mlx5: use meter color reg for CT Bing Zhao
                     ` (14 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  7:19 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

During startup, the ASO connection tracking offload capability could
be queried via HCA_CAP_QUERY command. If the HW doesn't support ASO
CT, the value would be 0 by default. The following initialization
should be skipped and the creation of the CT object should return
a failure directly.

The following CT creation should also check this capability. With
the old driver, the pre-processing macro should be used in order to
make the compiling pass.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/common/mlx5/linux/meson.build | 2 ++
 drivers/common/mlx5/mlx5_devx_cmds.c  | 3 +++
 drivers/common/mlx5/mlx5_devx_cmds.h  | 1 +
 drivers/common/mlx5/mlx5_prm.h        | 3 +++
 4 files changed, 9 insertions(+)

diff --git a/drivers/common/mlx5/linux/meson.build b/drivers/common/mlx5/linux/meson.build
index 3334bd5cb2..007834a49b 100644
--- a/drivers/common/mlx5/linux/meson.build
+++ b/drivers/common/mlx5/linux/meson.build
@@ -189,6 +189,8 @@ has_sym_args = [
             'MLX5_WQE_UMR_CTRL_FLAG_INLINE' ],
         [ 'HAVE_MLX5_DR_FLOW_DUMP_RULE', 'infiniband/mlx5dv.h',
             'mlx5dv_dump_dr_rule' ],
+        [ 'HAVE_MLX5_DR_ACTION_ASO_CT', 'infiniband/mlx5dv.h',
+            'MLX5DV_DR_ACTION_FLAGS_ASO_CT_DIRECTION_INITIATOR' ],
 ]
 config = configuration_data()
 foreach arg:has_sym_args
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index 79fff6457c..ad67883fde 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -760,6 +760,9 @@ mlx5_devx_cmd_query_hca_attr(void *ctx,
 		MLX5_GET(cmd_hca_cap, hcattr, umr_indirect_mkey_disabled);
 	attr->umr_modify_entity_size_disabled =
 		MLX5_GET(cmd_hca_cap, hcattr, umr_modify_entity_size_disabled);
+	attr->ct_offload = !!(MLX5_GET64(cmd_hca_cap, hcattr,
+					 general_obj_types) &
+			      MLX5_GENERAL_OBJ_TYPES_CAP_CONN_TRACK_OFFLOAD);
 	if (attr->qos.sup) {
 		MLX5_SET(query_hca_cap_in, in, op_mod,
 			 MLX5_GET_HCA_CAP_OP_MOD_QOS_CAP |
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h
index 870bdb6b30..746320cf04 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -137,6 +137,7 @@ struct mlx5_hca_attr {
 	uint32_t qp_ts_format:2;
 	uint32_t regex:1;
 	uint32_t reg_c_preserve:1;
+	uint32_t ct_offload:1; /* General obj type ASO CT offload supported. */
 	uint32_t regexp_num_of_engines;
 	uint32_t log_max_ft_sampler_num:8;
 	uint32_t geneve_tlv_opt;
diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 4da89d3379..71bdf43668 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -1134,6 +1134,8 @@ enum {
 			(1ULL << MLX5_GENERAL_OBJ_TYPE_FLOW_METER_ASO)
 #define MLX5_GENERAL_OBJ_TYPES_CAP_GENEVE_TLV_OPT \
 			(1ULL << MLX5_OBJ_TYPE_GENEVE_TLV_OPT)
+#define MLX5_GENERAL_OBJ_TYPES_CAP_CONN_TRACK_OFFLOAD \
+			(1ULL << MLX5_GENERAL_OBJ_TYPE_CONN_TRACK_OFFLOAD)
 
 enum {
 	MLX5_HCA_CAP_OPMOD_GET_MAX   = 0,
@@ -2456,6 +2458,7 @@ enum {
 	MLX5_GENERAL_OBJ_TYPE_FLEX_PARSE_GRAPH = 0x0022,
 	MLX5_GENERAL_OBJ_TYPE_FLOW_METER_ASO = 0x0024,
 	MLX5_GENERAL_OBJ_TYPE_FLOW_HIT_ASO = 0x0025,
+	MLX5_GENERAL_OBJ_TYPE_CONN_TRACK_OFFLOAD = 0x0031,
 };
 
 struct mlx5_ifc_general_obj_in_cmd_hdr_bits {
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v4 03/17] net/mlx5: use meter color reg for CT
  2021-05-05  7:19 ` [dpdk-dev] [PATCH v4 00/17] conntrack support in mlx5 PMD Bing Zhao
  2021-05-05  7:19   ` [dpdk-dev] [PATCH v4 01/17] common/mlx5: add connection tracking object definition Bing Zhao
  2021-05-05  7:19   ` [dpdk-dev] [PATCH v4 02/17] common/mlx5: add CT offload capability checking Bing Zhao
@ 2021-05-05  7:19   ` Bing Zhao
  2021-05-05  7:19   ` [dpdk-dev] [PATCH v4 04/17] net/mlx5: initialization of CT management Bing Zhao
                     ` (13 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  7:19 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

Based on the capacity, 3 registers could be used. Due to the register
allocation, only the one REG_C_3 for meter color could be reused
right now.

Then in the same flow, no more than one ASO action can be supported.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow.c | 4 +++-
 drivers/net/mlx5/mlx5_flow.h | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index a7ceafe221..edad6007a8 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -760,7 +760,9 @@ mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
 			return priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
 			       REG_C_3;
 	case MLX5_MTR_COLOR:
-	case MLX5_ASO_FLOW_HIT: /* Both features use the same REG_C. */
+	case MLX5_ASO_FLOW_HIT:
+	case MLX5_ASO_CONNTRACK:
+		/* All features use the same REG_C. */
 		MLX5_ASSERT(priv->mtr_color_reg != REG_NON);
 		return priv->mtr_color_reg;
 	case MLX5_COPY_MARK:
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index cc3e79d088..964e13a869 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -84,6 +84,7 @@ enum mlx5_feature_name {
 	MLX5_MTR_COLOR,
 	MLX5_MTR_ID,
 	MLX5_ASO_FLOW_HIT,
+	MLX5_ASO_CONNTRACK,
 };
 
 /* Default queue number. */
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v4 04/17] net/mlx5: initialization of CT management
  2021-05-05  7:19 ` [dpdk-dev] [PATCH v4 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (2 preceding siblings ...)
  2021-05-05  7:19   ` [dpdk-dev] [PATCH v4 03/17] net/mlx5: use meter color reg for CT Bing Zhao
@ 2021-05-05  7:19   ` Bing Zhao
  2021-05-05  7:19   ` [dpdk-dev] [PATCH v4 05/17] common/mlx5: add Dexv CT objects creation Bing Zhao
                     ` (12 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  7:19 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

The definitions of ASO connection tracking objects management
structures are added.

Considering performance, the bulk allocation of ASO CT objects
should be used. The maximal value per bulk and the granularity could
be fetched from HCA capabilities 2. Right now, a fixed number of 64
is used for each bulk for a better management purpose.

The ASO QP for CT is initialized, the SQ will be used for both
modify and query command.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/linux/mlx5_os.c | 13 +++++++++
 drivers/net/mlx5/mlx5.c          | 36 +++++++++++++++++++++++
 drivers/net/mlx5/mlx5.h          | 50 ++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_aso.c | 50 ++++++++++++++++++++++++++++++++
 4 files changed, 149 insertions(+)

diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 479ee7d8d1..5ac787106d 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1323,6 +1323,19 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 			DRV_LOG(DEBUG, "Flow Hit ASO is supported.");
 		}
 #endif /* HAVE_MLX5_DR_CREATE_ACTION_ASO */
+#if defined(HAVE_MLX5_DR_CREATE_ACTION_ASO) && \
+	defined(HAVE_MLX5_DR_ACTION_ASO_CT)
+		if (config->hca_attr.ct_offload &&
+		    priv->mtr_color_reg == REG_C_3) {
+			err = mlx5_flow_aso_ct_mng_init(sh);
+			if (err) {
+				err = -err;
+				goto error;
+			}
+			DRV_LOG(DEBUG, "CT ASO is supported.");
+			sh->ct_aso_en = 1;
+		}
+#endif /* HAVE_MLX5_DR_CREATE_ACTION_ASO && HAVE_MLX5_DR_ACTION_ASO_CT */
 #if defined(HAVE_MLX5DV_DR) && defined(HAVE_MLX5_DR_CREATE_ACTION_FLOW_SAMPLE)
 		if (config->hca_attr.log_max_ft_sampler_num > 0  &&
 		    config->dv_flow_en) {
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 8cd6f1eaee..86dbe6d573 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -670,6 +670,42 @@ mlx5_age_event_prepare(struct mlx5_dev_ctx_shared *sh)
 	}
 }
 
+/*
+ * Initialize the ASO connection tracking structure.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_aso_ct_mng_init(struct mlx5_dev_ctx_shared *sh)
+{
+	int err;
+
+	if (sh->ct_mng)
+		return 0;
+	sh->ct_mng = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*sh->ct_mng),
+				 RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+	if (!sh->ct_mng) {
+		DRV_LOG(ERR, "ASO CT management allocation failed.");
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+	err = mlx5_aso_queue_init(sh, ASO_OPC_MOD_CONNECTION_TRACKING);
+	if (err) {
+		mlx5_free(sh->ct_mng);
+		/* rte_errno should be extracted from the failure. */
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+	rte_spinlock_init(&sh->ct_mng->ct_sl);
+	rte_rwlock_init(&sh->ct_mng->resize_rwl);
+	LIST_INIT(&sh->ct_mng->free_cts);
+	return 0;
+}
+
 /**
  * Initialize the flow resources' indexed mempool.
  *
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index c62977613a..1a5c78fa3a 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -983,6 +983,52 @@ struct mlx5_bond_info {
 	} ports[MLX5_BOND_MAX_PORTS];
 };
 
+/* Number of connection tracking objects per pool: must be a power of 2. */
+#define MLX5_ASO_CT_ACTIONS_PER_POOL 64
+
+/* ASO Conntrack state. */
+enum mlx5_aso_ct_state {
+	ASO_CONNTRACK_FREE, /* Inactive, in the free list. */
+	ASO_CONNTRACK_WAIT, /* WQE sent in the SQ. */
+	ASO_CONNTRACK_READY, /* CQE received w/o error. */
+	ASO_CONNTRACK_QUERY, /* WQE for query sent. */
+	ASO_CONNTRACK_MAX, /* Guard. */
+};
+
+/* Generic ASO connection tracking structure. */
+struct mlx5_aso_ct_action {
+	LIST_ENTRY(mlx5_aso_ct_action) next; /* Pointer to the next ASO CT. */
+	void *dr_action_orig; /* General action object for original dir. */
+	void *dr_action_rply; /* General action object for reply dir. */
+	uint32_t refcnt; /* Action used count in device flows. */
+	uint16_t offset; /* Offset of ASO CT in DevX objects bulk. */
+	uint16_t peer; /* The only peer port index could also use this CT. */
+	enum mlx5_aso_ct_state state; /* ASO CT state. */
+	bool is_original; /* The direction of the DR action to be used. */
+};
+
+/* ASO connection tracking software pool definition. */
+struct mlx5_aso_ct_pool {
+	uint16_t index; /* Pool index in pools array. */
+	struct mlx5_devx_obj *devx_obj;
+	/* The first devx object in the bulk, used for freeing (not yet). */
+	struct mlx5_aso_ct_action actions[MLX5_ASO_CT_ACTIONS_PER_POOL];
+	/* CT action structures bulk. */
+};
+
+LIST_HEAD(aso_ct_list, mlx5_aso_ct_action);
+
+/* Pools management structure for ASO connection tracking pools. */
+struct mlx5_aso_ct_pools_mng {
+	struct mlx5_aso_ct_pool **pools;
+	uint16_t n; /* Total number of pools. */
+	uint16_t next; /* Number of pools in use, index of next free pool. */
+	rte_spinlock_t ct_sl; /* The ASO CT free list lock. */
+	rte_rwlock_t resize_rwl; /* The ASO CT pool resize lock. */
+	struct aso_ct_list free_cts; /* Free ASO CT objects list. */
+	struct mlx5_aso_sq aso_sq; /* ASO queue objects. */
+};
+
 /*
  * Shared Infiniband device context for Master/Representors
  * which belong to same IB device with multiple IB ports.
@@ -996,6 +1042,7 @@ struct mlx5_dev_ctx_shared {
 	uint32_t sq_ts_format:2; /* SQ timestamp formats supported. */
 	uint32_t qp_ts_format:2; /* QP timestamp formats supported. */
 	uint32_t meter_aso_en:1; /* Flow Meter ASO is supported. */
+	uint32_t ct_aso_en:1; /* Connection Tracking ASO is supported. */
 	uint32_t max_port; /* Maximal IB device port index. */
 	struct mlx5_bond_info bond; /* Bonding information. */
 	void *ctx; /* Verbs/DV/DevX context. */
@@ -1058,6 +1105,8 @@ struct mlx5_dev_ctx_shared {
 	rte_spinlock_t geneve_tlv_opt_sl; /* Lock for geneve tlv resource */
 	struct mlx5_flow_mtr_mng *mtrmng;
 	/* Meter management structure. */
+	struct mlx5_aso_ct_pools_mng *ct_mng;
+	/* Management data for ASO connection tracking. */
 	struct mlx5_dev_shared_port port[]; /* per device port data array. */
 };
 
@@ -1355,6 +1404,7 @@ bool mlx5_flex_parser_ecpri_exist(struct rte_eth_dev *dev);
 int mlx5_flex_parser_ecpri_alloc(struct rte_eth_dev *dev);
 int mlx5_flow_aso_age_mng_init(struct mlx5_dev_ctx_shared *sh);
 int mlx5_aso_flow_mtrs_mng_init(struct mlx5_dev_ctx_shared *sh);
+int mlx5_flow_aso_ct_mng_init(struct mlx5_dev_ctx_shared *sh);
 
 /* mlx5_ethdev.c */
 
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index 300987d0e9..c24d865284 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -186,6 +186,43 @@ mlx5_aso_mtr_init_sq(struct mlx5_aso_sq *sq)
 	}
 }
 
+/*
+ * Initialize Send Queue used for ASO connection tracking.
+ *
+ * @param[in] sq
+ *   ASO SQ to initialize.
+ */
+static void
+mlx5_aso_ct_init_sq(struct mlx5_aso_sq *sq)
+{
+	volatile struct mlx5_aso_wqe *restrict wqe;
+	int i;
+	int size = 1 << sq->log_desc_n;
+	uint64_t addr;
+
+	/* All the next fields state should stay constant. */
+	for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
+		wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
+							  (sizeof(*wqe) >> 4));
+		/* One unique MR for the query data. */
+		wqe->aso_cseg.lkey = rte_cpu_to_be_32(sq->mr.mkey->id);
+		/* Magic number 64 represents the length of a ASO CT obj. */
+		addr = (uint64_t)((uintptr_t)sq->mr.addr + i * 64);
+		wqe->aso_cseg.va_h = rte_cpu_to_be_32((uint32_t)(addr >> 32));
+		wqe->aso_cseg.va_l_r = rte_cpu_to_be_32((uint32_t)addr | 1u);
+		/*
+		 * The values of operand_masks are different for modify
+		 * and query.
+		 * And data_mask may be different for each modification. In
+		 * query, it could be zero and ignored.
+		 * CQE generation is always needed, in order to decide when
+		 * it is available to create the flow or read the data.
+		 */
+		wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
+						   MLX5_COMP_MODE_OFFSET);
+	}
+}
+
 /**
  * Create Send Queue used for ASO access.
  *
@@ -293,6 +330,19 @@ mlx5_aso_queue_init(struct mlx5_dev_ctx_shared *sh,
 			return -1;
 		mlx5_aso_mtr_init_sq(&sh->mtrmng->pools_mng.sq);
 		break;
+	case ASO_OPC_MOD_CONNECTION_TRACKING:
+		/* 64B per object for query. */
+		if (mlx5_aso_reg_mr(sh, 64 * sq_desc_n,
+				    &sh->ct_mng->aso_sq.mr, 0))
+			return -1;
+		if (mlx5_aso_sq_create(sh->ctx, &sh->ct_mng->aso_sq, 0,
+				sh->tx_uar, sh->pdn, MLX5_ASO_QUEUE_LOG_DESC,
+				sh->sq_ts_format)) {
+			mlx5_aso_dereg_mr(sh, &sh->ct_mng->aso_sq.mr);
+			return -1;
+		}
+		mlx5_aso_ct_init_sq(&sh->ct_mng->aso_sq);
+		break;
 	default:
 		DRV_LOG(ERR, "Unknown ASO operation mode");
 		return -1;
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v4 05/17] common/mlx5: add Dexv CT objects creation
  2021-05-05  7:19 ` [dpdk-dev] [PATCH v4 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (3 preceding siblings ...)
  2021-05-05  7:19   ` [dpdk-dev] [PATCH v4 04/17] net/mlx5: initialization of CT management Bing Zhao
@ 2021-05-05  7:19   ` Bing Zhao
  2021-05-05  7:19   ` [dpdk-dev] [PATCH v4 06/17] net/mlx5: add modify support for CT Bing Zhao
                     ` (11 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  7:19 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

Adding support for connection tracking ASO creation via Devx command.
Right now only bulk creation is supported.

By default, the objects with zero contents will be created. Before
using a single object, the modification via posting a WQE to the ASO
CT SQ is needed.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/common/mlx5/mlx5_devx_cmds.c | 50 ++++++++++++++++++++++++++++
 drivers/common/mlx5/mlx5_devx_cmds.h |  4 +++
 drivers/common/mlx5/version.map      |  1 +
 3 files changed, 55 insertions(+)

diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index ad67883fde..dc01266642 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -2232,6 +2232,56 @@ mlx5_devx_cmd_create_flow_meter_aso_obj(void *ctx, uint32_t pd,
 	return flow_meter_aso_obj;
 }
 
+/*
+ * Create general object of type CONN_TRACK_OFFLOAD using DevX API.
+ *
+ * @param[in] ctx
+ *   Context returned from mlx5 open_device() glue function.
+ * @param [in] pd
+ *   PD value to associate the CONN_TRACK_OFFLOAD ASO object with.
+ * @param [in] log_obj_size
+ *   log_obj_size to allocate its power of 2 * objects
+ *   in one CONN_TRACK_OFFLOAD bulk allocation.
+ *
+ * @return
+ *   The DevX object created, NULL otherwise and rte_errno is set.
+ */
+struct mlx5_devx_obj *
+mlx5_devx_cmd_create_conn_track_offload_obj(void *ctx, uint32_t pd,
+					    uint32_t log_obj_size)
+{
+	uint32_t in[MLX5_ST_SZ_DW(create_conn_track_aso_in)] = {0};
+	uint32_t out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+	struct mlx5_devx_obj *ct_aso_obj;
+	void *ptr;
+
+	ct_aso_obj = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*ct_aso_obj),
+				 0, SOCKET_ID_ANY);
+	if (!ct_aso_obj) {
+		DRV_LOG(ERR, "Failed to allocate CONN_TRACK_OFFLOAD object.");
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+	ptr = MLX5_ADDR_OF(create_conn_track_aso_in, in, hdr);
+	MLX5_SET(general_obj_in_cmd_hdr, ptr, opcode,
+		 MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, ptr, obj_type,
+		 MLX5_GENERAL_OBJ_TYPE_CONN_TRACK_OFFLOAD);
+	MLX5_SET(general_obj_in_cmd_hdr, ptr, log_obj_range, log_obj_size);
+	ptr = MLX5_ADDR_OF(create_conn_track_aso_in, in, conn_track_offload);
+	MLX5_SET(conn_track_offload, ptr, conn_track_aso_access_pd, pd);
+	ct_aso_obj->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in),
+						     out, sizeof(out));
+	if (!ct_aso_obj->obj) {
+		rte_errno = errno;
+		DRV_LOG(ERR, "Failed to create CONN_TRACK_OFFLOAD obj by using DevX.");
+		mlx5_free(ct_aso_obj);
+		return NULL;
+	}
+	ct_aso_obj->id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+	return ct_aso_obj;
+}
+
 /**
  * Create general object of type GENEVE TLV option using DevX API.
  *
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h
index 746320cf04..e67cea506d 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -569,6 +569,10 @@ struct mlx5_devx_obj *mlx5_devx_cmd_queue_counter_alloc(void *ctx);
 __rte_internal
 int mlx5_devx_cmd_queue_counter_query(struct mlx5_devx_obj *dcs, int clear,
 				      uint32_t *out_of_buffers);
+__rte_internal
+struct mlx5_devx_obj *mlx5_devx_cmd_create_conn_track_offload_obj(void *ctx,
+					uint32_t pd, uint32_t log_obj_size);
+
 /**
  * Create general object of type FLOW_METER_ASO using DevX API..
  *
diff --git a/drivers/common/mlx5/version.map b/drivers/common/mlx5/version.map
index 18dc96276d..4bbcba5b8e 100644
--- a/drivers/common/mlx5/version.map
+++ b/drivers/common/mlx5/version.map
@@ -13,6 +13,7 @@ INTERNAL {
 	mlx5_dev_to_pci_addr; # WINDOWS_NO_EXPORT
 
 	mlx5_devx_cmd_alloc_pd;
+	mlx5_devx_cmd_create_conn_track_offload_obj;
 	mlx5_devx_cmd_create_cq;
 	mlx5_devx_cmd_create_flex_parser;
 	mlx5_devx_cmd_create_qp;
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v4 06/17] net/mlx5: add modify support for CT
  2021-05-05  7:19 ` [dpdk-dev] [PATCH v4 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (4 preceding siblings ...)
  2021-05-05  7:19   ` [dpdk-dev] [PATCH v4 05/17] common/mlx5: add Dexv CT objects creation Bing Zhao
@ 2021-05-05  7:19   ` Bing Zhao
  2021-05-05  7:19   ` [dpdk-dev] [PATCH v4 07/17] net/mlx5: add actions creating " Bing Zhao
                     ` (10 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  7:19 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

After the connection tracking object bulk is allocated, all the
objects' contents are filled with zero by default. Every
new-allocated object must be modified via WQE operation before it is
used.

In order to reduce the latency for the flow creation, an asynchronous
way is used instead of busy waiting for the CQE to be generated.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.h          |   8 +
 drivers/net/mlx5/mlx5_flow.h     |   3 +
 drivers/net/mlx5/mlx5_flow_aso.c | 252 +++++++++++++++++++++++++++++++
 3 files changed, 263 insertions(+)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 1a5c78fa3a..1898a0401f 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -490,6 +490,7 @@ struct mlx5_aso_sq_elem {
 			uint16_t burst_size;
 		};
 		struct mlx5_aso_mtr *mtr;
+		struct mlx5_aso_ct_action *ct;
 	};
 };
 
@@ -1007,6 +1008,10 @@ struct mlx5_aso_ct_action {
 	bool is_original; /* The direction of the DR action to be used. */
 };
 
+/* CT action object state update. */
+#define MLX5_ASO_CT_UPDATE_STATE(c, s) \
+	__atomic_store_n(&((c)->state), (s), __ATOMIC_RELAXED)
+
 /* ASO connection tracking software pool definition. */
 struct mlx5_aso_ct_pool {
 	uint16_t index; /* Pool index in pools array. */
@@ -1690,5 +1695,8 @@ int mlx5_aso_meter_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
 		struct mlx5_aso_mtr *mtr);
 int mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh,
 		struct mlx5_aso_mtr *mtr);
+int mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
+			      struct mlx5_aso_ct_action *ct,
+			      const struct rte_flow_action_conntrack *profile);
 
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 964e13a869..eb5b53ac6a 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -45,6 +45,7 @@ enum mlx5_rte_flow_action_type {
 enum {
 	MLX5_INDIRECT_ACTION_TYPE_RSS,
 	MLX5_INDIRECT_ACTION_TYPE_AGE,
+	MLX5_INDIRECT_ACTION_TYPE_CT,
 };
 
 /* Matches on selected register. */
@@ -839,6 +840,8 @@ struct mlx5_flow {
 #define MLX5_ASO_WQE_CQE_RESPONSE_DELAY 10u
 #define MLX5_MTR_POLL_WQE_CQE_TIMES 100000u
 
+#define MLX5_CT_POLL_WQE_CQE_TIMES MLX5_MTR_POLL_WQE_CQE_TIMES
+
 #define MLX5_MAN_WIDTH 8
 /* Legacy Meter parameter structure. */
 struct mlx5_legacy_flow_meter {
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index c24d865284..0ff19e6171 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -887,3 +887,255 @@ mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh,
 			mtr->offset);
 	return -1;
 }
+
+/*
+ * Post a WQE to the ASO CT SQ to modify the context.
+ *
+ * @param[in] mng
+ *   Pointer to the CT pools management structure.
+ * @param[in] ct
+ *   Pointer to the generic CT structure related to the context.
+ * @param[in] profile
+ *   Pointer to configuration profile.
+ *
+ * @return
+ *   1 on success (number of WQEs posted), 0 when the SQ has no free WQE.
+ */
+static uint16_t
+mlx5_aso_ct_sq_enqueue_single(struct mlx5_aso_ct_pools_mng *mng,
+			      struct mlx5_aso_ct_action *ct,
+			      const struct rte_flow_action_conntrack *profile)
+{
+	volatile struct mlx5_aso_wqe *wqe = NULL;
+	struct mlx5_aso_sq *sq = &mng->aso_sq;
+	uint16_t size = 1 << sq->log_desc_n;
+	uint16_t mask = size - 1;
+	uint16_t res;
+	struct mlx5_aso_ct_pool *pool;
+	void *desg;
+	void *orig_dir;
+	void *reply_dir;
+
+	rte_spinlock_lock(&sq->sqsl);
+	/* SQ lock is held: compute the number of free WQEs from head/tail. */
+	res = size - (uint16_t)(sq->head - sq->tail);
+	if (unlikely(!res)) {
+		rte_spinlock_unlock(&sq->sqsl);
+		DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
+		return 0;
+	}
+	wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
+	rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
+	/* Fill next WQE. */
+	MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_WAIT);
+	sq->elts[sq->head & mask].ct = ct;
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	/* Each WQE will have a single CT object. */
+	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
+						  ct->offset);
+	wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
+			(ASO_OPC_MOD_CONNECTION_TRACKING <<
+			 WQE_CSEG_OPC_MOD_OFFSET) |
+			sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
+	wqe->aso_cseg.operand_masks = rte_cpu_to_be_32
+			(0u |
+			 (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
+			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
+			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
+			 (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
+	wqe->aso_cseg.data_mask = UINT64_MAX;
+	/* The cast drops the volatile qualifier, to make compiler happy. */
+	desg = (void *)(uintptr_t)wqe->aso_dseg.data;
+	MLX5_SET(conn_track_aso, desg, valid, 1);
+	MLX5_SET(conn_track_aso, desg, state, profile->state);
+	MLX5_SET(conn_track_aso, desg, freeze_track, !profile->enable);
+	MLX5_SET(conn_track_aso, desg, connection_assured,
+		 profile->live_connection);
+	MLX5_SET(conn_track_aso, desg, sack_permitted, profile->selective_ack);
+	MLX5_SET(conn_track_aso, desg, challenged_acked,
+		 profile->challenge_ack_passed);
+	/* Heartbeat, retransmission_counter, retranmission_limit_exceeded: 0 */
+	MLX5_SET(conn_track_aso, desg, heartbeat, 0);
+	MLX5_SET(conn_track_aso, desg, max_ack_window,
+		 profile->max_ack_window);
+	MLX5_SET(conn_track_aso, desg, retransmission_counter, 0);
+	MLX5_SET(conn_track_aso, desg, retranmission_limit_exceeded, 0);
+	MLX5_SET(conn_track_aso, desg, retranmission_limit,
+		 profile->retransmission_limit);
+	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_scale,
+		 profile->reply_dir.scale);
+	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_close_initiated,
+		 profile->reply_dir.close_initiated);
+	/* Both directions will use the same liberal mode. */
+	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_liberal_enabled,
+		 profile->liberal_mode);
+	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_data_unacked,
+		 profile->reply_dir.data_unacked);
+	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_max_ack,
+		 profile->reply_dir.last_ack_seen);
+	MLX5_SET(conn_track_aso, desg, original_direction_tcp_scale,
+		 profile->original_dir.scale);
+	MLX5_SET(conn_track_aso, desg, original_direction_tcp_close_initiated,
+		 profile->original_dir.close_initiated);
+	MLX5_SET(conn_track_aso, desg, original_direction_tcp_liberal_enabled,
+		 profile->liberal_mode);
+	MLX5_SET(conn_track_aso, desg, original_direction_tcp_data_unacked,
+		 profile->original_dir.data_unacked);
+	MLX5_SET(conn_track_aso, desg, original_direction_tcp_max_ack,
+		 profile->original_dir.last_ack_seen);
+	MLX5_SET(conn_track_aso, desg, last_win, profile->last_window);
+	MLX5_SET(conn_track_aso, desg, last_dir, profile->last_direction);
+	MLX5_SET(conn_track_aso, desg, last_index, profile->last_index);
+	MLX5_SET(conn_track_aso, desg, last_seq, profile->last_seq);
+	MLX5_SET(conn_track_aso, desg, last_ack, profile->last_ack);
+	MLX5_SET(conn_track_aso, desg, last_end, profile->last_end);
+	orig_dir = MLX5_ADDR_OF(conn_track_aso, desg, original_dir);
+	MLX5_SET(tcp_window_params, orig_dir, sent_end,
+		 profile->original_dir.sent_end);
+	MLX5_SET(tcp_window_params, orig_dir, reply_end,
+		 profile->original_dir.reply_end);
+	MLX5_SET(tcp_window_params, orig_dir, max_win,
+		 profile->original_dir.max_win);
+	MLX5_SET(tcp_window_params, orig_dir, max_ack,
+		 profile->original_dir.max_ack);
+	reply_dir = MLX5_ADDR_OF(conn_track_aso, desg, reply_dir);
+	MLX5_SET(tcp_window_params, reply_dir, sent_end,
+		 profile->reply_dir.sent_end);
+	MLX5_SET(tcp_window_params, reply_dir, reply_end,
+		 profile->reply_dir.reply_end);
+	MLX5_SET(tcp_window_params, reply_dir, max_win,
+		 profile->reply_dir.max_win);
+	MLX5_SET(tcp_window_params, reply_dir, max_ack,
+		 profile->reply_dir.max_ack);
+	sq->head++;
+	sq->pi += 2; /* Each WQE contains 2 WQEBB's. */
+	rte_io_wmb();
+	sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
+	rte_wmb();
+	*sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH. */
+	rte_wmb();
+	rte_spinlock_unlock(&sq->sqsl);
+	return 1;
+}
+
+/*
+ * Mark CTs as ready to be used by flows.
+ * Handles "num" consecutive SQ elements starting from the current SQ tail.
+ *
+ * @param[in] sq
+ *   Pointer to ASO CT SQ.
+ * @param[in] num
+ *   Number of CT structures to be updated.
+ *
+ * This function does not return a value; each CT found is moved to the
+ * ASO_CONNTRACK_READY state.
+ */
+static void
+mlx5_aso_ct_status_update(struct mlx5_aso_sq *sq, uint16_t num)
+{
+	uint16_t size = 1 << sq->log_desc_n;
+	uint16_t mask = size - 1;
+	uint16_t i;
+	struct mlx5_aso_ct_action *ct = NULL;
+	uint16_t idx;
+
+	for (i = 0; i < num; i++) {
+		idx = (uint16_t)((sq->tail + i) & mask);
+		ct = sq->elts[idx].ct;
+		MLX5_ASSERT(ct);
+		MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_READY);
+	}
+}
+
+/*
+ * Handle completions from WQEs sent to ASO CT.
+ *
+ * @param[in] mng
+ *   Pointer to the CT pools management structure.
+ */
+static void
+mlx5_aso_ct_completion_handle(struct mlx5_aso_ct_pools_mng *mng)
+{
+	struct mlx5_aso_sq *sq = &mng->aso_sq;
+	struct mlx5_aso_cq *cq = &sq->cq;
+	volatile struct mlx5_cqe *restrict cqe;
+	const uint32_t cq_size = 1 << cq->log_desc_n;
+	const uint32_t mask = cq_size - 1;
+	uint32_t idx;
+	uint32_t next_idx;
+	uint16_t max;
+	uint16_t n = 0;
+	int ret;
+
+	rte_spinlock_lock(&sq->sqsl);
+	max = (uint16_t)(sq->head - sq->tail);
+	if (unlikely(!max)) {
+		rte_spinlock_unlock(&sq->sqsl);
+		return;
+	}
+	next_idx = cq->cq_ci & mask;
+	do {
+		idx = next_idx;
+		next_idx = (cq->cq_ci + 1) & mask;
+		/* Prefetch the next CQE (TODO: confirm the best position). */
+		rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
+		cqe = &cq->cq_obj.cqes[idx];
+		ret = check_cqe(cqe, cq_size, cq->cq_ci);
+		/*
+		 * Be sure owner read is done before any other cookie field or
+		 * opaque field.
+		 */
+		rte_io_rmb();
+		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
+			if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
+				break;
+			mlx5_aso_cqe_err_handle(sq);
+		} else {
+			n++;
+		}
+		cq->cq_ci++;
+	} while (1);
+	if (likely(n)) {
+		mlx5_aso_ct_status_update(sq, n);
+		sq->tail += n;
+		rte_io_wmb();
+		cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
+	}
+	rte_spinlock_unlock(&sq->sqsl);
+}
+
+/*
+ * Update connection tracking ASO context by posting a WQE (no wait for CQE).
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ * @param[in] ct
+ *   Pointer to connection tracking offload object.
+ * @param[in] profile
+ *   Pointer to connection tracking TCP parameter.
+ *
+ * @return
+ *   0 once the WQE is posted, -1 if no WQE got free after all retries.
+ */
+int
+mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
+			  struct mlx5_aso_ct_action *ct,
+			  const struct rte_flow_action_conntrack *profile)
+{
+	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
+	uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
+	struct mlx5_aso_ct_pool *pool;
+
+	MLX5_ASSERT(ct);
+	do {
+		mlx5_aso_ct_completion_handle(mng);
+		if (mlx5_aso_ct_sq_enqueue_single(mng, ct, profile))
+			return 0;
+		/* SQ is full: wait before polling for a free WQE again. */
+		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
+	} while (--poll_wqe_times);
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	DRV_LOG(ERR, "Fail to send WQE for ASO CT %d in pool %d",
+		ct->offset, pool->index);
+	return -1;
+}
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v4 07/17] net/mlx5: add actions creating for CT
  2021-05-05  7:19 ` [dpdk-dev] [PATCH v4 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (5 preceding siblings ...)
  2021-05-05  7:19   ` [dpdk-dev] [PATCH v4 06/17] net/mlx5: add modify support for CT Bing Zhao
@ 2021-05-05  7:19   ` Bing Zhao
  2021-05-05  7:19   ` [dpdk-dev] [PATCH v4 08/17] net/mlx5: close CT management structure Bing Zhao
                     ` (9 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  7:19 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

Allocate a CT from the management pools and create the DR actions
for both directions by default.

If there is no available connection tracking action, a new pool will
be created with a fixed size bulk allocation. Right now, all the
resources are controlled by the linked list.

The ASO connection tracking context associated with these actions
needs to be updated via WQE before being used for steering.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.h         |   4 +
 drivers/net/mlx5/mlx5_flow.h    |  29 +++-
 drivers/net/mlx5/mlx5_flow_dv.c | 263 ++++++++++++++++++++++++++++++++
 3 files changed, 295 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 1898a0401f..de18a59c8e 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -987,6 +987,10 @@ struct mlx5_bond_info {
 /* Number of connection tracking objects per pool: must be a power of 2. */
 #define MLX5_ASO_CT_ACTIONS_PER_POOL 64
 
+/* Build a unique 1-based CT index from pool index and offset in the pool. */
+#define MLX5_MAKE_CT_IDX(pool, offset) \
+	((pool) * MLX5_ASO_CT_ACTIONS_PER_POOL + (offset) + 1)
+
 /* ASO Conntrack state. */
 enum mlx5_aso_ct_state {
 	ASO_CONNTRACK_FREE, /* Inactive, in the free list. */
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index eb5b53ac6a..8f2bc7d2f6 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -45,7 +45,7 @@ enum mlx5_rte_flow_action_type {
 enum {
 	MLX5_INDIRECT_ACTION_TYPE_RSS,
 	MLX5_INDIRECT_ACTION_TYPE_AGE,
-	MLX5_INDIRECT_ACTION_TYPE_AGE,
+	MLX5_INDIRECT_ACTION_TYPE_CT,
 };
 
 /* Matches on selected register. */
@@ -1288,6 +1288,33 @@ mlx5_aso_meter_by_idx(struct mlx5_priv *priv, uint32_t idx)
 	return &pool->mtrs[idx % MLX5_ASO_MTRS_PER_POOL];
 }
 
+/*
+ * Get ASO CT action by index.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] idx
+ *   1-based index of the ASO CT action (it is decremented before use).
+ *
+ * @return
+ *   The specified ASO CT action pointer.
+ */
+static inline struct mlx5_aso_ct_action *
+flow_aso_ct_get_by_idx(struct rte_eth_dev *dev, uint32_t idx)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	struct mlx5_aso_ct_pool *pool;
+
+	idx--;
+	MLX5_ASSERT((idx / MLX5_ASO_CT_ACTIONS_PER_POOL) < mng->n);
+	/* Pool size is a power of 2: shift/AND could replace div/mod. */
+	rte_rwlock_read_lock(&mng->resize_rwl);
+	pool = mng->pools[idx / MLX5_ASO_CT_ACTIONS_PER_POOL];
+	rte_rwlock_read_unlock(&mng->resize_rwl);
+	return &pool->actions[idx % MLX5_ASO_CT_ACTIONS_PER_POOL];
+}
+
 int mlx5_flow_group_to_table(struct rte_eth_dev *dev,
 			     const struct mlx5_flow_tunnel *tunnel,
 			     uint32_t group, uint32_t *table,
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 0d022dff3f..c8ff693e4c 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -11120,6 +11120,262 @@ flow_dv_translate_create_aso_age(struct rte_eth_dev *dev,
 	return age_idx;
 }
 
+/*
+ * Drop one reference of an ASO CT action and free it on the last one.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] idx
+ *   Index of ASO CT action to release.
+ *
+ * @return
+ *   0 when CT action was removed, otherwise the number of references.
+ */
+static inline int
+flow_dv_aso_ct_release(struct rte_eth_dev *dev, uint32_t idx)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	struct mlx5_aso_ct_action *ct = flow_aso_ct_get_by_idx(dev, idx);
+	uint32_t ret = __atomic_sub_fetch(&ct->refcnt, 1, __ATOMIC_RELAXED);
+
+	if (!ret) {
+		if (ct->dr_action_orig) {
+#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
+			claim_zero(mlx5_glue->destroy_flow_action
+					(ct->dr_action_orig));
+#endif
+			ct->dr_action_orig = NULL;
+		}
+		if (ct->dr_action_rply) {
+#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
+			claim_zero(mlx5_glue->destroy_flow_action
+					(ct->dr_action_rply));
+#endif
+			ct->dr_action_rply = NULL;
+		}
+		rte_spinlock_lock(&mng->ct_sl);
+		LIST_INSERT_HEAD(&mng->free_cts, ct, next);
+		rte_spinlock_unlock(&mng->ct_sl);
+	}
+	return ret;
+}
+
+/*
+ * Grow the ASO CT pools array by 64 entries, keeping the existing pointers.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ *
+ * @return
+ *   0 on success, otherwise negative errno value and rte_errno is set.
+ */
+static int
+flow_dv_aso_ct_pools_resize(struct rte_eth_dev *dev)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	void *old_pools = mng->pools;
+	/* TODO: replace the magic growth step (64) with a macro. */
+	uint32_t resize = mng->n + 64;
+	uint32_t mem_size = sizeof(struct mlx5_aso_ct_pool *) * resize;
+	void *pools = mlx5_malloc(MLX5_MEM_ZERO, mem_size, 0, SOCKET_ID_ANY);
+
+	if (!pools) {
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+	rte_rwlock_write_lock(&mng->resize_rwl);
+	/* ASO SQ/QP was already initialized in the startup. */
+	if (old_pools) {
+		/* Realloc could be an alternative choice. */
+		rte_memcpy(pools, old_pools,
+			   mng->n * sizeof(struct mlx5_aso_ct_pool *));
+		mlx5_free(old_pools);
+	}
+	mng->n = resize;
+	mng->pools = pools;
+	rte_rwlock_write_unlock(&mng->resize_rwl);
+	return 0;
+}
+
+/*
+ * Create and initialize a new ASO CT pool.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[out] ct_free
+ *   Where to put the pointer of a new CT action.
+ *
+ * @return
+ *   The CT actions pool pointer and @p ct_free is set on success,
+ *   NULL otherwise and rte_errno is set.
+ */
+static struct mlx5_aso_ct_pool *
+flow_dv_ct_pool_create(struct rte_eth_dev *dev,
+		       struct mlx5_aso_ct_action **ct_free)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	struct mlx5_aso_ct_pool *pool = NULL;
+	struct mlx5_devx_obj *obj = NULL;
+	uint32_t i;
+	uint32_t log_obj_size = rte_log2_u32(MLX5_ASO_CT_ACTIONS_PER_POOL);
+
+	obj = mlx5_devx_cmd_create_conn_track_offload_obj(priv->sh->ctx,
+						priv->sh->pdn, log_obj_size);
+	if (!obj) {
+		rte_errno = ENODATA;
+		DRV_LOG(ERR, "Failed to create conn_track_offload_obj using DevX.");
+		return NULL;
+	}
+	pool = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*pool), 0, SOCKET_ID_ANY);
+	if (!pool) {
+		rte_errno = ENOMEM;
+		claim_zero(mlx5_devx_cmd_destroy(obj));
+		return NULL;
+	}
+	pool->devx_obj = obj;
+	pool->index = mng->next;
+	/* Resize pools array if there is no room for the new pool in it. */
+	if (pool->index == mng->n && flow_dv_aso_ct_pools_resize(dev)) {
+		claim_zero(mlx5_devx_cmd_destroy(obj));
+		mlx5_free(pool);
+		return NULL;
+	}
+	mng->pools[pool->index] = pool;
+	mng->next++;
+	/* Assign the first action in the new pool, the rest go to free list. */
+	*ct_free = &pool->actions[0];
+	/* Caller is expected to hold mng->ct_sl, no list locking is done. */
+	for (i = 1; i < MLX5_ASO_CT_ACTIONS_PER_POOL; i++) {
+		/* refcnt is already 0: the pool memory was zero-allocated. */
+		pool->actions[i].offset = i;
+		LIST_INSERT_HEAD(&mng->free_cts, &pool->actions[i], next);
+	}
+	return pool;
+}
+
+/*
+ * Allocate an ASO CT action from the free list, or from a newly created pool.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   Index to ASO CT action on success, 0 otherwise and rte_errno is set.
+ */
+static uint32_t
+flow_dv_aso_ct_alloc(struct rte_eth_dev *dev, struct rte_flow_error *error)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	struct mlx5_aso_ct_action *ct = NULL;
+	struct mlx5_aso_ct_pool *pool;
+	uint8_t reg_c;
+	uint32_t ct_idx;
+
+	MLX5_ASSERT(mng);
+	if (!priv->config.devx) {
+		rte_errno = ENOTSUP;
+		return 0;
+	}
+	/* Take a free CT action; create a new pool if there is none. */
+	rte_spinlock_lock(&mng->ct_sl);
+	ct = LIST_FIRST(&mng->free_cts);
+	if (ct) {
+		LIST_REMOVE(ct, next);
+	} else if (!flow_dv_ct_pool_create(dev, &ct)) {
+		rte_spinlock_unlock(&mng->ct_sl);
+		rte_flow_error_set(error, rte_errno, RTE_FLOW_ERROR_TYPE_ACTION,
+				   NULL, "failed to create ASO CT pool");
+		return 0;
+	}
+	rte_spinlock_unlock(&mng->ct_sl);
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	ct_idx = MLX5_MAKE_CT_IDX(pool->index, ct->offset);
+	/* 0: inactive, 1: created, 2+: used by flows. */
+	__atomic_store_n(&ct->refcnt, 1, __ATOMIC_RELAXED);
+	reg_c = mlx5_flow_get_reg_id(dev, MLX5_ASO_CONNTRACK, 0, error);
+	if (!ct->dr_action_orig) {
+#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
+		ct->dr_action_orig = mlx5_glue->dv_create_flow_action_aso
+			(priv->sh->rx_domain, pool->devx_obj->obj,
+			 ct->offset,
+			 MLX5DV_DR_ACTION_FLAGS_ASO_CT_DIRECTION_INITIATOR,
+			 reg_c - REG_C_0);
+#else
+		RTE_SET_USED(reg_c);
+#endif
+		if (!ct->dr_action_orig) {
+			flow_dv_aso_ct_release(dev, ct_idx);
+			rte_flow_error_set(error, rte_errno,
+					   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					   "failed to create ASO CT action");
+			return 0;
+		}
+	}
+	if (!ct->dr_action_rply) {
+#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
+		ct->dr_action_rply = mlx5_glue->dv_create_flow_action_aso
+			(priv->sh->rx_domain, pool->devx_obj->obj,
+			 ct->offset,
+			 MLX5DV_DR_ACTION_FLAGS_ASO_CT_DIRECTION_RESPONDER,
+			 reg_c - REG_C_0);
+#endif
+		if (!ct->dr_action_rply) {
+			flow_dv_aso_ct_release(dev, ct_idx);
+			rte_flow_error_set(error, rte_errno,
+					   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					   "failed to create ASO CT action");
+			return 0;
+		}
+	}
+	return ct_idx;
+}
+
+/*
+ * Create a conntrack object with context and actions by using ASO mechanism.
+ *
+ * @param[in] dev
+ *   Pointer to rte_eth_dev structure.
+ * @param[in] pro
+ *   Pointer to conntrack information profile.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   Index to conntrack object on success, 0 otherwise.
+ */
+static uint32_t
+flow_dv_translate_create_conntrack(struct rte_eth_dev *dev,
+				   const struct rte_flow_action_conntrack *pro,
+				   struct rte_flow_error *error)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
+	struct mlx5_aso_ct_action *ct;
+	uint32_t idx;
+
+	/*
+	 * rte_flow_error_set() returns a negative value, which would turn
+	 * into a non-zero index in the unsigned return type of this function.
+	 * Fill the error and return 0 explicitly on every failure path.
+	 */
+	if (!sh->ct_aso_en) {
+		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+				   NULL, "Connection is not supported");
+		return 0;
+	}
+	idx = flow_dv_aso_ct_alloc(dev, error);
+	if (!idx) {
+		rte_flow_error_set(error, rte_errno, RTE_FLOW_ERROR_TYPE_ACTION,
+				   NULL, "Failed to allocate CT object");
+		return 0;
+	}
+	ct = flow_aso_ct_get_by_idx(dev, idx);
+	if (mlx5_aso_ct_update_by_wqe(sh, ct, pro)) {
+		/* Drop the reference taken by the allocation, no CT leak. */
+		flow_dv_aso_ct_release(dev, idx);
+		rte_flow_error_set(error, EBUSY, RTE_FLOW_ERROR_TYPE_ACTION,
+				   NULL, "Failed to update CT");
+		return 0;
+	}
+	return idx;
+}
+
 /**
  * Fill the flow with DV spec, lock free
  * (mutex should be acquired by caller).
@@ -13317,6 +13573,7 @@ flow_dv_action_create(struct rte_eth_dev *dev,
 {
 	uint32_t idx = 0;
 	uint32_t ret = 0;
+	struct mlx5_priv *priv = dev->data->dev_private;
 
 	switch (action->type) {
 	case RTE_FLOW_ACTION_TYPE_RSS:
@@ -13337,6 +13594,12 @@ flow_dv_action_create(struct rte_eth_dev *dev,
 							 (void *)(uintptr_t)idx;
 		}
 		break;
+	case RTE_FLOW_ACTION_TYPE_CONNTRACK:
+		ret = flow_dv_translate_create_conntrack(dev, action->conf,
+							 err);
+		idx = (MLX5_INDIRECT_ACTION_TYPE_CT <<
+		       MLX5_INDIRECT_ACTION_TYPE_OFFSET) | ret;
+		break;
 	default:
 		rte_flow_error_set(err, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
 				   NULL, "action type not supported");
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v4 08/17] net/mlx5: close CT management structure
  2021-05-05  7:19 ` [dpdk-dev] [PATCH v4 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (6 preceding siblings ...)
  2021-05-05  7:19   ` [dpdk-dev] [PATCH v4 07/17] net/mlx5: add actions creating " Bing Zhao
@ 2021-05-05  7:19   ` Bing Zhao
  2021-05-05  7:19   ` [dpdk-dev] [PATCH v4 09/17] net/mlx5: add ASO CT query implementation Bing Zhao
                     ` (8 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  7:19 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

When the IB shared context is freed while stopping a device, the
ASO connection tracking management structure should also be cleaned
up.

All the DR actions created should be destroyed. The structures need
to be freed and the ASO CT QP should be released. Meanwhile, the
allocated and registered memory region for query should also be
deregistered and then freed.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.c          | 56 ++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_aso.c |  4 +++
 2 files changed, 60 insertions(+)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 86dbe6d573..d563da109a 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -706,6 +706,60 @@ mlx5_flow_aso_ct_mng_init(struct mlx5_dev_ctx_shared *sh)
 	return 0;
 }
 
+/*
+ * Close and release all the resources of the
+ * ASO connection tracking management structure.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object to free.
+ */
+static void
+mlx5_flow_aso_ct_mng_close(struct mlx5_dev_ctx_shared *sh)
+{
+	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
+	struct mlx5_aso_ct_pool *ct_pool;
+	struct mlx5_aso_ct_action *ct;
+	uint32_t idx;
+	uint32_t val;
+	uint32_t cnt;
+	int i;
+
+	mlx5_aso_queue_uninit(sh, ASO_OPC_MOD_CONNECTION_TRACKING);
+	idx = mng->next;
+	while (idx--) {
+		cnt = 0;
+		ct_pool = mng->pools[idx];
+		for (i = 0; i < MLX5_ASO_CT_ACTIONS_PER_POOL; i++) {
+			ct = &ct_pool->actions[i];
+			val = __atomic_fetch_sub(&ct->refcnt, 1,
+						 __ATOMIC_RELAXED);
+			MLX5_ASSERT(val == 1);
+			if (val > 1)
+				cnt++;
+#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
+			if (ct->dr_action_orig)
+				claim_zero(mlx5_glue->destroy_flow_action
+							(ct->dr_action_orig));
+			if (ct->dr_action_rply)
+				claim_zero(mlx5_glue->destroy_flow_action
+							(ct->dr_action_rply));
+#endif
+		}
+		claim_zero(mlx5_devx_cmd_destroy(ct_pool->devx_obj));
+		if (cnt) {
+			DRV_LOG(DEBUG, "%u ASO CT objects are being used in the pool %u",
+				cnt, idx);
+		}
+		mlx5_free(ct_pool);
+		/* Keep "next" in sync with the pools already released. */
+		mng->next--;
+	}
+	mlx5_free(mng->pools);
+	mlx5_free(mng);
+	/* Management structure must be cleared to 0s during allocation. */
+	sh->ct_mng = NULL;
+}
+
 /**
  * Initialize the flow resources' indexed mempool.
  *
@@ -1508,6 +1562,8 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	if (priv->mreg_cp_tbl)
 		mlx5_hlist_destroy(priv->mreg_cp_tbl);
 	mlx5_mprq_free_mp(dev);
+	if (priv->sh->ct_mng)
+		mlx5_flow_aso_ct_mng_close(priv->sh);
 	mlx5_os_free_shared_dr(priv);
 	if (priv->rss_conf.rss_key != NULL)
 		mlx5_free(priv->rss_conf.rss_key);
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index 0ff19e6171..3c2350a6b8 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -372,6 +372,10 @@ mlx5_aso_queue_uninit(struct mlx5_dev_ctx_shared *sh,
 	case ASO_OPC_MOD_POLICER:
 		sq = &sh->mtrmng->pools_mng.sq;
 		break;
+	case ASO_OPC_MOD_CONNECTION_TRACKING:
+		mlx5_aso_dereg_mr(sh, &sh->ct_mng->aso_sq.mr);
+		sq = &sh->ct_mng->aso_sq;
+		break;
 	default:
 		DRV_LOG(ERR, "Unknown ASO operation mode");
 		return;
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v4 09/17] net/mlx5: add ASO CT query implementation
  2021-05-05  7:19 ` [dpdk-dev] [PATCH v4 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (7 preceding siblings ...)
  2021-05-05  7:19   ` [dpdk-dev] [PATCH v4 08/17] net/mlx5: close CT management structure Bing Zhao
@ 2021-05-05  7:19   ` Bing Zhao
  2021-05-05  7:19   ` [dpdk-dev] [PATCH v4 10/17] net/mlx5: add ASO CT destroy handling Bing Zhao
                     ` (7 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  7:19 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

After the connection tracking context is created and used by the
flows, the context is updated by the HW automatically after a packet
passes the CT validation. E.g., the ACK, SEQ, window and state of the
CT can be updated by traffic in both directions.

In order to query the updated contents of this context, a WQE should
be posted to the SQ with a return buffer. The data will be filled
into the buffer, and the profile will then be filled from that data.

During the execution of query command, the context may be updated.
The result of the query command may not be the latest one.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.h          |  10 +-
 drivers/net/mlx5/mlx5_flow_aso.c | 245 +++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_dv.c  |  19 +++
 3 files changed, 273 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index de18a59c8e..d2827e78d7 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -490,7 +490,10 @@ struct mlx5_aso_sq_elem {
 			uint16_t burst_size;
 		};
 		struct mlx5_aso_mtr *mtr;
-		struct mlx5_aso_ct_action *ct;
+		struct {
+			struct mlx5_aso_ct_action *ct;
+			char *query_data;
+		};
 	};
 };
 
@@ -1702,5 +1705,10 @@ int mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh,
 int mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
 			      struct mlx5_aso_ct_action *ct,
 			      const struct rte_flow_action_conntrack *profile);
+int mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh,
+			   struct mlx5_aso_ct_action *ct);
+int mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
+			     struct mlx5_aso_ct_action *ct,
+			     struct rte_flow_action_conntrack *profile);
 
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index 3c2350a6b8..3f7ed371bf 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -933,6 +933,7 @@ mlx5_aso_ct_sq_enqueue_single(struct mlx5_aso_ct_pools_mng *mng,
 	/* Fill next WQE. */
 	MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_WAIT);
 	sq->elts[sq->head & mask].ct = ct;
+	sq->elts[sq->head & mask].query_data = NULL;
 	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
 	/* Each WQE will have a single CT object. */
 	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
@@ -1048,9 +1049,95 @@ mlx5_aso_ct_status_update(struct mlx5_aso_sq *sq, uint16_t num)
 		ct = sq->elts[idx].ct;
 		MLX5_ASSERT(ct);
 		MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_READY);
+		if (sq->elts[idx].query_data)
+			rte_memcpy(sq->elts[idx].query_data,
+				   (char *)((uintptr_t)sq->mr.addr + idx * 64),
+				   64);
 	}
 }
 
+/*
+ * Post a WQE to the ASO CT SQ to query the current context.
+ *
+ * @param[in] mng
+ *   Pointer to the CT pools management structure.
+ * @param[in] ct
+ *   Pointer to the generic CT structure related to the context.
+ * @param[in] data
+ *   Pointer to data area to be filled.
+ *
+ * @return
+ *   1 on success, 0 if CT is in WAIT state or SQ is full, -1 if CT is free.
+ */
+static int
+mlx5_aso_ct_sq_query_single(struct mlx5_aso_ct_pools_mng *mng,
+			    struct mlx5_aso_ct_action *ct, char *data)
+{
+	volatile struct mlx5_aso_wqe *wqe = NULL;
+	struct mlx5_aso_sq *sq = &mng->aso_sq;
+	uint16_t size = 1 << sq->log_desc_n;
+	uint16_t mask = size - 1;
+	uint16_t res;
+	uint16_t wqe_idx;
+	struct mlx5_aso_ct_pool *pool;
+	enum mlx5_aso_ct_state state =
+				__atomic_load_n(&ct->state, __ATOMIC_RELAXED);
+
+	if (state == ASO_CONNTRACK_FREE) {
+		DRV_LOG(ERR, "Fail: No context to query");
+		return -1;
+	} else if (state == ASO_CONNTRACK_WAIT) {
+		return 0;
+	}
+	rte_spinlock_lock(&sq->sqsl);
+	res = size - (uint16_t)(sq->head - sq->tail);
+	if (unlikely(!res)) {
+		rte_spinlock_unlock(&sq->sqsl);
+		DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
+		return 0;
+	}
+	MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_QUERY);
+	wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
+	/* Confirm the location and address of the prefetch instruction. */
+	rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
+	/* Fill next WQE. */
+	wqe_idx = sq->head & mask;
+	sq->elts[wqe_idx].ct = ct;
+	sq->elts[wqe_idx].query_data = data;
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	/* Each WQE will have a single CT object. */
+	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
+						  ct->offset);
+	wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
+			(ASO_OPC_MOD_CONNECTION_TRACKING <<
+			 WQE_CSEG_OPC_MOD_OFFSET) |
+			sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
+	/*
+	 * No write request is required for a query.
+	 * ASO_OPER_LOGICAL_AND and ASO_OP_ALWAYS_FALSE are both 0.
+	 * "BYTEWISE_64BYTE" is needed for a whole context.
+	 * Set to 0 directly to reduce an endian swap. (Modify should rewrite.)
+	 * "data_mask" is ignored.
+	 * Buffer address was already filled during initialization.
+	 */
+	wqe->aso_cseg.operand_masks = rte_cpu_to_be_32(BYTEWISE_64BYTE <<
+					ASO_CSEG_DATA_MASK_MODE_OFFSET);
+	wqe->aso_cseg.data_mask = 0;
+	sq->head++;
+	/*
+	 * Each WQE contains 2 WQEBB's, even though
+	 * data segment is not used in this case.
+	 */
+	sq->pi += 2;
+	rte_io_wmb();
+	sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
+	rte_wmb();
+	*sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH. */
+	rte_wmb();
+	rte_spinlock_unlock(&sq->sqsl);
+	return 1;
+}
+
 /*
  * Handle completions from WQEs sent to ASO CT.
  *
@@ -1143,3 +1230,161 @@ mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
 		ct->offset, pool->index);
 	return -1;
 }
+
+/*
+ * The routine is used to wait for WQE completion to continue with queried data.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ * @param[in] ct
+ *   Pointer to connection tracking offload object.
+ *
+ * @return
+ *   0 on success, -1 on failure.
+ */
+int
+mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh,
+		       struct mlx5_aso_ct_action *ct)
+{
+	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
+	uint32_t poll_cqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
+	struct mlx5_aso_ct_pool *pool;
+
+	if (__atomic_load_n(&ct->state, __ATOMIC_RELAXED) ==
+	    ASO_CONNTRACK_READY)
+		return 0;
+	do {
+		mlx5_aso_ct_completion_handle(mng);
+		if (__atomic_load_n(&ct->state, __ATOMIC_RELAXED) ==
+		    ASO_CONNTRACK_READY)
+			return 0;
+		/* Waiting for CQE ready, consider should block or sleep. */
+		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
+	} while (--poll_cqe_times);
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	DRV_LOG(ERR, "Fail to poll CQE for ASO CT %d in pool %d",
+		ct->offset, pool->index);
+	return -1;
+}
+
+/*
+ * Convert the hardware conntrack data format into the profile.
+ *
+ * @param[out] profile
+ *   Pointer to conntrack profile to be filled after query.
+ * @param[in] wdata
+ *   Pointer to data fetched from hardware.
+ */
+static inline void
+mlx5_aso_ct_obj_analyze(struct rte_flow_action_conntrack *profile,
+			char *wdata)
+{
+	void *o_dir = MLX5_ADDR_OF(conn_track_aso, wdata, original_dir);
+	void *r_dir = MLX5_ADDR_OF(conn_track_aso, wdata, reply_dir);
+
+	/* MLX5_GET16 should be taken into consideration. */
+	profile->state = (enum rte_flow_conntrack_state)
+			 MLX5_GET(conn_track_aso, wdata, state);
+	profile->enable = !MLX5_GET(conn_track_aso, wdata, freeze_track);
+	profile->selective_ack = MLX5_GET(conn_track_aso, wdata,
+					  sack_permitted);
+	profile->live_connection = MLX5_GET(conn_track_aso, wdata,
+					    connection_assured);
+	profile->challenge_ack_passed = MLX5_GET(conn_track_aso, wdata,
+						 challenged_acked);
+	profile->max_ack_window = MLX5_GET(conn_track_aso, wdata,
+					   max_ack_window);
+	profile->retransmission_limit = MLX5_GET(conn_track_aso, wdata,
+						 retranmission_limit);
+	profile->last_window = MLX5_GET(conn_track_aso, wdata, last_win);
+	profile->last_direction = MLX5_GET(conn_track_aso, wdata, last_dir);
+	profile->last_index = (enum rte_flow_conntrack_tcp_last_index)
+			      MLX5_GET(conn_track_aso, wdata, last_index);
+	profile->last_seq = MLX5_GET(conn_track_aso, wdata, last_seq);
+	profile->last_ack = MLX5_GET(conn_track_aso, wdata, last_ack);
+	profile->last_end = MLX5_GET(conn_track_aso, wdata, last_end);
+	profile->liberal_mode = MLX5_GET(conn_track_aso, wdata,
+				reply_direction_tcp_liberal_enabled) |
+				MLX5_GET(conn_track_aso, wdata,
+				original_direction_tcp_liberal_enabled);
+	/* No liberal in the RTE structure profile. */
+	profile->reply_dir.scale = MLX5_GET(conn_track_aso, wdata,
+					    reply_direction_tcp_scale);
+	profile->reply_dir.close_initiated = MLX5_GET(conn_track_aso, wdata,
+					reply_direction_tcp_close_initiated);
+	profile->reply_dir.data_unacked = MLX5_GET(conn_track_aso, wdata,
+					reply_direction_tcp_data_unacked);
+	profile->reply_dir.last_ack_seen = MLX5_GET(conn_track_aso, wdata,
+					reply_direction_tcp_max_ack);
+	profile->reply_dir.sent_end = MLX5_GET(tcp_window_params,
+					       r_dir, sent_end);
+	profile->reply_dir.reply_end = MLX5_GET(tcp_window_params,
+						r_dir, reply_end);
+	profile->reply_dir.max_win = MLX5_GET(tcp_window_params,
+					      r_dir, max_win);
+	profile->reply_dir.max_ack = MLX5_GET(tcp_window_params,
+					      r_dir, max_ack);
+	profile->original_dir.scale = MLX5_GET(conn_track_aso, wdata,
+					       original_direction_tcp_scale);
+	profile->original_dir.close_initiated = MLX5_GET(conn_track_aso, wdata,
+					original_direction_tcp_close_initiated);
+	profile->original_dir.data_unacked = MLX5_GET(conn_track_aso, wdata,
+					original_direction_tcp_data_unacked);
+	profile->original_dir.last_ack_seen = MLX5_GET(conn_track_aso, wdata,
+					original_direction_tcp_max_ack);
+	profile->original_dir.sent_end = MLX5_GET(tcp_window_params,
+						  o_dir, sent_end);
+	profile->original_dir.reply_end = MLX5_GET(tcp_window_params,
+						   o_dir, reply_end);
+	profile->original_dir.max_win = MLX5_GET(tcp_window_params,
+						 o_dir, max_win);
+	profile->original_dir.max_ack = MLX5_GET(tcp_window_params,
+						 o_dir, max_ack);
+}
+
+/*
+ * Query connection tracking information parameters by sending a WQE.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ * @param[in] ct
+ *   Pointer to connection tracking offload object.
+ * @param[out] profile
+ *   Pointer to connection tracking TCP information.
+ *
+ * @return
+ *   0 on success, -1 on failure.
+ */
+int
+mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
+			 struct mlx5_aso_ct_action *ct,
+			 struct rte_flow_action_conntrack *profile)
+{
+	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
+	uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
+	struct mlx5_aso_ct_pool *pool;
+	char out_data[64 * 2];
+	int ret;
+
+	MLX5_ASSERT(ct);
+	do {
+		mlx5_aso_ct_completion_handle(mng);
+		ret = mlx5_aso_ct_sq_query_single(mng, ct, out_data);
+		if (ret < 0)
+			return ret;
+		else if (ret > 0)
+			goto data_handle;
+		/* Waiting for wqe resource or state. */
+		else
+			rte_delay_us_sleep(10u);
+	} while (--poll_wqe_times);
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	DRV_LOG(ERR, "Fail to send WQE for ASO CT %d in pool %d",
+		ct->offset, pool->index);
+	return -1;
+data_handle:
+	ret = mlx5_aso_ct_wait_ready(sh, ct);
+	if (!ret)
+		mlx5_aso_ct_obj_analyze(profile, out_data);
+	return ret;
+}
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index c8ff693e4c..84e7f0b3d3 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -13775,6 +13775,8 @@ flow_dv_action_query(struct rte_eth_dev *dev,
 	uint32_t act_idx = (uint32_t)(uintptr_t)handle;
 	uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
 	uint32_t idx = act_idx & ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_action *ct;
 
 	switch (type) {
 	case MLX5_INDIRECT_ACTION_TYPE_AGE:
@@ -13788,6 +13790,23 @@ flow_dv_action_query(struct rte_eth_dev *dev,
 			resp->sec_since_last_hit = __atomic_load_n
 			     (&age_param->sec_since_last_hit, __ATOMIC_RELAXED);
 		return 0;
+	case MLX5_INDIRECT_ACTION_TYPE_CT:
+		ct = flow_aso_ct_get_by_idx(dev, idx);
+		if (!ct->refcnt)
+			return rte_flow_error_set(error, EFAULT,
+					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					NULL,
+					"CT object is inactive");
+		((struct rte_flow_action_conntrack *)data)->peer_port =
+							ct->peer;
+		((struct rte_flow_action_conntrack *)data)->is_original_dir =
+							ct->is_original;
+		if (mlx5_aso_ct_query_by_wqe(priv->sh, ct, data))
+			return rte_flow_error_set(error, EIO,
+					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					NULL,
+					"Failed to query CT context");
+		return 0;
 	default:
 		return rte_flow_error_set(error, ENOTSUP,
 					  RTE_FLOW_ERROR_TYPE_ACTION,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v4 10/17] net/mlx5: add ASO CT destroy handling
  2021-05-05  7:19 ` [dpdk-dev] [PATCH v4 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (8 preceding siblings ...)
  2021-05-05  7:19   ` [dpdk-dev] [PATCH v4 09/17] net/mlx5: add ASO CT query implementation Bing Zhao
@ 2021-05-05  7:19   ` Bing Zhao
  2021-05-05  7:19   ` [dpdk-dev] [PATCH v4 11/17] net/mlx5: add translation of CT action Bing Zhao
                     ` (6 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  7:19 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

When trying to destroy an ASO connection tracking context, the DR
action created on this context should also be destroyed. Before
inserting the related software object into the management free list,
the reference count should be checked.

Right now, the context object will not be freed to the system and
will be reused directly from the free list.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow_dv.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 84e7f0b3d3..0fa0671ace 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -11136,9 +11136,15 @@ flow_dv_aso_ct_release(struct rte_eth_dev *dev, uint32_t idx)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	uint32_t ret;
 	struct mlx5_aso_ct_action *ct = flow_aso_ct_get_by_idx(dev, idx);
-	uint32_t ret = __atomic_sub_fetch(&ct->refcnt, 1, __ATOMIC_RELAXED);
+	enum mlx5_aso_ct_state state =
+			__atomic_load_n(&ct->state, __ATOMIC_RELAXED);
 
+	/* Cannot release when CT is in the ASO SQ. */
+	if (state == ASO_CONNTRACK_WAIT || state == ASO_CONNTRACK_QUERY)
+		return -1;
+	ret = __atomic_sub_fetch(&ct->refcnt, 1, __ATOMIC_RELAXED);
 	if (!ret) {
 		if (ct->dr_action_orig) {
 #ifdef HAVE_MLX5_DR_ACTION_ASO_CT
@@ -11154,6 +11160,8 @@ flow_dv_aso_ct_release(struct rte_eth_dev *dev, uint32_t idx)
 #endif
 			ct->dr_action_rply = NULL;
 		}
+		/* Clear the state to free, no need in 1st allocation. */
+		MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_FREE);
 		rte_spinlock_lock(&mng->ct_sl);
 		LIST_INSERT_HEAD(&mng->free_cts, ct, next);
 		rte_spinlock_unlock(&mng->ct_sl);
@@ -13648,6 +13656,12 @@ flow_dv_action_destroy(struct rte_eth_dev *dev,
 			DRV_LOG(DEBUG, "Indirect age action %" PRIu32 " was"
 				" released with references %d.", idx, ret);
 		return 0;
+	case MLX5_INDIRECT_ACTION_TYPE_CT:
+		ret = flow_dv_aso_ct_release(dev, idx);
+		if (ret)
+			DRV_LOG(DEBUG, "Connection tracking object %u still "
+				"has references %d.", idx, ret);
+		return 0;
 	default:
 		return rte_flow_error_set(error, ENOTSUP,
 					  RTE_FLOW_ERROR_TYPE_ACTION,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v4 11/17] net/mlx5: add translation of CT action
  2021-05-05  7:19 ` [dpdk-dev] [PATCH v4 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (9 preceding siblings ...)
  2021-05-05  7:19   ` [dpdk-dev] [PATCH v4 10/17] net/mlx5: add ASO CT destroy handling Bing Zhao
@ 2021-05-05  7:19   ` Bing Zhao
  2021-05-05  7:19   ` [dpdk-dev] [PATCH v4 12/17] net/mlx5: add translation of CT item Bing Zhao
                     ` (5 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  7:19 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

When creating a flow with this action context for CT, it needs to be
translated in 2 levels.

First, retrieve from action context to rte_flow action.
Second, translate it to the corresponding DR action with traffic
direction that was specified when creating or updating via
rte_flow_action_handle* API.

Before using the DR action in a flow, the CT context should be
available to use in the hardware. A synchronization is done before
inserting the flow rule with CT action to check the HW availability
of this CT context.

In order to release the DR actions and reuse the context of a CT,
the reference count should also be handled when the flow rule is
destroyed.

The CT index will be recorded in the rte_flow by reusing the ASO age
index to save memory, since only one ASO action is supported in one
flow rule currently. The action context type should also be saved
for CT. When destroying a flow rule, if the context type is CT and
the index is valid (non-zero), the release process should be
handled. By default, the handling will fall back to try to release
the ASO age if any.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.h          |  2 ++
 drivers/net/mlx5/mlx5_flow.c     |  9 +++++++
 drivers/net/mlx5/mlx5_flow.h     |  7 +++++-
 drivers/net/mlx5/mlx5_flow_aso.c | 41 ++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_dv.c  | 28 +++++++++++++++++++++-
 5 files changed, 85 insertions(+), 2 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index d2827e78d7..d01a10ea54 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1710,5 +1710,7 @@ int mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh,
 int mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
 			     struct mlx5_aso_ct_action *ct,
 			     struct rte_flow_action_conntrack *profile);
+int mlx5_aso_ct_available(struct mlx5_dev_ctx_shared *sh,
+			  struct mlx5_aso_ct_action *ct);
 
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index edad6007a8..f36eeae03f 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -3553,6 +3553,15 @@ flow_action_handles_translate(struct rte_eth_dev *dev,
 				break;
 			}
 			/* Fall-through */
+		case MLX5_INDIRECT_ACTION_TYPE_CT:
+			if (priv->sh->ct_aso_en) {
+				translated[handle->index].type =
+					RTE_FLOW_ACTION_TYPE_CONNTRACK;
+				translated[handle->index].conf =
+							 (void *)(uintptr_t)idx;
+				break;
+			}
+			/* Fall-through */
 		default:
 			mlx5_free(translated);
 			return rte_flow_error_set
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 8f2bc7d2f6..286e3fb6a4 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -226,6 +226,7 @@ enum mlx5_feature_name {
 #define MLX5_FLOW_ACTION_TUNNEL_MATCH (1ull << 38)
 #define MLX5_FLOW_ACTION_MODIFY_FIELD (1ull << 39)
 #define MLX5_FLOW_ACTION_METER_WITH_TERMINATED_POLICY (1ull << 40)
+#define MLX5_FLOW_ACTION_CT (1ull << 41)
 
 #define MLX5_FLOW_FATE_ACTIONS \
 	(MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_QUEUE | \
@@ -969,11 +970,15 @@ struct rte_flow {
 	uint32_t drv_type:2; /**< Driver type. */
 	uint32_t tunnel:1;
 	uint32_t meter:24; /**< Holds flow meter id. */
+	uint32_t indirect_type:2; /**< Indirect action type. */
 	uint32_t rix_mreg_copy;
 	/**< Index to metadata register copy table resource. */
 	uint32_t counter; /**< Holds flow counter. */
 	uint32_t tunnel_id;  /**< Tunnel id */
-	uint32_t age; /**< Holds ASO age bit index. */
+	union {
+		uint32_t age; /**< Holds ASO age bit index. */
+		uint32_t ct; /**< Holds ASO CT index. */
+	};
 	uint32_t geneve_tlv_option; /**< Holds Geneve TLV option id. > */
 } __rte_packed;
 
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index 3f7ed371bf..d0a989e213 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -1388,3 +1388,44 @@ mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
 		mlx5_aso_ct_obj_analyze(profile, out_data);
 	return ret;
 }
+
+/*
+ * Make sure the conntrack context is synchronized with hardware before
+ * creating a flow rule that uses it.
+ *
+ * @param[in] sh
+ *   Pointer to shared device context.
+ * @param[in] ct
+ *   Pointer to connection tracking offload object.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_aso_ct_available(struct mlx5_dev_ctx_shared *sh,
+		      struct mlx5_aso_ct_action *ct)
+{
+	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
+	uint32_t poll_cqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
+	enum mlx5_aso_ct_state state =
+				__atomic_load_n(&ct->state, __ATOMIC_RELAXED);
+
+	if (state == ASO_CONNTRACK_FREE) {
+		rte_errno = ENXIO;
+		return -rte_errno;
+	} else if (state == ASO_CONNTRACK_READY ||
+		   state == ASO_CONNTRACK_QUERY) {
+		return 0;
+	}
+	do {
+		mlx5_aso_ct_completion_handle(mng);
+		state = __atomic_load_n(&ct->state, __ATOMIC_RELAXED);
+		if (state == ASO_CONNTRACK_READY ||
+		    state == ASO_CONNTRACK_QUERY)
+			return 0;
+		/* Waiting for CQE ready, consider should block or sleep. */
+		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
+	} while (--poll_cqe_times);
+	rte_errno = EBUSY;
+	return -rte_errno;
+}
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 0fa0671ace..14af900267 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -11381,6 +11381,7 @@ flow_dv_translate_create_conntrack(struct rte_eth_dev *dev,
 		return rte_flow_error_set(error, EBUSY,
 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
 					  "Failed to update CT");
+	ct->is_original = !!pro->is_original_dir;
 	return idx;
 }
 
@@ -11544,6 +11545,8 @@ flow_dv_translate(struct rte_eth_dev *dev,
 		const struct rte_flow_action *found_action = NULL;
 		uint32_t jump_group = 0;
 		struct mlx5_flow_counter *cnt;
+		uint32_t ct_idx;
+		struct mlx5_aso_ct_action *ct;
 
 		if (!mlx5_flow_os_action_supported(action_type))
 			return rte_flow_error_set(error, ENOTSUP,
@@ -12017,6 +12020,26 @@ flow_dv_translate(struct rte_eth_dev *dev,
 				return -rte_errno;
 			action_flags |= MLX5_FLOW_ACTION_MODIFY_FIELD;
 			break;
+		case RTE_FLOW_ACTION_TYPE_CONNTRACK:
+			ct_idx = (uint32_t)(uintptr_t)action->conf;
+			ct = flow_aso_ct_get_by_idx(dev, ct_idx);
+			if (mlx5_aso_ct_available(priv->sh, ct))
+				return rte_flow_error_set(error, rte_errno,
+						RTE_FLOW_ERROR_TYPE_ACTION,
+						NULL,
+						"CT is unavailable.");
+			if (ct->is_original)
+				dev_flow->dv.actions[actions_n] =
+							ct->dr_action_orig;
+			else
+				dev_flow->dv.actions[actions_n] =
+							ct->dr_action_rply;
+			flow->indirect_type = MLX5_INDIRECT_ACTION_TYPE_CT;
+			flow->ct = ct_idx;
+			__atomic_fetch_add(&ct->refcnt, 1, __ATOMIC_RELAXED);
+			actions_n++;
+			action_flags |= MLX5_FLOW_ACTION_CT;
+			break;
 		case RTE_FLOW_ACTION_TYPE_END:
 			actions_end = true;
 			if (mhdr_res->actions_num) {
@@ -13152,7 +13175,10 @@ flow_dv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
 			mlx5_flow_meter_detach(priv, fm);
 		flow->meter = 0;
 	}
-	if (flow->age)
+	/* Keep the current age handling by default. */
+	if (flow->indirect_type == MLX5_INDIRECT_ACTION_TYPE_CT && flow->ct)
+		flow_dv_aso_ct_release(dev, flow->ct);
+	else if (flow->age)
 		flow_dv_aso_age_release(dev, flow->age);
 	if (flow->geneve_tlv_option) {
 		flow_dv_geneve_tlv_option_resource_release(dev);
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v4 12/17] net/mlx5: add translation of CT item
  2021-05-05  7:19 ` [dpdk-dev] [PATCH v4 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (10 preceding siblings ...)
  2021-05-05  7:19   ` [dpdk-dev] [PATCH v4 11/17] net/mlx5: add translation of CT action Bing Zhao
@ 2021-05-05  7:19   ` Bing Zhao
  2021-05-05  7:19   ` [dpdk-dev] [PATCH v4 13/17] net/mlx5: add CT context update Bing Zhao
                     ` (4 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  7:19 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

The return register of the DR action will be used for matching.
After the ASO CT checking of a TCP packet, the syndrome is filled in
the register. Only the 8 LSB should be used. A conversion from
RTE_FLOW_CONNTRACK_PKT_STATE_* to the syndrome should be done after
checking the spec and mask fields.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow.h    |  7 ++++
 drivers/net/mlx5/mlx5_flow_dv.c | 62 +++++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 286e3fb6a4..eb0bb42161 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -405,6 +405,13 @@ enum mlx5_feature_name {
 /* Maximum number of fields to modify in MODIFY_FIELD */
 #define MLX5_ACT_MAX_MOD_FIELDS 5
 
+/* Syndrome bits definition for connection tracking. */
+#define MLX5_CT_SYNDROME_VALID		(0x0 << 6)
+#define MLX5_CT_SYNDROME_INVALID	(0x1 << 6)
+#define MLX5_CT_SYNDROME_TRAP		(0x2 << 6)
+#define MLX5_CT_SYNDROME_STATE_CHANGE	(0x1 << 1)
+#define MLX5_CT_SYNDROME_BAD_PACKET	(0x1 << 0)
+
 enum mlx5_flow_drv_type {
 	MLX5_FLOW_TYPE_MIN,
 	MLX5_FLOW_TYPE_DV,
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 14af900267..b0858e3df8 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -9379,6 +9379,64 @@ flow_dv_translate_item_ecpri(struct rte_eth_dev *dev, void *matcher,
 	}
 }
 
+/*
+ * Add connection tracking status item to matcher
+ *
+ * @param[in] dev
+ *   The device to configure through.
+ * @param[in, out] matcher
+ *   Flow matcher.
+ * @param[in, out] key
+ *   Flow matcher value.
+ * @param[in] item
+ *   Flow pattern to translate.
+ */
+static void
+flow_dv_translate_item_aso_ct(struct rte_eth_dev *dev,
+			      void *matcher, void *key,
+			      const struct rte_flow_item *item)
+{
+	uint32_t reg_value = 0;
+	int reg_id;
+	/* 8LSB 0b 11/0000/11, middle 4 bits are reserved. */
+	uint32_t reg_mask = 0;
+	const struct rte_flow_item_conntrack *spec = item->spec;
+	const struct rte_flow_item_conntrack *mask = item->mask;
+	uint32_t flags;
+	struct rte_flow_error error;
+
+	if (!mask)
+		mask = &rte_flow_item_conntrack_mask;
+	if (!spec || !mask->flags)
+		return;
+	flags = spec->flags & mask->flags;
+	/* The conflict should be checked in the validation. */
+	if (flags & RTE_FLOW_CONNTRACK_PKT_STATE_VALID)
+		reg_value |= MLX5_CT_SYNDROME_VALID;
+	if (flags & RTE_FLOW_CONNTRACK_PKT_STATE_CHANGED)
+		reg_value |= MLX5_CT_SYNDROME_STATE_CHANGE;
+	if (flags & RTE_FLOW_CONNTRACK_PKT_STATE_INVALID)
+		reg_value |= MLX5_CT_SYNDROME_INVALID;
+	if (flags & RTE_FLOW_CONNTRACK_PKT_STATE_DISABLED)
+		reg_value |= MLX5_CT_SYNDROME_TRAP;
+	if (flags & RTE_FLOW_CONNTRACK_PKT_STATE_BAD)
+		reg_value |= MLX5_CT_SYNDROME_BAD_PACKET;
+	if (mask->flags & (RTE_FLOW_CONNTRACK_PKT_STATE_VALID |
+			   RTE_FLOW_CONNTRACK_PKT_STATE_INVALID |
+			   RTE_FLOW_CONNTRACK_PKT_STATE_DISABLED))
+		reg_mask |= 0xc0;
+	if (mask->flags & RTE_FLOW_CONNTRACK_PKT_STATE_CHANGED)
+		reg_mask |= MLX5_CT_SYNDROME_STATE_CHANGE;
+	if (mask->flags & RTE_FLOW_CONNTRACK_PKT_STATE_BAD)
+		reg_mask |= MLX5_CT_SYNDROME_BAD_PACKET;
+	/* The REG_C_x value could be saved during startup. */
+	reg_id = mlx5_flow_get_reg_id(dev, MLX5_ASO_CONNTRACK, 0, &error);
+	if (reg_id == REG_NON)
+		return;
+	flow_dv_match_meta_reg(matcher, key, (enum modify_reg)reg_id,
+			       reg_value, reg_mask);
+}
+
 static uint32_t matcher_zero[MLX5_ST_SZ_DW(fte_match_param)] = { 0 };
 
 #define HEADER_IS_ZERO(match_criteria, headers)				     \
@@ -12322,6 +12380,10 @@ flow_dv_translate(struct rte_eth_dev *dev,
 			/* No other protocol should follow eCPRI layer. */
 			last_item = MLX5_FLOW_LAYER_ECPRI;
 			break;
+		case RTE_FLOW_ITEM_TYPE_CONNTRACK:
+			flow_dv_translate_item_aso_ct(dev, match_mask,
+						      match_value, items);
+			break;
 		default:
 			break;
 		}
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v4 13/17] net/mlx5: add CT context update
  2021-05-05  7:19 ` [dpdk-dev] [PATCH v4 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (11 preceding siblings ...)
  2021-05-05  7:19   ` [dpdk-dev] [PATCH v4 12/17] net/mlx5: add translation of CT item Bing Zhao
@ 2021-05-05  7:19   ` Bing Zhao
  2021-05-05  7:19   ` [dpdk-dev] [PATCH v4 14/17] net/mlx5: validation of CT action Bing Zhao
                     ` (3 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  7:19 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

When updating a connection tracking context, two separate parts
could be updated.
First, the direction. This will only update the traffic direction
recorded in the software for flow creation.
Second, the TCP parameters. The hardware context will be updated
via the WQE. This update will be blocked until the hardware status
is updated and ready for the next flow creation.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow_dv.c | 56 +++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index b0858e3df8..6afbbbc4bb 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -13824,6 +13824,60 @@ __flow_dv_action_rss_update(struct rte_eth_dev *dev, uint32_t idx,
 	return ret;
 }
 
+/*
+ * Updates in place conntrack context or direction.
+ * Context update should be synchronized.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] idx
+ *   The conntrack object ID to be updated.
+ * @param[in] update
+ *   Pointer to the structure of information to update.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. Initialized in case of
+ *   error only.
+ *
+ * @return
+ *   0 on success, otherwise negative errno value.
+ */
+static int
+__flow_dv_action_ct_update(struct rte_eth_dev *dev, uint32_t idx,
+			   const struct rte_flow_modify_conntrack *update,
+			   struct rte_flow_error *error)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_action *ct;
+	const struct rte_flow_action_conntrack *new_prf;
+	int ret = 0;
+
+	ct = flow_aso_ct_get_by_idx(dev, idx);
+	if (!ct->refcnt)
+		return rte_flow_error_set(error, ENOMEM,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					  NULL,
+					  "CT object is inactive");
+	new_prf = &update->new_ct;
+	if (update->direction)
+		ct->is_original = !!new_prf->is_original_dir;
+	if (update->state) {
+		ret = mlx5_aso_ct_update_by_wqe(priv->sh, ct, new_prf);
+		if (ret)
+			return rte_flow_error_set(error, EIO,
+					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					NULL,
+					"Failed to send CT context update WQE");
+		/* Block until ready or a failure. */
+		ret = mlx5_aso_ct_available(priv->sh, ct);
+		if (ret)
+			rte_flow_error_set(error, rte_errno,
+					   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					   NULL,
+					   "Timeout to get the CT update");
+	}
+	return ret;
+}
+
 /**
  * Updates in place shared action configuration, lock free,
  * (mutex should be acquired by caller).
@@ -13859,6 +13913,8 @@ flow_dv_action_update(struct rte_eth_dev *dev,
 	case MLX5_INDIRECT_ACTION_TYPE_RSS:
 		action_conf = ((const struct rte_flow_action *)update)->conf;
 		return __flow_dv_action_rss_update(dev, idx, action_conf, err);
+	case MLX5_INDIRECT_ACTION_TYPE_CT:
+		return __flow_dv_action_ct_update(dev, idx, update, err);
 	default:
 		return rte_flow_error_set(err, ENOTSUP,
 					  RTE_FLOW_ERROR_TYPE_ACTION,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v4 14/17] net/mlx5: validation of CT action
  2021-05-05  7:19 ` [dpdk-dev] [PATCH v4 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (12 preceding siblings ...)
  2021-05-05  7:19   ` [dpdk-dev] [PATCH v4 13/17] net/mlx5: add CT context update Bing Zhao
@ 2021-05-05  7:19   ` Bing Zhao
  2021-05-05  7:19   ` [dpdk-dev] [PATCH v4 15/17] net/mlx5: validation of CT item Bing Zhao
                     ` (2 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  7:19 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

The validation of a CT action contains two parts. The first is the
CT action configuration parameters. When creating a CT action
context, some members need to be verified.

The second is that when creating a flow, the DR action of CT should
be validated with other actions and items as well. Currently, only
the TCP protocol supports connection tracking.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.h         |  4 ++
 drivers/net/mlx5/mlx5_flow.c    | 31 +++++++++++++++
 drivers/net/mlx5/mlx5_flow_dv.c | 69 +++++++++++++++++++++++++++++++++
 3 files changed, 104 insertions(+)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index d01a10ea54..36b7f05822 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1611,6 +1611,10 @@ int mlx5_flow_dev_dump(struct rte_eth_dev *dev, struct rte_flow *flow,
 void mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev);
 int mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
 			uint32_t nb_contexts, struct rte_flow_error *error);
+int mlx5_validate_action_ct(struct rte_eth_dev *dev,
+			    const struct rte_flow_action_conntrack *conntrack,
+			    struct rte_flow_error *error);
+
 
 /* mlx5_mp_os.c */
 
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index f36eeae03f..6baaefbaba 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1688,6 +1688,37 @@ mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
 	return 0;
 }
 
+/*
+ * Validate the ASO CT action.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] conntrack
+ *   Pointer to the CT action profile.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_validate_action_ct(struct rte_eth_dev *dev,
+			const struct rte_flow_action_conntrack *conntrack,
+			struct rte_flow_error *error)
+{
+	RTE_SET_USED(dev);
+
+	if (conntrack->state > RTE_FLOW_CONNTRACK_STATE_TIME_WAIT)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "Invalid CT state");
+	if (conntrack->last_index > RTE_FLOW_CONNTRACK_FLAG_RST)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "Invalid last TCP packet flag");
+	return 0;
+}
+
 /**
  * Verify the @p attributes will be correctly understood by the NIC and store
  * them in the @p flow if everything is correct.
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 6afbbbc4bb..f2a2c609e2 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -3400,6 +3400,57 @@ flow_dv_validate_action_raw_encap_decap
 	return 0;
 }
 
+/*
+ * Validate the ASO CT action.
+ *
+ * @param[in] dev
+ *   Pointer to the rte_eth_dev structure.
+ * @param[in] action_flags
+ *   Holds the actions detected until now.
+ * @param[in] item_flags
+ *   The items found in this flow rule.
+ * @param[in] attr
+ *   Pointer to flow attributes.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_validate_action_aso_ct(struct rte_eth_dev *dev,
+			       uint64_t action_flags,
+			       uint64_t item_flags,
+			       const struct rte_flow_attr *attr,
+			       struct rte_flow_error *error)
+{
+	RTE_SET_USED(dev);
+
+	if (attr->group == 0 && !attr->transfer)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					  NULL,
+					  "Only support non-root table");
+	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "CT cannot follow a fate action");
+	if ((action_flags & MLX5_FLOW_ACTION_METER) ||
+	    (action_flags & MLX5_FLOW_ACTION_AGE))
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "Only one ASO action is supported");
+	if (action_flags & MLX5_FLOW_ACTION_ENCAP)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "Encap cannot exist before CT");
+	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP))
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+					  "Not a outer TCP packet");
+	return 0;
+}
+
 /**
  * Match encap_decap resource.
  *
@@ -7205,6 +7256,14 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
 			action_flags |= MLX5_FLOW_ACTION_MODIFY_FIELD;
 			rw_act_num += ret;
 			break;
+		case RTE_FLOW_ACTION_TYPE_CONNTRACK:
+			ret = flow_dv_validate_action_aso_ct(dev, action_flags,
+							     item_flags, attr,
+							     error);
+			if (ret < 0)
+				return ret;
+			action_flags |= MLX5_FLOW_ACTION_CT;
+			break;
 		default:
 			return rte_flow_error_set(error, ENOTSUP,
 						  RTE_FLOW_ERROR_TYPE_ACTION,
@@ -13861,6 +13920,10 @@ __flow_dv_action_ct_update(struct rte_eth_dev *dev, uint32_t idx,
 	if (update->direction)
 		ct->is_original = !!new_prf->is_original_dir;
 	if (update->state) {
+		/* Only validate the profile when it needs to be updated. */
+		ret = mlx5_validate_action_ct(dev, new_prf, error);
+		if (ret)
+			return ret;
 		ret = mlx5_aso_ct_update_by_wqe(priv->sh, ct, new_prf);
 		if (ret)
 			return rte_flow_error_set(error, EIO,
@@ -15732,6 +15795,12 @@ flow_dv_action_validate(struct rte_eth_dev *dev,
 						NULL,
 					     "shared age action not supported");
 		return flow_dv_validate_action_age(0, action, dev, err);
+	case RTE_FLOW_ACTION_TYPE_CONNTRACK:
+		if (!priv->sh->ct_aso_en)
+			return rte_flow_error_set(err, ENOTSUP,
+					RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+					"ASO CT is not supported");
+		return mlx5_validate_action_ct(dev, action->conf, err);
 	default:
 		return rte_flow_error_set(err, ENOTSUP,
 					  RTE_FLOW_ERROR_TYPE_ACTION,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v4 15/17] net/mlx5: validation of CT item
  2021-05-05  7:19 ` [dpdk-dev] [PATCH v4 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (13 preceding siblings ...)
  2021-05-05  7:19   ` [dpdk-dev] [PATCH v4 14/17] net/mlx5: validation of CT action Bing Zhao
@ 2021-05-05  7:19   ` Bing Zhao
  2021-05-05  7:19   ` [dpdk-dev] [PATCH v4 16/17] net/mlx5: add support of CT between two ports Bing Zhao
  2021-05-05  7:19   ` [dpdk-dev] [PATCH v4 17/17] doc: update mlx5 support for conntrack Bing Zhao
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  7:19 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

The ASO connection tracking item is translated into a register value
for matching. The validation of this item has no dependency on other
layers, since a flow including this item should be reached by a jump
from another group and all the layer checking was already done in the
previous groups. Only conflicts between the state bits need to be
checked.

It is assumed that the flow with CT item will always work on the
TCP traffic.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow.h    |  3 ++
 drivers/net/mlx5/mlx5_flow_dv.c | 51 +++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index eb0bb42161..238befa2d4 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -147,6 +147,9 @@ enum mlx5_feature_name {
 #define MLX5_FLOW_LAYER_GENEVE_OPT (UINT64_C(1) << 32)
 #define MLX5_FLOW_LAYER_GTP_PSC (UINT64_C(1) << 33)
 
+/* Conntrack item. */
+#define MLX5_FLOW_LAYER_ASO_CT (UINT64_C(1) << 34)
+
 /* Outer Masks. */
 #define MLX5_FLOW_LAYER_OUTER_L3 \
 	(MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index f2a2c609e2..aa0a5acdca 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -2598,6 +2598,51 @@ flow_dv_validate_item_ipv6_frag_ext(const struct rte_flow_item *item,
 				  "specified range not supported");
 }
 
+/*
+ * Validate ASO CT item.
+ *
+ * @param[in] dev
+ *   Pointer to the rte_eth_dev structure.
+ * @param[in] item
+ *   Item specification.
+ * @param[in] item_flags
+ *   Pointer to the bit-field that holds the items detected until now.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_validate_item_aso_ct(struct rte_eth_dev *dev,
+			     const struct rte_flow_item *item,
+			     uint64_t *item_flags,
+			     struct rte_flow_error *error)
+{
+	const struct rte_flow_item_conntrack *spec = item->spec;
+	const struct rte_flow_item_conntrack *mask = item->mask;
+	RTE_SET_USED(dev);
+	uint32_t flags;
+
+	if (*item_flags & MLX5_FLOW_LAYER_ASO_CT)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ITEM, NULL,
+					  "Only one CT is supported");
+	if (!mask)
+		mask = &rte_flow_item_conntrack_mask;
+	flags = spec->flags & mask->flags;
+	if ((flags & RTE_FLOW_CONNTRACK_PKT_STATE_VALID) &&
+	    ((flags & RTE_FLOW_CONNTRACK_PKT_STATE_INVALID) ||
+	     (flags & RTE_FLOW_CONNTRACK_PKT_STATE_BAD) ||
+	     (flags & RTE_FLOW_CONNTRACK_PKT_STATE_DISABLED)))
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ITEM, NULL,
+					  "Conflict status bits");
+	/* State change also needs to be considered. */
+	*item_flags |= MLX5_FLOW_LAYER_ASO_CT;
+	return 0;
+}
+
 /**
  * Validate the pop VLAN action.
  *
@@ -6696,6 +6741,12 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
 				return ret;
 			last_item = MLX5_FLOW_LAYER_ECPRI;
 			break;
+		case RTE_FLOW_ITEM_TYPE_CONNTRACK:
+			ret = flow_dv_validate_item_aso_ct(dev, items,
+							   &item_flags, error);
+			if (ret < 0)
+				return ret;
+			break;
 		default:
 			return rte_flow_error_set(error, ENOTSUP,
 						  RTE_FLOW_ERROR_TYPE_ITEM,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v4 16/17] net/mlx5: add support of CT between two ports
  2021-05-05  7:19 ` [dpdk-dev] [PATCH v4 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (14 preceding siblings ...)
  2021-05-05  7:19   ` [dpdk-dev] [PATCH v4 15/17] net/mlx5: validation of CT item Bing Zhao
@ 2021-05-05  7:19   ` Bing Zhao
  2021-05-05  7:19   ` [dpdk-dev] [PATCH v4 17/17] doc: update mlx5 support for conntrack Bing Zhao
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  7:19 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

After a connection tracking context is created, it can be shared
between two ports. On each port, the flow for one direction of the
traffic is created.

The context can only be shared between the owner port and the peer
port that was specified when the context was created. In the current
implementation, only the owner port can update or query the context.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow.h    | 57 +++++++++++++++++++++++++-
 drivers/net/mlx5/mlx5_flow_dv.c | 71 +++++++++++++++++++++++++--------
 2 files changed, 110 insertions(+), 18 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 238befa2d4..ddaba40f72 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -48,6 +48,25 @@ enum {
 	MLX5_INDIRECT_ACTION_TYPE_CT,
 };
 
+/* Currently, at most 256 ports and 4M CT actions are supported. */
+#define MLX5_INDIRECT_ACT_CT_MAX_PORT 0x100
+
+#define MLX5_INDIRECT_ACT_CT_OWNER_SHIFT 22
+#define MLX5_INDIRECT_ACT_CT_OWNER_MASK (MLX5_INDIRECT_ACT_CT_MAX_PORT - 1)
+
+/* 30-31: type, 22-29: owner port, 0-21: index. */
+#define MLX5_INDIRECT_ACT_CT_GEN_IDX(owner, index) \
+	((MLX5_INDIRECT_ACTION_TYPE_CT << MLX5_INDIRECT_ACTION_TYPE_OFFSET) | \
+	 (((owner) & MLX5_INDIRECT_ACT_CT_OWNER_MASK) << \
+	  MLX5_INDIRECT_ACT_CT_OWNER_SHIFT) | (index))
+
+#define MLX5_INDIRECT_ACT_CT_GET_OWNER(index) \
+	(((index) >> MLX5_INDIRECT_ACT_CT_OWNER_SHIFT) & \
+	 MLX5_INDIRECT_ACT_CT_OWNER_MASK)
+
+#define MLX5_INDIRECT_ACT_CT_GET_IDX(index) \
+	((index) & ((1 << MLX5_INDIRECT_ACT_CT_OWNER_SHIFT) - 1))
+
 /* Matches on selected register. */
 struct mlx5_rte_flow_item_tag {
 	enum modify_reg id;
@@ -1304,7 +1323,7 @@ mlx5_aso_meter_by_idx(struct mlx5_priv *priv, uint32_t idx)
 }
 
 /*
- * Get ASO CT action by index.
+ * Get ASO CT action by device and index.
  *
  * @param[in] dev
  *   Pointer to the Ethernet device structure.
@@ -1315,7 +1334,7 @@ mlx5_aso_meter_by_idx(struct mlx5_priv *priv, uint32_t idx)
  *   The specified ASO CT action pointer.
  */
 static inline struct mlx5_aso_ct_action *
-flow_aso_ct_get_by_idx(struct rte_eth_dev *dev, uint32_t idx)
+flow_aso_ct_get_by_dev_idx(struct rte_eth_dev *dev, uint32_t idx)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
@@ -1330,6 +1349,40 @@ flow_aso_ct_get_by_idx(struct rte_eth_dev *dev, uint32_t idx)
 	return &pool->actions[idx % MLX5_ASO_CT_ACTIONS_PER_POOL];
 }
 
+/*
+ * Get ASO CT action by owner & index.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] own_idx
+ *   Index to the ASO CT action and owner port combination.
+ *
+ * @return
+ *   The specified ASO CT action pointer.
+ */
+static inline struct mlx5_aso_ct_action *
+flow_aso_ct_get_by_idx(struct rte_eth_dev *dev, uint32_t own_idx)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_action *ct;
+	uint16_t owner = (uint16_t)MLX5_INDIRECT_ACT_CT_GET_OWNER(own_idx);
+	uint32_t idx = MLX5_INDIRECT_ACT_CT_GET_IDX(own_idx);
+
+	if (owner == PORT_ID(priv)) {
+		ct = flow_aso_ct_get_by_dev_idx(dev, idx);
+	} else {
+		struct rte_eth_dev *owndev = &rte_eth_devices[owner];
+
+		MLX5_ASSERT(owner < RTE_MAX_ETHPORTS);
+		if (dev->data->dev_started != 1)
+			return NULL;
+		ct = flow_aso_ct_get_by_dev_idx(owndev, idx);
+		if (ct->peer != PORT_ID(priv))
+			return NULL;
+	}
+	return ct;
+}
+
 int mlx5_flow_group_to_table(struct rte_eth_dev *dev,
 			     const struct mlx5_flow_tunnel *tunnel,
 			     uint32_t group, uint32_t *table,
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index aa0a5acdca..ca55cff48b 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -11289,7 +11289,7 @@ flow_dv_translate_create_aso_age(struct rte_eth_dev *dev,
 }
 
 /*
- * Release an ASO CT action.
+ * Release an ASO CT action by its own device.
  *
  * @param[in] dev
  *   Pointer to the Ethernet device structure.
@@ -11300,12 +11300,12 @@ flow_dv_translate_create_aso_age(struct rte_eth_dev *dev,
  *   0 when CT action was removed, otherwise the number of references.
  */
 static inline int
-flow_dv_aso_ct_release(struct rte_eth_dev *dev, uint32_t idx)
+flow_dv_aso_ct_dev_release(struct rte_eth_dev *dev, uint32_t idx)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
 	uint32_t ret;
-	struct mlx5_aso_ct_action *ct = flow_aso_ct_get_by_idx(dev, idx);
+	struct mlx5_aso_ct_action *ct = flow_aso_ct_get_by_dev_idx(dev, idx);
 	enum mlx5_aso_ct_state state =
 			__atomic_load_n(&ct->state, __ATOMIC_RELAXED);
 
@@ -11334,7 +11334,21 @@ flow_dv_aso_ct_release(struct rte_eth_dev *dev, uint32_t idx)
 		LIST_INSERT_HEAD(&mng->free_cts, ct, next);
 		rte_spinlock_unlock(&mng->ct_sl);
 	}
-	return ret;
+	return (int)ret;
+}
+
+static inline int
+flow_dv_aso_ct_release(struct rte_eth_dev *dev, uint32_t own_idx)
+{
+	uint16_t owner = (uint16_t)MLX5_INDIRECT_ACT_CT_GET_OWNER(own_idx);
+	uint32_t idx = MLX5_INDIRECT_ACT_CT_GET_IDX(own_idx);
+	struct rte_eth_dev *owndev = &rte_eth_devices[owner];
+	RTE_SET_USED(dev);
+
+	MLX5_ASSERT(owner < RTE_MAX_ETHPORTS);
+	if (dev->data->dev_started != 1)
+		return -1;
+	return flow_dv_aso_ct_dev_release(owndev, idx);
 }
 
 /*
@@ -11486,7 +11500,7 @@ flow_dv_aso_ct_alloc(struct rte_eth_dev *dev, struct rte_flow_error *error)
 		RTE_SET_USED(reg_c);
 #endif
 		if (!ct->dr_action_orig) {
-			flow_dv_aso_ct_release(dev, ct_idx);
+			flow_dv_aso_ct_dev_release(dev, ct_idx);
 			rte_flow_error_set(error, rte_errno,
 					   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
 					   "failed to create ASO CT action");
@@ -11502,7 +11516,7 @@ flow_dv_aso_ct_alloc(struct rte_eth_dev *dev, struct rte_flow_error *error)
 			 reg_c - REG_C_0);
 #endif
 		if (!ct->dr_action_rply) {
-			flow_dv_aso_ct_release(dev, ct_idx);
+			flow_dv_aso_ct_dev_release(dev, ct_idx);
 			rte_flow_error_set(error, rte_errno,
 					   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
 					   "failed to create ASO CT action");
@@ -11544,12 +11558,13 @@ flow_dv_translate_create_conntrack(struct rte_eth_dev *dev,
 		return rte_flow_error_set(error, rte_errno,
 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
 					  "Failed to allocate CT object");
-	ct = flow_aso_ct_get_by_idx(dev, idx);
+	ct = flow_aso_ct_get_by_dev_idx(dev, idx);
 	if (mlx5_aso_ct_update_by_wqe(sh, ct, pro))
 		return rte_flow_error_set(error, EBUSY,
 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
 					  "Failed to update CT");
 	ct->is_original = !!pro->is_original_dir;
+	ct->peer = pro->peer_port;
 	return idx;
 }
 
@@ -11713,7 +11728,7 @@ flow_dv_translate(struct rte_eth_dev *dev,
 		const struct rte_flow_action *found_action = NULL;
 		uint32_t jump_group = 0;
 		struct mlx5_flow_counter *cnt;
-		uint32_t ct_idx;
+		uint32_t owner_idx;
 		struct mlx5_aso_ct_action *ct;
 
 		if (!mlx5_flow_os_action_supported(action_type))
@@ -12189,8 +12204,13 @@ flow_dv_translate(struct rte_eth_dev *dev,
 			action_flags |= MLX5_FLOW_ACTION_MODIFY_FIELD;
 			break;
 		case RTE_FLOW_ACTION_TYPE_CONNTRACK:
-			ct_idx = (uint32_t)(uintptr_t)action->conf;
-			ct = flow_aso_ct_get_by_idx(dev, ct_idx);
+			owner_idx = (uint32_t)(uintptr_t)action->conf;
+			ct = flow_aso_ct_get_by_idx(dev, owner_idx);
+			if (!ct)
+				return rte_flow_error_set(error, EINVAL,
+						RTE_FLOW_ERROR_TYPE_ACTION,
+						NULL,
+						"Failed to get CT object.");
 			if (mlx5_aso_ct_available(priv->sh, ct))
 				return rte_flow_error_set(error, rte_errno,
 						RTE_FLOW_ERROR_TYPE_ACTION,
@@ -12203,7 +12223,7 @@ flow_dv_translate(struct rte_eth_dev *dev,
 				dev_flow->dv.actions[actions_n] =
 							ct->dr_action_rply;
 			flow->indirect_type = MLX5_INDIRECT_ACTION_TYPE_CT;
-			flow->ct = ct_idx;
+			flow->ct = owner_idx;
 			__atomic_fetch_add(&ct->refcnt, 1, __ATOMIC_RELAXED);
 			actions_n++;
 			action_flags |= MLX5_FLOW_ACTION_CT;
@@ -13803,8 +13823,7 @@ flow_dv_action_create(struct rte_eth_dev *dev,
 	case RTE_FLOW_ACTION_TYPE_CONNTRACK:
 		ret = flow_dv_translate_create_conntrack(dev, action->conf,
 							 err);
-		idx = (MLX5_INDIRECT_ACTION_TYPE_CT <<
-		       MLX5_INDIRECT_ACTION_TYPE_OFFSET) | ret;
+		idx = MLX5_INDIRECT_ACT_CT_GEN_IDX(PORT_ID(priv), ret);
 		break;
 	default:
 		rte_flow_error_set(err, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
@@ -13856,7 +13875,9 @@ flow_dv_action_destroy(struct rte_eth_dev *dev,
 		return 0;
 	case MLX5_INDIRECT_ACTION_TYPE_CT:
 		ret = flow_dv_aso_ct_release(dev, idx);
-		if (ret)
+		if (ret < 0)
+			return ret;
+		if (ret > 0)
 			DRV_LOG(DEBUG, "Connection tracking object %u still "
 				"has references %d.", idx, ret);
 		return 0;
@@ -13960,8 +13981,16 @@ __flow_dv_action_ct_update(struct rte_eth_dev *dev, uint32_t idx,
 	struct mlx5_aso_ct_action *ct;
 	const struct rte_flow_action_conntrack *new_prf;
 	int ret = 0;
+	uint16_t owner = (uint16_t)MLX5_INDIRECT_ACT_CT_GET_OWNER(idx);
+	uint32_t dev_idx;
 
-	ct = flow_aso_ct_get_by_idx(dev, idx);
+	if (PORT_ID(priv) != owner)
+		return rte_flow_error_set(error, EACCES,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					  NULL,
+					  "CT object owned by another port");
+	dev_idx = MLX5_INDIRECT_ACT_CT_GET_IDX(idx);
+	ct = flow_aso_ct_get_by_dev_idx(dev, dev_idx);
 	if (!ct->refcnt)
 		return rte_flow_error_set(error, ENOMEM,
 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
@@ -14049,6 +14078,8 @@ flow_dv_action_query(struct rte_eth_dev *dev,
 	uint32_t idx = act_idx & ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_aso_ct_action *ct;
+	uint16_t owner;
+	uint32_t dev_idx;
 
 	switch (type) {
 	case MLX5_INDIRECT_ACTION_TYPE_AGE:
@@ -14063,7 +14094,15 @@ flow_dv_action_query(struct rte_eth_dev *dev,
 			     (&age_param->sec_since_last_hit, __ATOMIC_RELAXED);
 		return 0;
 	case MLX5_INDIRECT_ACTION_TYPE_CT:
-		ct = flow_aso_ct_get_by_idx(dev, idx);
+		owner = (uint16_t)MLX5_INDIRECT_ACT_CT_GET_OWNER(idx);
+		if (owner != PORT_ID(priv))
+			return rte_flow_error_set(error, EACCES,
+					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					NULL,
+					"CT object owned by another port");
+		dev_idx = MLX5_INDIRECT_ACT_CT_GET_IDX(idx);
+		ct = flow_aso_ct_get_by_dev_idx(dev, dev_idx);
+		MLX5_ASSERT(ct);
 		if (!ct->refcnt)
 			return rte_flow_error_set(error, EFAULT,
 					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v4 17/17] doc: update mlx5 support for conntrack
  2021-05-05  7:19 ` [dpdk-dev] [PATCH v4 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (15 preceding siblings ...)
  2021-05-05  7:19   ` [dpdk-dev] [PATCH v4 16/17] net/mlx5: add support of CT between two ports Bing Zhao
@ 2021-05-05  7:19   ` Bing Zhao
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  7:19 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

In the release notes and mlx5 NIC document, the support and
limitation of connection tracking are added.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 doc/guides/nics/features/default.ini   |  1 +
 doc/guides/nics/features/mlx5.ini      |  1 +
 doc/guides/nics/mlx5.rst               | 14 ++++++++++++++
 doc/guides/rel_notes/release_21_05.rst |  2 ++
 4 files changed, 18 insertions(+)

diff --git a/doc/guides/nics/features/default.ini b/doc/guides/nics/features/default.ini
index 8046bd121e..0deb4ef547 100644
--- a/doc/guides/nics/features/default.ini
+++ b/doc/guides/nics/features/default.ini
@@ -66,6 +66,7 @@ Module EEPROM dump   =
 Registers dump       =
 LED                  =
 Multiprocess aware   =
+Connection tracking  =
 FreeBSD              =
 Linux                =
 Windows              =
diff --git a/doc/guides/nics/features/mlx5.ini b/doc/guides/nics/features/mlx5.ini
index ddd131da16..45dbe75d07 100644
--- a/doc/guides/nics/features/mlx5.ini
+++ b/doc/guides/nics/features/mlx5.ini
@@ -45,6 +45,7 @@ Stats per queue      = Y
 FW version           = Y
 Module EEPROM dump   = Y
 Multiprocess aware   = Y
+Connection tracking  = Y
 Linux                = Y
 Windows              = P
 ARMv8                = Y
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 2bb4f18a08..238da94118 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -107,6 +107,7 @@ Features
 - 21844 flow priorities for ingress or egress flow groups greater than 0 and for any transfer
   flow group.
 - Flow metering, including meter policy API.
+- Connection tracking.
 
 Limitations
 -----------
@@ -418,6 +419,14 @@ Limitations
      - RED: must be DROP.
   - meter profile packet mode is supported.
 
+- Connection tracking:
+
+  - Connection tracking (conntrack) cannot co-exist with the ASO meter or ASO age action in a single flow rule.
+  - Flow rules insertion rate and memory consumption need more optimization.
+  - software limitation:
+     - ports: a maximal number of 256.
+     - conntrack: a maximal number of 4M.
+
 Statistics
 ----------
 
@@ -1680,6 +1689,11 @@ Supported hardware offloads
    |                       | | rdma-core 35  | | rdma-core 35  |
    |                       | | ConnectX-5    | | ConnectX-5    |
    +-----------------------+-----------------+-----------------+
+   | Connection tracking   | |               | | DPDK 21.05    |
+   |                       | |     N/A       | | OFED 5.3      |
+   |                       | |               | | rdma-core 35  |
+   |                       | |               | | ConnectX-6 Dx |
+   +-----------------------+-----------------+-----------------+
 
 .. table:: Minimal SW/HW versions for shared action offload
    :name: sact
diff --git a/doc/guides/rel_notes/release_21_05.rst b/doc/guides/rel_notes/release_21_05.rst
index efd68e8c7c..4c4c37ef87 100644
--- a/doc/guides/rel_notes/release_21_05.rst
+++ b/doc/guides/rel_notes/release_21_05.rst
@@ -166,6 +166,8 @@ New Features
   * Added support for ASO (Advanced Steering Operation) meter.
   * Added support for ASO metering by PPS (packet per second).
   * Added support for the monitor policy of Power Management API.
+  * Added support for connection tracking action and item as well as context create,
+    destroy, update and query.
 
 * **Updated NXP DPAA driver.**
 
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v5 00/17] conntrack support in mlx5 PMD
  2021-04-27 15:37 [dpdk-dev] [PATCH 00/17] conntrack support in mlx5 PMD Bing Zhao
                   ` (20 preceding siblings ...)
  2021-05-05  7:19 ` [dpdk-dev] [PATCH v4 00/17] conntrack support in mlx5 PMD Bing Zhao
@ 2021-05-05  8:05 ` Bing Zhao
  2021-05-05  8:05   ` [dpdk-dev] [PATCH v5 01/17] common/mlx5: add connection tracking object definition Bing Zhao
                     ` (16 more replies)
  2021-05-05  9:49 ` [dpdk-dev] [PATCH v6 00/17] conntrack support in mlx5 PMD Bing Zhao
  2021-05-05 12:23 ` [dpdk-dev] [PATCH v7 00/17] conntrack support in mlx5 PMD Bing Zhao
  23 siblings, 17 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  8:05 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

This patch set adds the connection tracking offload support in the
mlx5 driver, as well as the documents update.
 
---
v2: code bug fixes, commits clean up and doc update
v3: fix error input pointer for CT MR registering
v4: fix typo in commit message of patch 11
v5: adjust mkey to lkey in SQ initialization
---

Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>

Bing Zhao (17):
  common/mlx5: add connection tracking object definition
  common/mlx5: add CT offload capability checking
  net/mlx5: use meter color reg for CT
  net/mlx5: initialization of CT management
  common/mlx5: add Dexv CT objects creation
  net/mlx5: add modify support for CT
  net/mlx5: add actions creating for CT
  net/mlx5: close CT management structure
  net/mlx5: add ASO CT query implementation
  net/mlx5: add ASO CT destroy handling
  net/mlx5: add translation of CT action
  net/mlx5: add translation of CT item
  net/mlx5: add CT context update
  net/mlx5: validation of CT action
  net/mlx5: validation of CT item
  net/mlx5: add support of CT between two ports
  doc: update mlx5 support for conntrack

 doc/guides/nics/features/default.ini   |   1 +
 doc/guides/nics/features/mlx5.ini      |   1 +
 doc/guides/nics/mlx5.rst               |  14 +
 doc/guides/rel_notes/release_21_05.rst |   2 +
 drivers/common/mlx5/linux/meson.build  |   2 +
 drivers/common/mlx5/mlx5_devx_cmds.c   |  53 +++
 drivers/common/mlx5/mlx5_devx_cmds.h   |   5 +
 drivers/common/mlx5/mlx5_prm.h         |  88 ++++
 drivers/common/mlx5/version.map        |   1 +
 drivers/net/mlx5/linux/mlx5_os.c       |  13 +
 drivers/net/mlx5/mlx5.c                |  92 ++++
 drivers/net/mlx5/mlx5.h                |  76 ++++
 drivers/net/mlx5/mlx5_flow.c           |  44 +-
 drivers/net/mlx5/mlx5_flow.h           | 101 ++++-
 drivers/net/mlx5/mlx5_flow_aso.c       | 592 ++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_dv.c        | 601 ++++++++++++++++++++++++-
 16 files changed, 1683 insertions(+), 3 deletions(-)

-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v5 01/17] common/mlx5: add connection tracking object definition
  2021-05-05  8:05 ` [dpdk-dev] [PATCH v5 00/17] conntrack support in mlx5 PMD Bing Zhao
@ 2021-05-05  8:05   ` Bing Zhao
  2021-05-05  8:05   ` [dpdk-dev] [PATCH v5 02/17] common/mlx5: add CT offload capability checking Bing Zhao
                     ` (15 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  8:05 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

The structures of ASO connection tracking offload object are added
based on the definitions in the PRM. One CT object context will be
loaded into the cache completely in a reversed order of dwords. The
valid bit should be the MSB of the last dword. This is used for the
conntrack context creation and update, as well as for the query.

The capabilities 2 (HCA_CAP_2) layout is also added. The connection
tracking related capabilities could be queried via the HCA_CAP_2.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/common/mlx5/mlx5_prm.h | 85 ++++++++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)

diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index efa5ae67bf..4da89d3379 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -1119,6 +1119,7 @@ enum {
 	MLX5_GET_HCA_CAP_OP_MOD_ROCE = 0x4 << 1,
 	MLX5_GET_HCA_CAP_OP_MOD_NIC_FLOW_TABLE = 0x7 << 1,
 	MLX5_GET_HCA_CAP_OP_MOD_VDPA_EMULATION = 0x13 << 1,
+	MLX5_GET_HCA_CAP_OP_MOD_GENERAL_DEVICE_2 = 0x20 << 1,
 };
 
 #define MLX5_GENERAL_OBJ_TYPES_CAP_VIRTQ_NET_Q \
@@ -1661,6 +1662,29 @@ struct mlx5_ifc_flow_table_nic_cap_bits {
 	struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties;
 };
 
+struct mlx5_ifc_cmd_hca_cap_2_bits {
+	u8 reserved_at_0[0x80]; /* End of DW4. */
+	u8 reserved_at_80[0xb];
+	u8 log_max_num_reserved_qpn[0x5];
+	u8 reserved_at_90[0x3];
+	u8 log_reserved_qpn_granularity[0x5];
+	u8 reserved_at_98[0x3];
+	u8 log_reserved_qpn_max_alloc[0x5]; /* End of DW5. */
+	u8 max_reformat_insert_size[0x8];
+	u8 max_reformat_insert_offset[0x8];
+	u8 max_reformat_remove_size[0x8];
+	u8 max_reformat_remove_offset[0x8]; /* End of DW6. */
+	u8 aso_conntrack_reg_id[0x8];
+	u8 reserved_at_c8[0x3];
+	u8 log_conn_track_granularity[0x5];
+	u8 reserved_at_d0[0x3];
+	u8 log_conn_track_max_alloc[0x5];
+	u8 reserved_at_d8[0x3];
+	u8 log_max_conn_track_offload[0x5];
+	u8 reserved_at_e0[0x20]; /* End of DW7. */
+	u8 reserved_at_100[0x700];
+};
+
 union mlx5_ifc_hca_cap_union_bits {
 	struct mlx5_ifc_cmd_hca_cap_bits cmd_hca_cap;
 	struct mlx5_ifc_per_protocol_networking_offload_caps_bits
@@ -2599,6 +2623,67 @@ struct mlx5_ifc_create_flow_meter_aso_in_bits {
 	struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr;
 	struct mlx5_ifc_flow_meter_aso_bits flow_meter_aso;
 };
+
+struct mlx5_ifc_tcp_window_params_bits {
+	u8 max_ack[0x20];
+	u8 max_win[0x20];
+	u8 reply_end[0x20];
+	u8 sent_end[0x20];
+};
+
+struct mlx5_ifc_conn_track_aso_bits {
+	struct mlx5_ifc_tcp_window_params_bits reply_dir; /* End of DW3. */
+	struct mlx5_ifc_tcp_window_params_bits original_dir; /* End of DW7. */
+	u8 last_end[0x20]; /* End of DW8. */
+	u8 last_ack[0x20]; /* End of DW9. */
+	u8 last_seq[0x20]; /* End of DW10. */
+	u8 last_win[0x10];
+	u8 reserved_at_170[0xa];
+	u8 last_dir[0x1];
+	u8 last_index[0x5]; /* End of DW11. */
+	u8 reserved_at_180[0x40]; /* End of DW13. */
+	u8 reply_direction_tcp_scale[0x4];
+	u8 reply_direction_tcp_close_initiated[0x1];
+	u8 reply_direction_tcp_liberal_enabled[0x1];
+	u8 reply_direction_tcp_data_unacked[0x1];
+	u8 reply_direction_tcp_max_ack[0x1];
+	u8 reserved_at_1c8[0x8];
+	u8 original_direction_tcp_scale[0x4];
+	u8 original_direction_tcp_close_initiated[0x1];
+	u8 original_direction_tcp_liberal_enabled[0x1];
+	u8 original_direction_tcp_data_unacked[0x1];
+	u8 original_direction_tcp_max_ack[0x1];
+	u8 reserved_at_1d8[0x8]; /* End of DW14. */
+	u8 valid[0x1];
+	u8 state[0x3];
+	u8 freeze_track[0x1];
+	u8 reserved_at_1e5[0xb];
+	u8 reserved_at_1f0[0x1];
+	u8 connection_assured[0x1];
+	u8 sack_permitted[0x1];
+	u8 challenged_acked[0x1];
+	u8 heartbeat[0x1];
+	u8 max_ack_window[0x3];
+	u8 reserved_at_1f8[0x1];
+	u8 retransmission_counter[0x3];
+	u8 retranmission_limit_exceeded[0x1];
+	u8 retranmission_limit[0x3]; /* End of DW15. */
+};
+
+struct mlx5_ifc_conn_track_offload_bits {
+	u8 modify_field_select[0x40];
+	u8 reserved_at_40[0x40];
+	u8 reserved_at_80[0x8];
+	u8 conn_track_aso_access_pd[0x18];
+	u8 reserved_at_a0[0x160];
+	struct mlx5_ifc_conn_track_aso_bits conn_track_aso;
+};
+
+struct mlx5_ifc_create_conn_track_aso_in_bits {
+	struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr;
+	struct mlx5_ifc_conn_track_offload_bits conn_track_offload;
+};
+
 enum mlx5_access_aso_opc_mod {
 	ASO_OPC_MOD_IPSEC = 0x0,
 	ASO_OPC_MOD_CONNECTION_TRACKING = 0x1,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v5 02/17] common/mlx5: add CT offload capability checking
  2021-05-05  8:05 ` [dpdk-dev] [PATCH v5 00/17] conntrack support in mlx5 PMD Bing Zhao
  2021-05-05  8:05   ` [dpdk-dev] [PATCH v5 01/17] common/mlx5: add connection tracking object definition Bing Zhao
@ 2021-05-05  8:05   ` Bing Zhao
  2021-05-05  8:05   ` [dpdk-dev] [PATCH v5 03/17] net/mlx5: use meter color reg for CT Bing Zhao
                     ` (14 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  8:05 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

During startup, the ASO connection tracking offload capability can
be queried via the HCA_CAP_QUERY command. If the HW doesn't support
ASO CT, the value is 0 by default. In that case, the subsequent
initialization should be skipped and the creation of a CT object
should fail immediately.

Later CT creation should also check this capability. With an old
driver, a preprocessor macro should be used so that the code still
compiles.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/common/mlx5/linux/meson.build | 2 ++
 drivers/common/mlx5/mlx5_devx_cmds.c  | 3 +++
 drivers/common/mlx5/mlx5_devx_cmds.h  | 1 +
 drivers/common/mlx5/mlx5_prm.h        | 3 +++
 4 files changed, 9 insertions(+)

diff --git a/drivers/common/mlx5/linux/meson.build b/drivers/common/mlx5/linux/meson.build
index 3334bd5cb2..007834a49b 100644
--- a/drivers/common/mlx5/linux/meson.build
+++ b/drivers/common/mlx5/linux/meson.build
@@ -189,6 +189,8 @@ has_sym_args = [
             'MLX5_WQE_UMR_CTRL_FLAG_INLINE' ],
         [ 'HAVE_MLX5_DR_FLOW_DUMP_RULE', 'infiniband/mlx5dv.h',
             'mlx5dv_dump_dr_rule' ],
+        [ 'HAVE_MLX5_DR_ACTION_ASO_CT', 'infiniband/mlx5dv.h',
+            'MLX5DV_DR_ACTION_FLAGS_ASO_CT_DIRECTION_INITIATOR' ],
 ]
 config = configuration_data()
 foreach arg:has_sym_args
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index 79fff6457c..ad67883fde 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -760,6 +760,9 @@ mlx5_devx_cmd_query_hca_attr(void *ctx,
 		MLX5_GET(cmd_hca_cap, hcattr, umr_indirect_mkey_disabled);
 	attr->umr_modify_entity_size_disabled =
 		MLX5_GET(cmd_hca_cap, hcattr, umr_modify_entity_size_disabled);
+	attr->ct_offload = !!(MLX5_GET64(cmd_hca_cap, hcattr,
+					 general_obj_types) &
+			      MLX5_GENERAL_OBJ_TYPES_CAP_CONN_TRACK_OFFLOAD);
 	if (attr->qos.sup) {
 		MLX5_SET(query_hca_cap_in, in, op_mod,
 			 MLX5_GET_HCA_CAP_OP_MOD_QOS_CAP |
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h
index 870bdb6b30..746320cf04 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -137,6 +137,7 @@ struct mlx5_hca_attr {
 	uint32_t qp_ts_format:2;
 	uint32_t regex:1;
 	uint32_t reg_c_preserve:1;
+	uint32_t ct_offload:1; /* General obj type ASO CT offload supported. */
 	uint32_t regexp_num_of_engines;
 	uint32_t log_max_ft_sampler_num:8;
 	uint32_t geneve_tlv_opt;
diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 4da89d3379..71bdf43668 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -1134,6 +1134,8 @@ enum {
 			(1ULL << MLX5_GENERAL_OBJ_TYPE_FLOW_METER_ASO)
 #define MLX5_GENERAL_OBJ_TYPES_CAP_GENEVE_TLV_OPT \
 			(1ULL << MLX5_OBJ_TYPE_GENEVE_TLV_OPT)
+#define MLX5_GENERAL_OBJ_TYPES_CAP_CONN_TRACK_OFFLOAD \
+			(1ULL << MLX5_GENERAL_OBJ_TYPE_CONN_TRACK_OFFLOAD)
 
 enum {
 	MLX5_HCA_CAP_OPMOD_GET_MAX   = 0,
@@ -2456,6 +2458,7 @@ enum {
 	MLX5_GENERAL_OBJ_TYPE_FLEX_PARSE_GRAPH = 0x0022,
 	MLX5_GENERAL_OBJ_TYPE_FLOW_METER_ASO = 0x0024,
 	MLX5_GENERAL_OBJ_TYPE_FLOW_HIT_ASO = 0x0025,
+	MLX5_GENERAL_OBJ_TYPE_CONN_TRACK_OFFLOAD = 0x0031,
 };
 
 struct mlx5_ifc_general_obj_in_cmd_hdr_bits {
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v5 03/17] net/mlx5: use meter color reg for CT
  2021-05-05  8:05 ` [dpdk-dev] [PATCH v5 00/17] conntrack support in mlx5 PMD Bing Zhao
  2021-05-05  8:05   ` [dpdk-dev] [PATCH v5 01/17] common/mlx5: add connection tracking object definition Bing Zhao
  2021-05-05  8:05   ` [dpdk-dev] [PATCH v5 02/17] common/mlx5: add CT offload capability checking Bing Zhao
@ 2021-05-05  8:05   ` Bing Zhao
  2021-05-05  8:05   ` [dpdk-dev] [PATCH v5 04/17] net/mlx5: initialization of CT management Bing Zhao
                     ` (13 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  8:05 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

Based on the capability, up to 3 registers could be used. Due to the
current register allocation, only REG_C_3, the one already used for
meter color, can be reused right now.

As a result, no more than one ASO action can be supported in the
same flow.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow.c | 4 +++-
 drivers/net/mlx5/mlx5_flow.h | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index a7ceafe221..edad6007a8 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -760,7 +760,9 @@ mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
 			return priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
 			       REG_C_3;
 	case MLX5_MTR_COLOR:
-	case MLX5_ASO_FLOW_HIT: /* Both features use the same REG_C. */
+	case MLX5_ASO_FLOW_HIT:
+	case MLX5_ASO_CONNTRACK:
+		/* All features use the same REG_C. */
 		MLX5_ASSERT(priv->mtr_color_reg != REG_NON);
 		return priv->mtr_color_reg;
 	case MLX5_COPY_MARK:
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index cc3e79d088..964e13a869 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -84,6 +84,7 @@ enum mlx5_feature_name {
 	MLX5_MTR_COLOR,
 	MLX5_MTR_ID,
 	MLX5_ASO_FLOW_HIT,
+	MLX5_ASO_CONNTRACK,
 };
 
 /* Default queue number. */
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v5 04/17] net/mlx5: initialization of CT management
  2021-05-05  8:05 ` [dpdk-dev] [PATCH v5 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (2 preceding siblings ...)
  2021-05-05  8:05   ` [dpdk-dev] [PATCH v5 03/17] net/mlx5: use meter color reg for CT Bing Zhao
@ 2021-05-05  8:05   ` Bing Zhao
  2021-05-05  8:05   ` [dpdk-dev] [PATCH v5 05/17] common/mlx5: add Dexv CT objects creation Bing Zhao
                     ` (12 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  8:05 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

Add the definitions of the management structures for ASO connection
tracking (CT) objects.

For performance reasons, ASO CT objects should be allocated in bulk.
The maximal number of objects per bulk and the granularity could be
fetched from HCA capabilities 2. Right now, a fixed number of 64
objects is used for each bulk for easier management.

The ASO QP for CT is also initialized; its SQ will be used for both
modify and query commands.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/linux/mlx5_os.c | 13 +++++++++
 drivers/net/mlx5/mlx5.c          | 36 +++++++++++++++++++++++
 drivers/net/mlx5/mlx5.h          | 50 ++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_aso.c | 50 ++++++++++++++++++++++++++++++++
 4 files changed, 149 insertions(+)

diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 479ee7d8d1..5ac787106d 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1323,6 +1323,19 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 			DRV_LOG(DEBUG, "Flow Hit ASO is supported.");
 		}
 #endif /* HAVE_MLX5_DR_CREATE_ACTION_ASO */
+#if defined(HAVE_MLX5_DR_CREATE_ACTION_ASO) && \
+	defined(HAVE_MLX5_DR_ACTION_ASO_CT)
+		if (config->hca_attr.ct_offload &&
+		    priv->mtr_color_reg == REG_C_3) {
+			err = mlx5_flow_aso_ct_mng_init(sh);
+			if (err) {
+				err = -err;
+				goto error;
+			}
+			DRV_LOG(DEBUG, "CT ASO is supported.");
+			sh->ct_aso_en = 1;
+		}
+#endif /* HAVE_MLX5_DR_CREATE_ACTION_ASO && HAVE_MLX5_DR_ACTION_ASO_CT */
 #if defined(HAVE_MLX5DV_DR) && defined(HAVE_MLX5_DR_CREATE_ACTION_FLOW_SAMPLE)
 		if (config->hca_attr.log_max_ft_sampler_num > 0  &&
 		    config->dv_flow_en) {
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 8cd6f1eaee..86dbe6d573 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -670,6 +670,42 @@ mlx5_age_event_prepare(struct mlx5_dev_ctx_shared *sh)
 	}
 }
 
+/*
+ * Initialize the ASO connection tracking structure.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_aso_ct_mng_init(struct mlx5_dev_ctx_shared *sh)
+{
+	int err;
+
+	if (sh->ct_mng)
+		return 0;
+	sh->ct_mng = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*sh->ct_mng),
+				 RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+	if (!sh->ct_mng) {
+		DRV_LOG(ERR, "ASO CT management allocation failed.");
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+	err = mlx5_aso_queue_init(sh, ASO_OPC_MOD_CONNECTION_TRACKING);
+	if (err) {
+		mlx5_free(sh->ct_mng);
+		/* rte_errno should be extracted from the failure. */
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+	rte_spinlock_init(&sh->ct_mng->ct_sl);
+	rte_rwlock_init(&sh->ct_mng->resize_rwl);
+	LIST_INIT(&sh->ct_mng->free_cts);
+	return 0;
+}
+
 /**
  * Initialize the flow resources' indexed mempool.
  *
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index c62977613a..1a5c78fa3a 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -983,6 +983,52 @@ struct mlx5_bond_info {
 	} ports[MLX5_BOND_MAX_PORTS];
 };
 
+/* Number of connection tracking objects per pool: must be a power of 2. */
+#define MLX5_ASO_CT_ACTIONS_PER_POOL 64
+
+/* ASO Conntrack state. */
+enum mlx5_aso_ct_state {
+	ASO_CONNTRACK_FREE, /* Inactive, in the free list. */
+	ASO_CONNTRACK_WAIT, /* WQE sent in the SQ. */
+	ASO_CONNTRACK_READY, /* CQE received w/o error. */
+	ASO_CONNTRACK_QUERY, /* WQE for query sent. */
+	ASO_CONNTRACK_MAX, /* Guard. */
+};
+
+/* Generic ASO connection tracking structure. */
+struct mlx5_aso_ct_action {
+	LIST_ENTRY(mlx5_aso_ct_action) next; /* Pointer to the next ASO CT. */
+	void *dr_action_orig; /* General action object for original dir. */
+	void *dr_action_rply; /* General action object for reply dir. */
+	uint32_t refcnt; /* Action used count in device flows. */
+	uint16_t offset; /* Offset of ASO CT in DevX objects bulk. */
+	uint16_t peer; /* The only peer port index could also use this CT. */
+	enum mlx5_aso_ct_state state; /* ASO CT state. */
+	bool is_original; /* The direction of the DR action to be used. */
+};
+
+/* ASO connection tracking software pool definition. */
+struct mlx5_aso_ct_pool {
+	uint16_t index; /* Pool index in pools array. */
+	struct mlx5_devx_obj *devx_obj;
+	/* The first devx object in the bulk, used for freeing (not yet). */
+	struct mlx5_aso_ct_action actions[MLX5_ASO_CT_ACTIONS_PER_POOL];
+	/* CT action structures bulk. */
+};
+
+LIST_HEAD(aso_ct_list, mlx5_aso_ct_action);
+
+/* Pools management structure for ASO connection tracking pools. */
+struct mlx5_aso_ct_pools_mng {
+	struct mlx5_aso_ct_pool **pools;
+	uint16_t n; /* Total number of pools. */
+	uint16_t next; /* Number of pools in use, index of next free pool. */
+	rte_spinlock_t ct_sl; /* The ASO CT free list lock. */
+	rte_rwlock_t resize_rwl; /* The ASO CT pool resize lock. */
+	struct aso_ct_list free_cts; /* Free ASO CT objects list. */
+	struct mlx5_aso_sq aso_sq; /* ASO queue objects. */
+};
+
 /*
  * Shared Infiniband device context for Master/Representors
  * which belong to same IB device with multiple IB ports.
@@ -996,6 +1042,7 @@ struct mlx5_dev_ctx_shared {
 	uint32_t sq_ts_format:2; /* SQ timestamp formats supported. */
 	uint32_t qp_ts_format:2; /* QP timestamp formats supported. */
 	uint32_t meter_aso_en:1; /* Flow Meter ASO is supported. */
+	uint32_t ct_aso_en:1; /* Connection Tracking ASO is supported. */
 	uint32_t max_port; /* Maximal IB device port index. */
 	struct mlx5_bond_info bond; /* Bonding information. */
 	void *ctx; /* Verbs/DV/DevX context. */
@@ -1058,6 +1105,8 @@ struct mlx5_dev_ctx_shared {
 	rte_spinlock_t geneve_tlv_opt_sl; /* Lock for geneve tlv resource */
 	struct mlx5_flow_mtr_mng *mtrmng;
 	/* Meter management structure. */
+	struct mlx5_aso_ct_pools_mng *ct_mng;
+	/* Management data for ASO connection tracking. */
 	struct mlx5_dev_shared_port port[]; /* per device port data array. */
 };
 
@@ -1355,6 +1404,7 @@ bool mlx5_flex_parser_ecpri_exist(struct rte_eth_dev *dev);
 int mlx5_flex_parser_ecpri_alloc(struct rte_eth_dev *dev);
 int mlx5_flow_aso_age_mng_init(struct mlx5_dev_ctx_shared *sh);
 int mlx5_aso_flow_mtrs_mng_init(struct mlx5_dev_ctx_shared *sh);
+int mlx5_flow_aso_ct_mng_init(struct mlx5_dev_ctx_shared *sh);
 
 /* mlx5_ethdev.c */
 
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index 300987d0e9..c24d865284 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -186,6 +186,43 @@ mlx5_aso_mtr_init_sq(struct mlx5_aso_sq *sq)
 	}
 }
 
+/*
+ * Initialize Send Queue used for ASO connection tracking.
+ *
+ * @param[in] sq
+ *   ASO SQ to initialize.
+ */
+static void
+mlx5_aso_ct_init_sq(struct mlx5_aso_sq *sq)
+{
+	volatile struct mlx5_aso_wqe *restrict wqe;
+	int i;
+	int size = 1 << sq->log_desc_n;
+	uint64_t addr;
+
+	/* All the next fields state should stay constant. */
+	for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
+		wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
+							  (sizeof(*wqe) >> 4));
+		/* One unique MR for the query data. */
+		wqe->aso_cseg.lkey = rte_cpu_to_be_32(sq->mr.mkey->id);
+		/* Magic number 64 represents the length of a ASO CT obj. */
+		addr = (uint64_t)((uintptr_t)sq->mr.addr + i * 64);
+		wqe->aso_cseg.va_h = rte_cpu_to_be_32((uint32_t)(addr >> 32));
+		wqe->aso_cseg.va_l_r = rte_cpu_to_be_32((uint32_t)addr | 1u);
+		/*
+		 * The values of operand_masks are different for modify
+		 * and query.
+		 * And data_mask may be different for each modification. In
+		 * query, it could be zero and ignored.
+		 * CQE generation is always needed, in order to decide when
+		 * it is available to create the flow or read the data.
+		 */
+		wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
+						   MLX5_COMP_MODE_OFFSET);
+	}
+}
+
 /**
  * Create Send Queue used for ASO access.
  *
@@ -293,6 +330,19 @@ mlx5_aso_queue_init(struct mlx5_dev_ctx_shared *sh,
 			return -1;
 		mlx5_aso_mtr_init_sq(&sh->mtrmng->pools_mng.sq);
 		break;
+	case ASO_OPC_MOD_CONNECTION_TRACKING:
+		/* 64B per object for query. */
+		if (mlx5_aso_reg_mr(sh, 64 * sq_desc_n,
+				    &sh->ct_mng->aso_sq.mr, 0))
+			return -1;
+		if (mlx5_aso_sq_create(sh->ctx, &sh->ct_mng->aso_sq, 0,
+				sh->tx_uar, sh->pdn, MLX5_ASO_QUEUE_LOG_DESC,
+				sh->sq_ts_format)) {
+			mlx5_aso_dereg_mr(sh, &sh->ct_mng->aso_sq.mr);
+			return -1;
+		}
+		mlx5_aso_ct_init_sq(&sh->ct_mng->aso_sq);
+		break;
 	default:
 		DRV_LOG(ERR, "Unknown ASO operation mode");
 		return -1;
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v5 05/17] common/mlx5: add Dexv CT objects creation
  2021-05-05  8:05 ` [dpdk-dev] [PATCH v5 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (3 preceding siblings ...)
  2021-05-05  8:05   ` [dpdk-dev] [PATCH v5 04/17] net/mlx5: initialization of CT management Bing Zhao
@ 2021-05-05  8:05   ` Bing Zhao
  2021-05-05  8:05   ` [dpdk-dev] [PATCH v5 06/17] net/mlx5: add modify support for CT Bing Zhao
                     ` (11 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  8:05 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

Add support for creating connection tracking ASO objects via a DevX
command. Right now, only bulk creation is supported.

By default, the objects are created with zeroed contents. Before a
single object can be used, it must be modified by posting a WQE to
the ASO CT SQ.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/common/mlx5/mlx5_devx_cmds.c | 50 ++++++++++++++++++++++++++++
 drivers/common/mlx5/mlx5_devx_cmds.h |  4 +++
 drivers/common/mlx5/version.map      |  1 +
 3 files changed, 55 insertions(+)

diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index ad67883fde..dc01266642 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -2232,6 +2232,56 @@ mlx5_devx_cmd_create_flow_meter_aso_obj(void *ctx, uint32_t pd,
 	return flow_meter_aso_obj;
 }
 
+/*
+ * Create general object of type CONN_TRACK_OFFLOAD using DevX API.
+ *
+ * @param[in] ctx
+ *   Context returned from mlx5 open_device() glue function.
+ * @param [in] pd
+ *   PD value to associate the CONN_TRACK_OFFLOAD ASO object with.
+ * @param [in] log_obj_size
+ *   log_obj_size to allocate its power of 2 * objects
+ *   in one CONN_TRACK_OFFLOAD bulk allocation.
+ *
+ * @return
+ *   The DevX object created, NULL otherwise and rte_errno is set.
+ */
+struct mlx5_devx_obj *
+mlx5_devx_cmd_create_conn_track_offload_obj(void *ctx, uint32_t pd,
+					    uint32_t log_obj_size)
+{
+	uint32_t in[MLX5_ST_SZ_DW(create_conn_track_aso_in)] = {0};
+	uint32_t out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+	struct mlx5_devx_obj *ct_aso_obj;
+	void *ptr;
+
+	ct_aso_obj = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*ct_aso_obj),
+				 0, SOCKET_ID_ANY);
+	if (!ct_aso_obj) {
+		DRV_LOG(ERR, "Failed to allocate CONN_TRACK_OFFLOAD object.");
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+	ptr = MLX5_ADDR_OF(create_conn_track_aso_in, in, hdr);
+	MLX5_SET(general_obj_in_cmd_hdr, ptr, opcode,
+		 MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, ptr, obj_type,
+		 MLX5_GENERAL_OBJ_TYPE_CONN_TRACK_OFFLOAD);
+	MLX5_SET(general_obj_in_cmd_hdr, ptr, log_obj_range, log_obj_size);
+	ptr = MLX5_ADDR_OF(create_conn_track_aso_in, in, conn_track_offload);
+	MLX5_SET(conn_track_offload, ptr, conn_track_aso_access_pd, pd);
+	ct_aso_obj->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in),
+						     out, sizeof(out));
+	if (!ct_aso_obj->obj) {
+		rte_errno = errno;
+		DRV_LOG(ERR, "Failed to create CONN_TRACK_OFFLOAD obj by using DevX.");
+		mlx5_free(ct_aso_obj);
+		return NULL;
+	}
+	ct_aso_obj->id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+	return ct_aso_obj;
+}
+
 /**
  * Create general object of type GENEVE TLV option using DevX API.
  *
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h
index 746320cf04..e67cea506d 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -569,6 +569,10 @@ struct mlx5_devx_obj *mlx5_devx_cmd_queue_counter_alloc(void *ctx);
 __rte_internal
 int mlx5_devx_cmd_queue_counter_query(struct mlx5_devx_obj *dcs, int clear,
 				      uint32_t *out_of_buffers);
+__rte_internal
+struct mlx5_devx_obj *mlx5_devx_cmd_create_conn_track_offload_obj(void *ctx,
+					uint32_t pd, uint32_t log_obj_size);
+
 /**
  * Create general object of type FLOW_METER_ASO using DevX API..
  *
diff --git a/drivers/common/mlx5/version.map b/drivers/common/mlx5/version.map
index 18dc96276d..4bbcba5b8e 100644
--- a/drivers/common/mlx5/version.map
+++ b/drivers/common/mlx5/version.map
@@ -13,6 +13,7 @@ INTERNAL {
 	mlx5_dev_to_pci_addr; # WINDOWS_NO_EXPORT
 
 	mlx5_devx_cmd_alloc_pd;
+	mlx5_devx_cmd_create_conn_track_offload_obj;
 	mlx5_devx_cmd_create_cq;
 	mlx5_devx_cmd_create_flex_parser;
 	mlx5_devx_cmd_create_qp;
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v5 06/17] net/mlx5: add modify support for CT
  2021-05-05  8:05 ` [dpdk-dev] [PATCH v5 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (4 preceding siblings ...)
  2021-05-05  8:05   ` [dpdk-dev] [PATCH v5 05/17] common/mlx5: add Dexv CT objects creation Bing Zhao
@ 2021-05-05  8:05   ` Bing Zhao
  2021-05-05  8:05   ` [dpdk-dev] [PATCH v5 07/17] net/mlx5: add actions creating " Bing Zhao
                     ` (10 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  8:05 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

After a bulk of connection tracking objects is allocated, the
contents of all the objects are zero by default. Every newly
allocated object must be modified via a WQE operation before it is
used.

In order to reduce the latency of flow creation, the modification is
done asynchronously instead of busy-waiting for the CQE to be
generated.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.h          |   8 +
 drivers/net/mlx5/mlx5_flow.h     |   3 +
 drivers/net/mlx5/mlx5_flow_aso.c | 252 +++++++++++++++++++++++++++++++
 3 files changed, 263 insertions(+)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 1a5c78fa3a..1898a0401f 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -490,6 +490,7 @@ struct mlx5_aso_sq_elem {
 			uint16_t burst_size;
 		};
 		struct mlx5_aso_mtr *mtr;
+		struct mlx5_aso_ct_action *ct;
 	};
 };
 
@@ -1007,6 +1008,10 @@ struct mlx5_aso_ct_action {
 	bool is_original; /* The direction of the DR action to be used. */
 };
 
+/* CT action object state update. */
+#define MLX5_ASO_CT_UPDATE_STATE(c, s) \
+	__atomic_store_n(&((c)->state), (s), __ATOMIC_RELAXED)
+
 /* ASO connection tracking software pool definition. */
 struct mlx5_aso_ct_pool {
 	uint16_t index; /* Pool index in pools array. */
@@ -1690,5 +1695,8 @@ int mlx5_aso_meter_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
 		struct mlx5_aso_mtr *mtr);
 int mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh,
 		struct mlx5_aso_mtr *mtr);
+int mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
+			      struct mlx5_aso_ct_action *ct,
+			      const struct rte_flow_action_conntrack *profile);
 
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 964e13a869..eb5b53ac6a 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -45,6 +45,7 @@ enum mlx5_rte_flow_action_type {
 enum {
 	MLX5_INDIRECT_ACTION_TYPE_RSS,
 	MLX5_INDIRECT_ACTION_TYPE_AGE,
+	MLX5_INDIRECT_ACTION_TYPE_AGE,
 };
 
 /* Matches on selected register. */
@@ -839,6 +840,8 @@ struct mlx5_flow {
 #define MLX5_ASO_WQE_CQE_RESPONSE_DELAY 10u
 #define MLX5_MTR_POLL_WQE_CQE_TIMES 100000u
 
+#define MLX5_CT_POLL_WQE_CQE_TIMES MLX5_MTR_POLL_WQE_CQE_TIMES
+
 #define MLX5_MAN_WIDTH 8
 /* Legacy Meter parameter structure. */
 struct mlx5_legacy_flow_meter {
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index c24d865284..0ff19e6171 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -887,3 +887,255 @@ mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh,
 			mtr->offset);
 	return -1;
 }
+
+/*
+ * Post a WQE to the ASO CT SQ to modify the context.
+ *
+ * @param[in] mng
+ *   Pointer to the CT pools management structure.
+ * @param[in] ct
+ *   Pointer to the generic CT structure related to the context.
+ * @param[in] profile
+ *   Pointer to configuration profile.
+ *
+ * @return
+ *   1 on success (WQE number), 0 on failure.
+ */
+static uint16_t
+mlx5_aso_ct_sq_enqueue_single(struct mlx5_aso_ct_pools_mng *mng,
+			      struct mlx5_aso_ct_action *ct,
+			      const struct rte_flow_action_conntrack *profile)
+{
+	volatile struct mlx5_aso_wqe *wqe = NULL;
+	struct mlx5_aso_sq *sq = &mng->aso_sq;
+	uint16_t size = 1 << sq->log_desc_n;
+	uint16_t mask = size - 1;
+	uint16_t res;
+	struct mlx5_aso_ct_pool *pool;
+	void *desg;
+	void *orig_dir;
+	void *reply_dir;
+
+	rte_spinlock_lock(&sq->sqsl);
+	/* Prevent other threads to update the index. */
+	res = size - (uint16_t)(sq->head - sq->tail);
+	if (unlikely(!res)) {
+		rte_spinlock_unlock(&sq->sqsl);
+		DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
+		return 0;
+	}
+	wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
+	rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
+	/* Fill next WQE. */
+	MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_WAIT);
+	sq->elts[sq->head & mask].ct = ct;
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	/* Each WQE will have a single CT object. */
+	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
+						  ct->offset);
+	wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
+			(ASO_OPC_MOD_CONNECTION_TRACKING <<
+			 WQE_CSEG_OPC_MOD_OFFSET) |
+			sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
+	wqe->aso_cseg.operand_masks = rte_cpu_to_be_32
+			(0u |
+			 (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
+			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
+			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
+			 (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
+	wqe->aso_cseg.data_mask = UINT64_MAX;
+	/* To make compiler happy. */
+	desg = (void *)(uintptr_t)wqe->aso_dseg.data;
+	MLX5_SET(conn_track_aso, desg, valid, 1);
+	MLX5_SET(conn_track_aso, desg, state, profile->state);
+	MLX5_SET(conn_track_aso, desg, freeze_track, !profile->enable);
+	MLX5_SET(conn_track_aso, desg, connection_assured,
+		 profile->live_connection);
+	MLX5_SET(conn_track_aso, desg, sack_permitted, profile->selective_ack);
+	MLX5_SET(conn_track_aso, desg, challenged_acked,
+		 profile->challenge_ack_passed);
+	/* Heartbeat, retransmission_counter, retranmission_limit_exceeded: 0 */
+	MLX5_SET(conn_track_aso, desg, heartbeat, 0);
+	MLX5_SET(conn_track_aso, desg, max_ack_window,
+		 profile->max_ack_window);
+	MLX5_SET(conn_track_aso, desg, retransmission_counter, 0);
+	MLX5_SET(conn_track_aso, desg, retranmission_limit_exceeded, 0);
+	MLX5_SET(conn_track_aso, desg, retranmission_limit,
+		 profile->retransmission_limit);
+	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_scale,
+		 profile->reply_dir.scale);
+	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_close_initiated,
+		 profile->reply_dir.close_initiated);
+	/* Both directions will use the same liberal mode. */
+	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_liberal_enabled,
+		 profile->liberal_mode);
+	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_data_unacked,
+		 profile->reply_dir.data_unacked);
+	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_max_ack,
+		 profile->reply_dir.last_ack_seen);
+	MLX5_SET(conn_track_aso, desg, original_direction_tcp_scale,
+		 profile->original_dir.scale);
+	MLX5_SET(conn_track_aso, desg, original_direction_tcp_close_initiated,
+		 profile->original_dir.close_initiated);
+	MLX5_SET(conn_track_aso, desg, original_direction_tcp_liberal_enabled,
+		 profile->liberal_mode);
+	MLX5_SET(conn_track_aso, desg, original_direction_tcp_data_unacked,
+		 profile->original_dir.data_unacked);
+	MLX5_SET(conn_track_aso, desg, original_direction_tcp_max_ack,
+		 profile->original_dir.last_ack_seen);
+	MLX5_SET(conn_track_aso, desg, last_win, profile->last_window);
+	MLX5_SET(conn_track_aso, desg, last_dir, profile->last_direction);
+	MLX5_SET(conn_track_aso, desg, last_index, profile->last_index);
+	MLX5_SET(conn_track_aso, desg, last_seq, profile->last_seq);
+	MLX5_SET(conn_track_aso, desg, last_ack, profile->last_ack);
+	MLX5_SET(conn_track_aso, desg, last_end, profile->last_end);
+	orig_dir = MLX5_ADDR_OF(conn_track_aso, desg, original_dir);
+	MLX5_SET(tcp_window_params, orig_dir, sent_end,
+		 profile->original_dir.sent_end);
+	MLX5_SET(tcp_window_params, orig_dir, reply_end,
+		 profile->original_dir.reply_end);
+	MLX5_SET(tcp_window_params, orig_dir, max_win,
+		 profile->original_dir.max_win);
+	MLX5_SET(tcp_window_params, orig_dir, max_ack,
+		 profile->original_dir.max_ack);
+	reply_dir = MLX5_ADDR_OF(conn_track_aso, desg, reply_dir);
+	MLX5_SET(tcp_window_params, reply_dir, sent_end,
+		 profile->reply_dir.sent_end);
+	MLX5_SET(tcp_window_params, reply_dir, reply_end,
+		 profile->reply_dir.reply_end);
+	MLX5_SET(tcp_window_params, reply_dir, max_win,
+		 profile->reply_dir.max_win);
+	MLX5_SET(tcp_window_params, reply_dir, max_ack,
+		 profile->reply_dir.max_ack);
+	sq->head++;
+	sq->pi += 2; /* Each WQE contains 2 WQEBB's. */
+	rte_io_wmb();
+	sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
+	rte_wmb();
+	*sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH. */
+	rte_wmb();
+	rte_spinlock_unlock(&sq->sqsl);
+	return 1;
+}
+
+/*
+ * Update the status field of CTs to indicate ready to be used by flows.
+ * A continuous number of CTs since last update.
+ *
+ * @param[in] sq
+ *   Pointer to ASO CT SQ.
+ * @param[in] num
+ *   Number of CT structures to be updated.
+ *
+ * @note
+ *   This function does not return a value.
+ */
+static void
+mlx5_aso_ct_status_update(struct mlx5_aso_sq *sq, uint16_t num)
+{
+	uint16_t size = 1 << sq->log_desc_n;
+	uint16_t mask = size - 1;
+	uint16_t i;
+	struct mlx5_aso_ct_action *ct = NULL;
+	uint16_t idx;
+
+	for (i = 0; i < num; i++) {
+		idx = (uint16_t)((sq->tail + i) & mask);
+		ct = sq->elts[idx].ct;
+		MLX5_ASSERT(ct);
+		MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_READY);
+	}
+}
+
+/*
+ * Handle completions from WQEs sent to ASO CT.
+ *
+ * @param[in] mng
+ *   Pointer to the CT pools management structure.
+ */
+static void
+mlx5_aso_ct_completion_handle(struct mlx5_aso_ct_pools_mng *mng)
+{
+	struct mlx5_aso_sq *sq = &mng->aso_sq;
+	struct mlx5_aso_cq *cq = &sq->cq;
+	volatile struct mlx5_cqe *restrict cqe;
+	const uint32_t cq_size = 1 << cq->log_desc_n;
+	const uint32_t mask = cq_size - 1;
+	uint32_t idx;
+	uint32_t next_idx;
+	uint16_t max;
+	uint16_t n = 0;
+	int ret;
+
+	rte_spinlock_lock(&sq->sqsl);
+	max = (uint16_t)(sq->head - sq->tail);
+	if (unlikely(!max)) {
+		rte_spinlock_unlock(&sq->sqsl);
+		return;
+	}
+	next_idx = cq->cq_ci & mask;
+	do {
+		idx = next_idx;
+		next_idx = (cq->cq_ci + 1) & mask;
+		/* Need to confirm the position of the prefetch. */
+		rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
+		cqe = &cq->cq_obj.cqes[idx];
+		ret = check_cqe(cqe, cq_size, cq->cq_ci);
+		/*
+		 * Be sure owner read is done before any other cookie field or
+		 * opaque field.
+		 */
+		rte_io_rmb();
+		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
+			if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
+				break;
+			mlx5_aso_cqe_err_handle(sq);
+		} else {
+			n++;
+		}
+		cq->cq_ci++;
+	} while (1);
+	if (likely(n)) {
+		mlx5_aso_ct_status_update(sq, n);
+		sq->tail += n;
+		rte_io_wmb();
+		cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
+	}
+	rte_spinlock_unlock(&sq->sqsl);
+}
+
+/*
+ * Update connection tracking ASO context by sending WQE.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ * @param[in] ct
+ *   Pointer to connection tracking offload object.
+ * @param[in] profile
+ *   Pointer to connection tracking TCP parameter.
+ *
+ * @return
+ *   0 on success, -1 on failure.
+ */
+int
+mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
+			  struct mlx5_aso_ct_action *ct,
+			  const struct rte_flow_action_conntrack *profile)
+{
+	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
+	uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
+	struct mlx5_aso_ct_pool *pool;
+
+	MLX5_ASSERT(ct);
+	do {
+		mlx5_aso_ct_completion_handle(mng);
+		if (mlx5_aso_ct_sq_enqueue_single(mng, ct, profile))
+			return 0;
+		/* Waiting for wqe resource. */
+		rte_delay_us_sleep(10u);
+	} while (--poll_wqe_times);
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	DRV_LOG(ERR, "Fail to send WQE for ASO CT %d in pool %d",
+		ct->offset, pool->index);
+	return -1;
+}
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v5 07/17] net/mlx5: add actions creating for CT
  2021-05-05  8:05 ` [dpdk-dev] [PATCH v5 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (5 preceding siblings ...)
  2021-05-05  8:05   ` [dpdk-dev] [PATCH v5 06/17] net/mlx5: add modify support for CT Bing Zhao
@ 2021-05-05  8:05   ` Bing Zhao
  2021-05-05  8:05   ` [dpdk-dev] [PATCH v5 08/17] net/mlx5: close CT management structure Bing Zhao
                     ` (9 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  8:05 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

Allocate a CT from the management pools and, by default, create the
DR actions for both directions.

If there is no available connection tracking action, a new pool will
be created with a fixed-size bulk allocation. Right now, all the
resources are managed through a linked list.

The ASO connection tracking context associated with these actions
needs to be updated via a WQE before it is used for steering.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.h         |   4 +
 drivers/net/mlx5/mlx5_flow.h    |  29 +++-
 drivers/net/mlx5/mlx5_flow_dv.c | 263 ++++++++++++++++++++++++++++++++
 3 files changed, 295 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 1898a0401f..de18a59c8e 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -987,6 +987,10 @@ struct mlx5_bond_info {
 /* Number of connection tracking objects per pool: must be a power of 2. */
 #define MLX5_ASO_CT_ACTIONS_PER_POOL 64
 
+/* Generate incremental and unique CT index from pool and offset. */
+#define MLX5_MAKE_CT_IDX(pool, offset) \
+	((pool) * MLX5_ASO_CT_ACTIONS_PER_POOL + (offset) + 1)
+
 /* ASO Conntrack state. */
 enum mlx5_aso_ct_state {
 	ASO_CONNTRACK_FREE, /* Inactive, in the free list. */
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index eb5b53ac6a..8f2bc7d2f6 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -45,7 +45,7 @@ enum mlx5_rte_flow_action_type {
 enum {
 	MLX5_INDIRECT_ACTION_TYPE_RSS,
 	MLX5_INDIRECT_ACTION_TYPE_AGE,
-	MLX5_INDIRECT_ACTION_TYPE_AGE,
+	MLX5_INDIRECT_ACTION_TYPE_CT,
 };
 
 /* Matches on selected register. */
@@ -1288,6 +1288,33 @@ mlx5_aso_meter_by_idx(struct mlx5_priv *priv, uint32_t idx)
 	return &pool->mtrs[idx % MLX5_ASO_MTRS_PER_POOL];
 }
 
+/*
+ * Get ASO CT action by index.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] idx
+ *   Index to the ASO CT action.
+ *
+ * @return
+ *   The specified ASO CT action pointer.
+ */
+static inline struct mlx5_aso_ct_action *
+flow_aso_ct_get_by_idx(struct rte_eth_dev *dev, uint32_t idx)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	struct mlx5_aso_ct_pool *pool;
+
+	idx--;
+	MLX5_ASSERT((idx / MLX5_ASO_CT_ACTIONS_PER_POOL) < mng->n);
+	/* Bit operation AND could be used. */
+	rte_rwlock_read_lock(&mng->resize_rwl);
+	pool = mng->pools[idx / MLX5_ASO_CT_ACTIONS_PER_POOL];
+	rte_rwlock_read_unlock(&mng->resize_rwl);
+	return &pool->actions[idx % MLX5_ASO_CT_ACTIONS_PER_POOL];
+}
+
 int mlx5_flow_group_to_table(struct rte_eth_dev *dev,
 			     const struct mlx5_flow_tunnel *tunnel,
 			     uint32_t group, uint32_t *table,
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 0d022dff3f..c8ff693e4c 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -11120,6 +11120,262 @@ flow_dv_translate_create_aso_age(struct rte_eth_dev *dev,
 	return age_idx;
 }
 
+/*
+ * Drop one reference of an ASO CT action, free it on the last reference.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] idx
+ *   1-based index of the ASO CT action to release.
+ *
+ * @return
+ *   0 when the CT action was freed, otherwise the remaining references.
+ */
+static inline int
+flow_dv_aso_ct_release(struct rte_eth_dev *dev, uint32_t idx)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	struct mlx5_aso_ct_action *ct = flow_aso_ct_get_by_idx(dev, idx);
+	uint32_t ret = __atomic_sub_fetch(&ct->refcnt, 1, __ATOMIC_RELAXED);
+
+	if (!ret) {
+		if (ct->dr_action_orig) {
+#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
+			claim_zero(mlx5_glue->destroy_flow_action
+					(ct->dr_action_orig));
+#endif
+			ct->dr_action_orig = NULL;
+		}
+		if (ct->dr_action_rply) {
+#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
+			claim_zero(mlx5_glue->destroy_flow_action
+					(ct->dr_action_rply));
+#endif
+			ct->dr_action_rply = NULL;
+		}
+		rte_spinlock_lock(&mng->ct_sl);
+		LIST_INSERT_HEAD(&mng->free_cts, ct, next);
+		rte_spinlock_unlock(&mng->ct_sl);
+	}
+	return ret;
+}
+
+/*
+ * Grow the ASO CT pools pointer array by 64 entries.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ *
+ * @return
+ *   0 on success, otherwise negative errno value and rte_errno is set.
+ */
+static int
+flow_dv_aso_ct_pools_resize(struct rte_eth_dev *dev)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	void *old_pools = mng->pools;
+	/* Magic number now, need a macro. */
+	uint32_t resize = mng->n + 64;
+	uint32_t mem_size = sizeof(struct mlx5_aso_ct_pool *) * resize;
+	void *pools = mlx5_malloc(MLX5_MEM_ZERO, mem_size, 0, SOCKET_ID_ANY);
+
+	if (!pools) {
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+	rte_rwlock_write_lock(&mng->resize_rwl);
+	/* ASO SQ/QP was already initialized in the startup. */
+	if (old_pools) {
+		/* Copy the old pool pointers; realloc could be an alternative. */
+		rte_memcpy(pools, old_pools,
+			   mng->n * sizeof(struct mlx5_aso_ct_pool *));
+		mlx5_free(old_pools);
+	}
+	mng->n = resize;
+	mng->pools = pools;
+	rte_rwlock_write_unlock(&mng->resize_rwl);
+	return 0;
+}
+
+/*
+ * Create and initialize a new ASO CT pool.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[out] ct_free
+ *   Where to put the pointer of a new CT action.
+ *
+ * @return
+ *   The CT actions pool pointer and @p ct_free is set on success,
+ *   NULL otherwise and rte_errno is set.
+ */
+static struct mlx5_aso_ct_pool *
+flow_dv_ct_pool_create(struct rte_eth_dev *dev,
+		       struct mlx5_aso_ct_action **ct_free)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	struct mlx5_aso_ct_pool *pool = NULL;
+	struct mlx5_devx_obj *obj = NULL;
+	uint32_t i;
+	uint32_t log_obj_size = rte_log2_u32(MLX5_ASO_CT_ACTIONS_PER_POOL);
+
+	obj = mlx5_devx_cmd_create_conn_track_offload_obj(priv->sh->ctx,
+						priv->sh->pdn, log_obj_size);
+	if (!obj) {
+		rte_errno = ENODATA;
+		DRV_LOG(ERR, "Failed to create conn_track_offload_obj using DevX.");
+		return NULL;
+	}
+	pool = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*pool), 0, SOCKET_ID_ANY);
+	if (!pool) {
+		rte_errno = ENOMEM;
+		claim_zero(mlx5_devx_cmd_destroy(obj));
+		return NULL;
+	}
+	pool->devx_obj = obj;
+	pool->index = mng->next;
+	/* Resize pools array if there is no room for the new pool in it. */
+	if (pool->index == mng->n && flow_dv_aso_ct_pools_resize(dev)) {
+		claim_zero(mlx5_devx_cmd_destroy(obj));
+		mlx5_free(pool);
+		return NULL;
+	}
+	mng->pools[pool->index] = pool;
+	mng->next++;
+	/* Assign the first action in the new pool, the rest go to free list. */
+	*ct_free = &pool->actions[0];
+	/* Caller holds mng->ct_sl, so the list operation is safe here. */
+	for (i = 1; i < MLX5_ASO_CT_ACTIONS_PER_POOL; i++) {
+		/* refcnt is already 0, the pool memory was allocated zeroed. */
+		pool->actions[i].offset = i;
+		LIST_INSERT_HEAD(&mng->free_cts, &pool->actions[i], next);
+	}
+	return pool;
+}
+
+/*
+ * Allocate an ASO CT action from the free list, creating a pool if empty.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   Index to ASO CT action on success, 0 otherwise and rte_errno is set.
+ */
+static uint32_t
+flow_dv_aso_ct_alloc(struct rte_eth_dev *dev, struct rte_flow_error *error)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	struct mlx5_aso_ct_action *ct = NULL;
+	struct mlx5_aso_ct_pool *pool;
+	uint8_t reg_c;
+	uint32_t ct_idx;
+
+	MLX5_ASSERT(mng);
+	if (!priv->config.devx) {
+		rte_errno = ENOTSUP;
+		return 0;
+	}
+	/* Take a free CT action; if there is none, create a new pool. */
+	rte_spinlock_lock(&mng->ct_sl);
+	ct = LIST_FIRST(&mng->free_cts);
+	if (ct) {
+		LIST_REMOVE(ct, next);
+	} else if (!flow_dv_ct_pool_create(dev, &ct)) {
+		rte_spinlock_unlock(&mng->ct_sl);
+		rte_flow_error_set(error, rte_errno, RTE_FLOW_ERROR_TYPE_ACTION,
+				   NULL, "failed to create ASO CT pool");
+		return 0;
+	}
+	rte_spinlock_unlock(&mng->ct_sl);
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	ct_idx = MLX5_MAKE_CT_IDX(pool->index, ct->offset);
+	/* refcnt meaning - 0: inactive, 1: created, 2+: used by flows. */
+	__atomic_store_n(&ct->refcnt, 1, __ATOMIC_RELAXED);
+	reg_c = mlx5_flow_get_reg_id(dev, MLX5_ASO_CONNTRACK, 0, error);
+	if (!ct->dr_action_orig) {
+#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
+		ct->dr_action_orig = mlx5_glue->dv_create_flow_action_aso
+			(priv->sh->rx_domain, pool->devx_obj->obj,
+			 ct->offset,
+			 MLX5DV_DR_ACTION_FLAGS_ASO_CT_DIRECTION_INITIATOR,
+			 reg_c - REG_C_0);
+#else
+		RTE_SET_USED(reg_c);
+#endif
+		if (!ct->dr_action_orig) {
+			flow_dv_aso_ct_release(dev, ct_idx);
+			rte_flow_error_set(error, rte_errno,
+					   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					   "failed to create ASO CT action");
+			return 0;
+		}
+	}
+	if (!ct->dr_action_rply) {
+#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
+		ct->dr_action_rply = mlx5_glue->dv_create_flow_action_aso
+			(priv->sh->rx_domain, pool->devx_obj->obj,
+			 ct->offset,
+			 MLX5DV_DR_ACTION_FLAGS_ASO_CT_DIRECTION_RESPONDER,
+			 reg_c - REG_C_0);
+#endif
+		if (!ct->dr_action_rply) {
+			flow_dv_aso_ct_release(dev, ct_idx);
+			rte_flow_error_set(error, rte_errno,
+					   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					   "failed to create ASO CT action");
+			return 0;
+		}
+	}
+	return ct_idx;
+}
+
+/*
+ * Create a conntrack object with context and actions by using ASO mechanism.
+ *
+ * @param[in] dev
+ *   Pointer to rte_eth_dev structure.
+ * @param[in] pro
+ *   Pointer to conntrack information profile.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   Index to conntrack object on success, 0 otherwise and @p error is set.
+ */
+static uint32_t
+flow_dv_translate_create_conntrack(struct rte_eth_dev *dev,
+				   const struct rte_flow_action_conntrack *pro,
+				   struct rte_flow_error *error)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
+	struct mlx5_aso_ct_action *ct;
+	uint32_t idx;
+
+	/*
+	 * rte_flow_error_set() returns a negative errno, which must not be
+	 * returned from here: as uint32_t it would look like a valid index.
+	 */
+	if (!sh->ct_aso_en) {
+		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+				   NULL, "Connection is not supported");
+		return 0;
+	}
+	idx = flow_dv_aso_ct_alloc(dev, error);
+	if (!idx) {
+		rte_flow_error_set(error, rte_errno, RTE_FLOW_ERROR_TYPE_ACTION,
+				   NULL, "Failed to allocate CT object");
+		return 0;
+	}
+	ct = flow_aso_ct_get_by_idx(dev, idx);
+	if (mlx5_aso_ct_update_by_wqe(sh, ct, pro)) {
+		/* Give the just allocated object back instead of leaking it. */
+		flow_dv_aso_ct_release(dev, idx);
+		rte_flow_error_set(error, EBUSY, RTE_FLOW_ERROR_TYPE_ACTION,
+				   NULL, "Failed to update CT");
+		return 0;
+	}
+	return idx;
+}
+
 /**
  * Fill the flow with DV spec, lock free
  * (mutex should be acquired by caller).
@@ -13317,6 +13573,7 @@ flow_dv_action_create(struct rte_eth_dev *dev,
 {
 	uint32_t idx = 0;
 	uint32_t ret = 0;
+	struct mlx5_priv *priv = dev->data->dev_private;
 
 	switch (action->type) {
 	case RTE_FLOW_ACTION_TYPE_RSS:
@@ -13337,6 +13594,12 @@ flow_dv_action_create(struct rte_eth_dev *dev,
 							 (void *)(uintptr_t)idx;
 		}
 		break;
+	case RTE_FLOW_ACTION_TYPE_CONNTRACK:
+		ret = flow_dv_translate_create_conntrack(dev, action->conf,
+							 err);
+		idx = (MLX5_INDIRECT_ACTION_TYPE_CT <<
+		       MLX5_INDIRECT_ACTION_TYPE_OFFSET) | ret;
+		break;
 	default:
 		rte_flow_error_set(err, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
 				   NULL, "action type not supported");
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v5 08/17] net/mlx5: close CT management structure
  2021-05-05  8:05 ` [dpdk-dev] [PATCH v5 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (6 preceding siblings ...)
  2021-05-05  8:05   ` [dpdk-dev] [PATCH v5 07/17] net/mlx5: add actions creating " Bing Zhao
@ 2021-05-05  8:05   ` Bing Zhao
  2021-05-05  8:05   ` [dpdk-dev] [PATCH v5 09/17] net/mlx5: add ASO CT query implementation Bing Zhao
                     ` (8 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  8:05 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

When the IB shared context is freed while a device is being stopped,
the ASO connection tracking management structure should also be
cleaned up.

All the DR actions created should be destroyed. The structures need
to be freed and the ASO CT QP should be released. Meanwhile, the
memory region allocated and registered for queries should also be
deregistered and then freed.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.c          | 56 ++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_aso.c |  4 +++
 2 files changed, 60 insertions(+)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 86dbe6d573..d563da109a 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -706,6 +706,60 @@ mlx5_flow_aso_ct_mng_init(struct mlx5_dev_ctx_shared *sh)
 	return 0;
 }
 
+/*
+ * Close and release all the resources of the
+ * ASO connection tracking management structure.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object to free.
+ */
+static void
+mlx5_flow_aso_ct_mng_close(struct mlx5_dev_ctx_shared *sh)
+{
+	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
+	struct mlx5_aso_ct_pool *ct_pool;
+	struct mlx5_aso_ct_action *ct;
+	uint32_t idx;
+	uint32_t val;
+	uint32_t cnt;
+	int i;
+
+	mlx5_aso_queue_uninit(sh, ASO_OPC_MOD_CONNECTION_TRACKING);
+	idx = mng->next;
+	while (idx--) {
+		cnt = 0;
+		ct_pool = mng->pools[idx];
+		for (i = 0; i < MLX5_ASO_CT_ACTIONS_PER_POOL; i++) {
+			ct = &ct_pool->actions[i];
+			val = __atomic_fetch_sub(&ct->refcnt, 1,
+						 __ATOMIC_RELAXED);
+			MLX5_ASSERT(val == 1);
+			if (val > 1)
+				cnt++;
+#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
+			if (ct->dr_action_orig)
+				claim_zero(mlx5_glue->destroy_flow_action
+							(ct->dr_action_orig));
+			if (ct->dr_action_rply)
+				claim_zero(mlx5_glue->destroy_flow_action
+							(ct->dr_action_rply));
+#endif
+		}
+		claim_zero(mlx5_devx_cmd_destroy(ct_pool->devx_obj));
+		if (cnt) {
+			DRV_LOG(DEBUG, "%u ASO CT objects are being used in the pool %u",
+				cnt, idx);
+		}
+		mlx5_free(ct_pool);
+		/* Keep the pools counter in sync in case of failure. */
+		mng->next--;
+	}
+	mlx5_free(mng->pools);
+	mlx5_free(mng);
+	/* Management structure must be cleared to 0s during allocation. */
+	sh->ct_mng = NULL;
+}
+
 /**
  * Initialize the flow resources' indexed mempool.
  *
@@ -1508,6 +1562,8 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	if (priv->mreg_cp_tbl)
 		mlx5_hlist_destroy(priv->mreg_cp_tbl);
 	mlx5_mprq_free_mp(dev);
+	if (priv->sh->ct_mng)
+		mlx5_flow_aso_ct_mng_close(priv->sh);
 	mlx5_os_free_shared_dr(priv);
 	if (priv->rss_conf.rss_key != NULL)
 		mlx5_free(priv->rss_conf.rss_key);
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index 0ff19e6171..3c2350a6b8 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -372,6 +372,10 @@ mlx5_aso_queue_uninit(struct mlx5_dev_ctx_shared *sh,
 	case ASO_OPC_MOD_POLICER:
 		sq = &sh->mtrmng->pools_mng.sq;
 		break;
+	case ASO_OPC_MOD_CONNECTION_TRACKING:
+		mlx5_aso_dereg_mr(sh, &sh->ct_mng->aso_sq.mr);
+		sq = &sh->ct_mng->aso_sq;
+		break;
 	default:
 		DRV_LOG(ERR, "Unknown ASO operation mode");
 		return;
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v5 09/17] net/mlx5: add ASO CT query implementation
  2021-05-05  8:05 ` [dpdk-dev] [PATCH v5 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (7 preceding siblings ...)
  2021-05-05  8:05   ` [dpdk-dev] [PATCH v5 08/17] net/mlx5: close CT management structure Bing Zhao
@ 2021-05-05  8:05   ` Bing Zhao
  2021-05-05  8:05   ` [dpdk-dev] [PATCH v5 10/17] net/mlx5: add ASO CT destroy handling Bing Zhao
                     ` (7 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  8:05 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

After the connection tracking context is created and used by the
flows, the context is updated by the HW automatically each time a
packet passes the CT validation. E.g., the ACK, SEQ, window and
state of the CT can be updated by traffic in both directions.

In order to query the updated contents of this context, a WQE should
be posted to the SQ with a return buffer. The data will be written
into the buffer, and the profile is then filled from that data.

During the execution of query command, the context may be updated.
The result of the query command may not be the latest one.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.h          |  10 +-
 drivers/net/mlx5/mlx5_flow_aso.c | 245 +++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_dv.c  |  19 +++
 3 files changed, 273 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index de18a59c8e..d2827e78d7 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -490,7 +490,10 @@ struct mlx5_aso_sq_elem {
 			uint16_t burst_size;
 		};
 		struct mlx5_aso_mtr *mtr;
-		struct mlx5_aso_ct_action *ct;
+		struct {
+			struct mlx5_aso_ct_action *ct;
+			char *query_data;
+		};
 	};
 };
 
@@ -1702,5 +1705,10 @@ int mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh,
 int mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
 			      struct mlx5_aso_ct_action *ct,
 			      const struct rte_flow_action_conntrack *profile);
+int mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh,
+			   struct mlx5_aso_ct_action *ct);
+int mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
+			     struct mlx5_aso_ct_action *ct,
+			     struct rte_flow_action_conntrack *profile);
 
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index 3c2350a6b8..3f7ed371bf 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -933,6 +933,7 @@ mlx5_aso_ct_sq_enqueue_single(struct mlx5_aso_ct_pools_mng *mng,
 	/* Fill next WQE. */
 	MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_WAIT);
 	sq->elts[sq->head & mask].ct = ct;
+	sq->elts[sq->head & mask].query_data = NULL;
 	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
 	/* Each WQE will have a single CT object. */
 	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
@@ -1048,9 +1049,95 @@ mlx5_aso_ct_status_update(struct mlx5_aso_sq *sq, uint16_t num)
 		ct = sq->elts[idx].ct;
 		MLX5_ASSERT(ct);
 		MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_READY);
+		if (sq->elts[idx].query_data)
+			rte_memcpy(sq->elts[idx].query_data,
+				   (char *)((uintptr_t)sq->mr.addr + idx * 64),
+				   64);
 	}
 }
 
+/*
+ * Post a WQE to the ASO CT SQ to query the current context.
+ *
+ * @param[in] mng
+ *   Pointer to the CT pools management structure.
+ * @param[in] ct
+ *   Pointer to the generic CT structure related to the context.
+ * @param[in] data
+ *   Pointer to data area to be filled.
+ *
+ * @return
+ *   1 on success (WQE number), 0 if CT is busy or SQ is full, -1 if CT is free.
+ */
+static int
+mlx5_aso_ct_sq_query_single(struct mlx5_aso_ct_pools_mng *mng,
+			    struct mlx5_aso_ct_action *ct, char *data)
+{
+	volatile struct mlx5_aso_wqe *wqe = NULL;
+	struct mlx5_aso_sq *sq = &mng->aso_sq;
+	uint16_t size = 1 << sq->log_desc_n;
+	uint16_t mask = size - 1;
+	uint16_t res;
+	uint16_t wqe_idx;
+	struct mlx5_aso_ct_pool *pool;
+	enum mlx5_aso_ct_state state =
+				__atomic_load_n(&ct->state, __ATOMIC_RELAXED);
+
+	if (state == ASO_CONNTRACK_FREE) {
+		DRV_LOG(ERR, "Fail: No context to query");
+		return -1;
+	} else if (state == ASO_CONNTRACK_WAIT) {
+		return 0;
+	}
+	rte_spinlock_lock(&sq->sqsl);
+	res = size - (uint16_t)(sq->head - sq->tail);
+	if (unlikely(!res)) {
+		rte_spinlock_unlock(&sq->sqsl);
+		DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
+		return 0;
+	}
+	MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_QUERY);
+	wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
+	/* Confirm the location and address of the prefetch instruction. */
+	rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
+	/* Fill next WQE. */
+	wqe_idx = sq->head & mask;
+	sq->elts[wqe_idx].ct = ct;
+	sq->elts[wqe_idx].query_data = data;
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	/* Each WQE will have a single CT object. */
+	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
+						  ct->offset);
+	wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
+			(ASO_OPC_MOD_CONNECTION_TRACKING <<
+			 WQE_CSEG_OPC_MOD_OFFSET) |
+			sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
+	/*
+	 * No write request is required for a query.
+	 * ASO_OPER_LOGICAL_AND and ASO_OP_ALWAYS_FALSE are both 0.
+	 * "BYTEWISE_64BYTE" is needed for a whole context.
+	 * Set to 0 directly to reduce an endian swap. (Modify should rewrite.)
+	 * "data_mask" is ignored.
+	 * Buffer address was already filled during initialization.
+	 */
+	wqe->aso_cseg.operand_masks = rte_cpu_to_be_32(BYTEWISE_64BYTE <<
+					ASO_CSEG_DATA_MASK_MODE_OFFSET);
+	wqe->aso_cseg.data_mask = 0;
+	sq->head++;
+	/*
+	 * Each WQE contains 2 WQEBB's, even though
+	 * data segment is not used in this case.
+	 */
+	sq->pi += 2;
+	rte_io_wmb();
+	sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
+	rte_wmb();
+	*sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH. */
+	rte_wmb();
+	rte_spinlock_unlock(&sq->sqsl);
+	return 1;
+}
+
 /*
  * Handle completions from WQEs sent to ASO CT.
  *
@@ -1143,3 +1230,161 @@ mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
 		ct->offset, pool->index);
 	return -1;
 }
+
+/*
+ * Poll the ASO CT completions until the CT object state becomes ready.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ * @param[in] ct
+ *   Pointer to connection tracking offload object.
+ *
+ * @return
+ *   0 when the object is ready, -1 when the polling attempts are exhausted.
+ */
+int
+mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh,
+		       struct mlx5_aso_ct_action *ct)
+{
+	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
+	uint32_t poll_cqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
+	struct mlx5_aso_ct_pool *pool;
+
+	if (__atomic_load_n(&ct->state, __ATOMIC_RELAXED) ==
+	    ASO_CONNTRACK_READY)
+		return 0;
+	do {
+		mlx5_aso_ct_completion_handle(mng);
+		if (__atomic_load_n(&ct->state, __ATOMIC_RELAXED) ==
+		    ASO_CONNTRACK_READY)
+			return 0;
+		/* CQE is not ready yet, sleep a while before polling again. */
+		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
+	} while (--poll_cqe_times);
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	DRV_LOG(ERR, "Fail to poll CQE for ASO CT %d in pool %d",
+		ct->offset, pool->index);
+	return -1;
+}
+
+/*
+ * Convert the hardware conntrack data format into the profile.
+ *
+ * @param[out] profile
+ *   Pointer to conntrack profile to be filled after query.
+ * @param[in] wdata
+ *   Pointer to data fetched from hardware.
+ */
+static inline void
+mlx5_aso_ct_obj_analyze(struct rte_flow_action_conntrack *profile,
+			char *wdata)
+{
+	void *o_dir = MLX5_ADDR_OF(conn_track_aso, wdata, original_dir);
+	void *r_dir = MLX5_ADDR_OF(conn_track_aso, wdata, reply_dir);
+
+	/* MLX5_GET16 should be taken into consideration. */
+	profile->state = (enum rte_flow_conntrack_state)
+			 MLX5_GET(conn_track_aso, wdata, state);
+	profile->enable = !MLX5_GET(conn_track_aso, wdata, freeze_track);
+	profile->selective_ack = MLX5_GET(conn_track_aso, wdata,
+					  sack_permitted);
+	profile->live_connection = MLX5_GET(conn_track_aso, wdata,
+					    connection_assured);
+	profile->challenge_ack_passed = MLX5_GET(conn_track_aso, wdata,
+						 challenged_acked);
+	profile->max_ack_window = MLX5_GET(conn_track_aso, wdata,
+					   max_ack_window);
+	profile->retransmission_limit = MLX5_GET(conn_track_aso, wdata,
+						 retranmission_limit);
+	profile->last_window = MLX5_GET(conn_track_aso, wdata, last_win);
+	profile->last_direction = MLX5_GET(conn_track_aso, wdata, last_dir);
+	profile->last_index = (enum rte_flow_conntrack_tcp_last_index)
+			      MLX5_GET(conn_track_aso, wdata, last_index);
+	profile->last_seq = MLX5_GET(conn_track_aso, wdata, last_seq);
+	profile->last_ack = MLX5_GET(conn_track_aso, wdata, last_ack);
+	profile->last_end = MLX5_GET(conn_track_aso, wdata, last_end);
+	profile->liberal_mode = MLX5_GET(conn_track_aso, wdata,
+				reply_direction_tcp_liberal_enabled) |
+				MLX5_GET(conn_track_aso, wdata,
+				original_direction_tcp_liberal_enabled);
+	/* RTE profile has one liberal flag only, per-direction bits are ORed. */
+	profile->reply_dir.scale = MLX5_GET(conn_track_aso, wdata,
+					    reply_direction_tcp_scale);
+	profile->reply_dir.close_initiated = MLX5_GET(conn_track_aso, wdata,
+					reply_direction_tcp_close_initiated);
+	profile->reply_dir.data_unacked = MLX5_GET(conn_track_aso, wdata,
+					reply_direction_tcp_data_unacked);
+	profile->reply_dir.last_ack_seen = MLX5_GET(conn_track_aso, wdata,
+					reply_direction_tcp_max_ack);
+	profile->reply_dir.sent_end = MLX5_GET(tcp_window_params,
+					       r_dir, sent_end);
+	profile->reply_dir.reply_end = MLX5_GET(tcp_window_params,
+						r_dir, reply_end);
+	profile->reply_dir.max_win = MLX5_GET(tcp_window_params,
+					      r_dir, max_win);
+	profile->reply_dir.max_ack = MLX5_GET(tcp_window_params,
+					      r_dir, max_ack);
+	profile->original_dir.scale = MLX5_GET(conn_track_aso, wdata,
+					       original_direction_tcp_scale);
+	profile->original_dir.close_initiated = MLX5_GET(conn_track_aso, wdata,
+					original_direction_tcp_close_initiated);
+	profile->original_dir.data_unacked = MLX5_GET(conn_track_aso, wdata,
+					original_direction_tcp_data_unacked);
+	profile->original_dir.last_ack_seen = MLX5_GET(conn_track_aso, wdata,
+					original_direction_tcp_max_ack);
+	profile->original_dir.sent_end = MLX5_GET(tcp_window_params,
+						  o_dir, sent_end);
+	profile->original_dir.reply_end = MLX5_GET(tcp_window_params,
+						   o_dir, reply_end);
+	profile->original_dir.max_win = MLX5_GET(tcp_window_params,
+						 o_dir, max_win);
+	profile->original_dir.max_ack = MLX5_GET(tcp_window_params,
+						 o_dir, max_ack);
+}
+
+/*
+ * Query connection tracking information parameter by sending a WQE.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ * @param[in] ct
+ *   Pointer to connection tracking offload object.
+ * @param[out] profile
+ *   Pointer to connection tracking TCP information.
+ *
+ * @return
+ *   0 on success, -1 on failure.
+ */
+int
+mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
+			 struct mlx5_aso_ct_action *ct,
+			 struct rte_flow_action_conntrack *profile)
+{
+	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
+	uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
+	struct mlx5_aso_ct_pool *pool;
+	char out_data[64 * 2];
+	int ret;
+
+	MLX5_ASSERT(ct);
+	do {
+		mlx5_aso_ct_completion_handle(mng);
+		ret = mlx5_aso_ct_sq_query_single(mng, ct, out_data);
+		if (ret < 0)
+			return ret;
+		else if (ret > 0)
+			goto data_handle;
+		/* No WQE was posted (CT busy or SQ full), wait and retry. */
+		else
+			rte_delay_us_sleep(10u);
+	} while (--poll_wqe_times);
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	DRV_LOG(ERR, "Fail to send WQE for ASO CT %d in pool %d",
+		ct->offset, pool->index);
+	return -1;
+data_handle:
+	ret = mlx5_aso_ct_wait_ready(sh, ct);
+	if (!ret)
+		mlx5_aso_ct_obj_analyze(profile, out_data);
+	return ret;
+}
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index c8ff693e4c..84e7f0b3d3 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -13775,6 +13775,8 @@ flow_dv_action_query(struct rte_eth_dev *dev,
 	uint32_t act_idx = (uint32_t)(uintptr_t)handle;
 	uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
 	uint32_t idx = act_idx & ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_action *ct;
 
 	switch (type) {
 	case MLX5_INDIRECT_ACTION_TYPE_AGE:
@@ -13788,6 +13790,23 @@ flow_dv_action_query(struct rte_eth_dev *dev,
 			resp->sec_since_last_hit = __atomic_load_n
 			     (&age_param->sec_since_last_hit, __ATOMIC_RELAXED);
 		return 0;
+	case MLX5_INDIRECT_ACTION_TYPE_CT:
+		ct = flow_aso_ct_get_by_idx(dev, idx);
+		if (!ct->refcnt)
+			return rte_flow_error_set(error, EFAULT,
+					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					NULL,
+					"CT object is inactive");
+		((struct rte_flow_action_conntrack *)data)->peer_port =
+							ct->peer;
+		((struct rte_flow_action_conntrack *)data)->is_original_dir =
+							ct->is_original;
+		if (mlx5_aso_ct_query_by_wqe(priv->sh, ct, data))
+			return rte_flow_error_set(error, EIO,
+					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					NULL,
+					"Failed to query CT context");
+		return 0;
 	default:
 		return rte_flow_error_set(error, ENOTSUP,
 					  RTE_FLOW_ERROR_TYPE_ACTION,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v5 10/17] net/mlx5: add ASO CT destroy handling
  2021-05-05  8:05 ` [dpdk-dev] [PATCH v5 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (8 preceding siblings ...)
  2021-05-05  8:05   ` [dpdk-dev] [PATCH v5 09/17] net/mlx5: add ASO CT query implementation Bing Zhao
@ 2021-05-05  8:05   ` Bing Zhao
  2021-05-05  8:05   ` [dpdk-dev] [PATCH v5 11/17] net/mlx5: add translation of CT action Bing Zhao
                     ` (6 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  8:05 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

When trying to destroy an ASO connection tracking context, the DR
action created on this context should also be destroyed. Before
inserting the related software object into the management free list,
the reference count should be checked.

Right now, the context object will not be freed to the system and
will be reused directly from the free list.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow_dv.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 84e7f0b3d3..0fa0671ace 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -11136,9 +11136,15 @@ flow_dv_aso_ct_release(struct rte_eth_dev *dev, uint32_t idx)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	uint32_t ret;
 	struct mlx5_aso_ct_action *ct = flow_aso_ct_get_by_idx(dev, idx);
-	uint32_t ret = __atomic_sub_fetch(&ct->refcnt, 1, __ATOMIC_RELAXED);
+	enum mlx5_aso_ct_state state =
+			__atomic_load_n(&ct->state, __ATOMIC_RELAXED);
 
+	/* Cannot release when CT is in the ASO SQ. */
+	if (state == ASO_CONNTRACK_WAIT || state == ASO_CONNTRACK_QUERY)
+		return -1;
+	ret = __atomic_sub_fetch(&ct->refcnt, 1, __ATOMIC_RELAXED);
 	if (!ret) {
 		if (ct->dr_action_orig) {
 #ifdef HAVE_MLX5_DR_ACTION_ASO_CT
@@ -11154,6 +11160,8 @@ flow_dv_aso_ct_release(struct rte_eth_dev *dev, uint32_t idx)
 #endif
 			ct->dr_action_rply = NULL;
 		}
+		/* Clear the state to free, no need in 1st allocation. */
+		MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_FREE);
 		rte_spinlock_lock(&mng->ct_sl);
 		LIST_INSERT_HEAD(&mng->free_cts, ct, next);
 		rte_spinlock_unlock(&mng->ct_sl);
@@ -13648,6 +13656,12 @@ flow_dv_action_destroy(struct rte_eth_dev *dev,
 			DRV_LOG(DEBUG, "Indirect age action %" PRIu32 " was"
 				" released with references %d.", idx, ret);
 		return 0;
+	case MLX5_INDIRECT_ACTION_TYPE_CT:
+		ret = flow_dv_aso_ct_release(dev, idx);
+		if (ret)
+			DRV_LOG(DEBUG, "Connection tracking object %u still "
+				"has references %d.", idx, ret);
+		return 0;
 	default:
 		return rte_flow_error_set(error, ENOTSUP,
 					  RTE_FLOW_ERROR_TYPE_ACTION,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v5 11/17] net/mlx5: add translation of CT action
  2021-05-05  8:05 ` [dpdk-dev] [PATCH v5 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (9 preceding siblings ...)
  2021-05-05  8:05   ` [dpdk-dev] [PATCH v5 10/17] net/mlx5: add ASO CT destroy handling Bing Zhao
@ 2021-05-05  8:05   ` Bing Zhao
  2021-05-05  8:05   ` [dpdk-dev] [PATCH v5 12/17] net/mlx5: add translation of CT item Bing Zhao
                     ` (5 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  8:05 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

When creating a flow with this action context for CT, it needs to be
translated in 2 levels.

First, retrieve from action context to rte_flow action.
Second, translate it to the corresponding DR action with traffic
direction that was specified when creating or updating via
rte_flow_action_handle* API.

Before using the DR action in a flow, the CT context should be
available to use in the hardware. A synchronization is done before
inserting the flow rule with CT action to check the HW availability
of this CT context.

In order to release the DR actions and reuse the context of a CT,
the reference count should also be handled in the flow rule
destroying.

The CT index will be recorded in the rte_flow by reusing the ASO age
index to save memory, since only one ASO action is supported in one
flow rule currently. The action context type should also be saved
for CT. When destroying a flow rule, if the context type is CT and
the index is valid (non-zero), the release process should be
handled. By default, the handling will fall back to try to release
the ASO age if any.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.h          |  2 ++
 drivers/net/mlx5/mlx5_flow.c     |  9 +++++++
 drivers/net/mlx5/mlx5_flow.h     |  7 +++++-
 drivers/net/mlx5/mlx5_flow_aso.c | 41 ++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_dv.c  | 28 +++++++++++++++++++++-
 5 files changed, 85 insertions(+), 2 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index d2827e78d7..d01a10ea54 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1710,5 +1710,7 @@ int mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh,
 int mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
 			     struct mlx5_aso_ct_action *ct,
 			     struct rte_flow_action_conntrack *profile);
+int mlx5_aso_ct_available(struct mlx5_dev_ctx_shared *sh,
+			  struct mlx5_aso_ct_action *ct);
 
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index edad6007a8..f36eeae03f 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -3553,6 +3553,15 @@ flow_action_handles_translate(struct rte_eth_dev *dev,
 				break;
 			}
 			/* Fall-through */
+		case MLX5_INDIRECT_ACTION_TYPE_CT:
+			if (priv->sh->ct_aso_en) {
+				translated[handle->index].type =
+					RTE_FLOW_ACTION_TYPE_CONNTRACK;
+				translated[handle->index].conf =
+							 (void *)(uintptr_t)idx;
+				break;
+			}
+			/* Fall-through */
 		default:
 			mlx5_free(translated);
 			return rte_flow_error_set
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 8f2bc7d2f6..286e3fb6a4 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -226,6 +226,7 @@ enum mlx5_feature_name {
 #define MLX5_FLOW_ACTION_TUNNEL_MATCH (1ull << 38)
 #define MLX5_FLOW_ACTION_MODIFY_FIELD (1ull << 39)
 #define MLX5_FLOW_ACTION_METER_WITH_TERMINATED_POLICY (1ull << 40)
+#define MLX5_FLOW_ACTION_CT (1ull << 41)
 
 #define MLX5_FLOW_FATE_ACTIONS \
 	(MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_QUEUE | \
@@ -969,11 +970,15 @@ struct rte_flow {
 	uint32_t drv_type:2; /**< Driver type. */
 	uint32_t tunnel:1;
 	uint32_t meter:24; /**< Holds flow meter id. */
+	uint32_t indirect_type:2; /**< Indirect action type. */
 	uint32_t rix_mreg_copy;
 	/**< Index to metadata register copy table resource. */
 	uint32_t counter; /**< Holds flow counter. */
 	uint32_t tunnel_id;  /**< Tunnel id */
-	uint32_t age; /**< Holds ASO age bit index. */
+	union {
+		uint32_t age; /**< Holds ASO age bit index. */
+		uint32_t ct; /**< Holds ASO CT index. */
+	};
 	uint32_t geneve_tlv_option; /**< Holds Geneve TLV option id. > */
 } __rte_packed;
 
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index 3f7ed371bf..d0a989e213 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -1388,3 +1388,44 @@ mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
 		mlx5_aso_ct_obj_analyze(profile, out_data);
 	return ret;
 }
+
+/*
+ * Make sure the conntrack context is synchronized with hardware before
+ * creating a flow rule that uses it.
+ *
+ * @param[in] sh
+ *   Pointer to shared device context.
+ * @param[in] ct
+ *   Pointer to connection tracking offload object.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_aso_ct_available(struct mlx5_dev_ctx_shared *sh,
+		      struct mlx5_aso_ct_action *ct)
+{
+	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
+	uint32_t poll_cqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
+	enum mlx5_aso_ct_state state =
+				__atomic_load_n(&ct->state, __ATOMIC_RELAXED);
+
+	if (state == ASO_CONNTRACK_FREE) {
+		rte_errno = ENXIO;
+		return -rte_errno;
+	} else if (state == ASO_CONNTRACK_READY ||
+		   state == ASO_CONNTRACK_QUERY) {
+		return 0;
+	}
+	do {
+		mlx5_aso_ct_completion_handle(mng);
+		state = __atomic_load_n(&ct->state, __ATOMIC_RELAXED);
+		if (state == ASO_CONNTRACK_READY ||
+		    state == ASO_CONNTRACK_QUERY)
+			return 0;
+		/* Waiting for CQE ready, consider should block or sleep. */
+		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
+	} while (--poll_cqe_times);
+	rte_errno = EBUSY;
+	return -rte_errno;
+}
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 0fa0671ace..14af900267 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -11381,6 +11381,7 @@ flow_dv_translate_create_conntrack(struct rte_eth_dev *dev,
 		return rte_flow_error_set(error, EBUSY,
 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
 					  "Failed to update CT");
+	ct->is_original = !!pro->is_original_dir;
 	return idx;
 }
 
@@ -11544,6 +11545,8 @@ flow_dv_translate(struct rte_eth_dev *dev,
 		const struct rte_flow_action *found_action = NULL;
 		uint32_t jump_group = 0;
 		struct mlx5_flow_counter *cnt;
+		uint32_t ct_idx;
+		struct mlx5_aso_ct_action *ct;
 
 		if (!mlx5_flow_os_action_supported(action_type))
 			return rte_flow_error_set(error, ENOTSUP,
@@ -12017,6 +12020,26 @@ flow_dv_translate(struct rte_eth_dev *dev,
 				return -rte_errno;
 			action_flags |= MLX5_FLOW_ACTION_MODIFY_FIELD;
 			break;
+		case RTE_FLOW_ACTION_TYPE_CONNTRACK:
+			ct_idx = (uint32_t)(uintptr_t)action->conf;
+			ct = flow_aso_ct_get_by_idx(dev, ct_idx);
+			if (mlx5_aso_ct_available(priv->sh, ct))
+				return rte_flow_error_set(error, rte_errno,
+						RTE_FLOW_ERROR_TYPE_ACTION,
+						NULL,
+						"CT is unavailable.");
+			if (ct->is_original)
+				dev_flow->dv.actions[actions_n] =
+							ct->dr_action_orig;
+			else
+				dev_flow->dv.actions[actions_n] =
+							ct->dr_action_rply;
+			flow->indirect_type = MLX5_INDIRECT_ACTION_TYPE_CT;
+			flow->ct = ct_idx;
+			__atomic_fetch_add(&ct->refcnt, 1, __ATOMIC_RELAXED);
+			actions_n++;
+			action_flags |= MLX5_FLOW_ACTION_CT;
+			break;
 		case RTE_FLOW_ACTION_TYPE_END:
 			actions_end = true;
 			if (mhdr_res->actions_num) {
@@ -13152,7 +13175,10 @@ flow_dv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
 			mlx5_flow_meter_detach(priv, fm);
 		flow->meter = 0;
 	}
-	if (flow->age)
+	/* Keep the current age handling by default. */
+	if (flow->indirect_type == MLX5_INDIRECT_ACTION_TYPE_CT && flow->ct)
+		flow_dv_aso_ct_release(dev, flow->ct);
+	else if (flow->age)
 		flow_dv_aso_age_release(dev, flow->age);
 	if (flow->geneve_tlv_option) {
 		flow_dv_geneve_tlv_option_resource_release(dev);
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v5 12/17] net/mlx5: add translation of CT item
  2021-05-05  8:05 ` [dpdk-dev] [PATCH v5 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (10 preceding siblings ...)
  2021-05-05  8:05   ` [dpdk-dev] [PATCH v5 11/17] net/mlx5: add translation of CT action Bing Zhao
@ 2021-05-05  8:05   ` Bing Zhao
  2021-05-05  8:05   ` [dpdk-dev] [PATCH v5 13/17] net/mlx5: add CT context update Bing Zhao
                     ` (4 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  8:05 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

The return register of the DR action will be used for matching.
After the ASO CT checking of a TCP packet, the syndrome is filled in
the register. Only the 8 LSBs should be used. A conversion from
RTE_FLOW_CONNTRACK_PKT_STATE_* to the syndrome should be done after
checking the spec and mask fields.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow.h    |  7 ++++
 drivers/net/mlx5/mlx5_flow_dv.c | 62 +++++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 286e3fb6a4..eb0bb42161 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -405,6 +405,13 @@ enum mlx5_feature_name {
 /* Maximum number of fields to modify in MODIFY_FIELD */
 #define MLX5_ACT_MAX_MOD_FIELDS 5
 
+/* Syndrome bits definition for connection tracking. */
+#define MLX5_CT_SYNDROME_VALID		(0x0 << 6)
+#define MLX5_CT_SYNDROME_INVALID	(0x1 << 6)
+#define MLX5_CT_SYNDROME_TRAP		(0x2 << 6)
+#define MLX5_CT_SYNDROME_STATE_CHANGE	(0x1 << 1)
+#define MLX5_CT_SYNDROME_BAD_PACKET	(0x1 << 0)
+
 enum mlx5_flow_drv_type {
 	MLX5_FLOW_TYPE_MIN,
 	MLX5_FLOW_TYPE_DV,
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 14af900267..b0858e3df8 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -9379,6 +9379,64 @@ flow_dv_translate_item_ecpri(struct rte_eth_dev *dev, void *matcher,
 	}
 }
 
+/*
+ * Add connection tracking status item to matcher
+ *
+ * @param[in] dev
+ *   The device to configure through.
+ * @param[in, out] matcher
+ *   Flow matcher.
+ * @param[in, out] key
+ *   Flow matcher value.
+ * @param[in] item
+ *   Flow pattern to translate.
+ */
+static void
+flow_dv_translate_item_aso_ct(struct rte_eth_dev *dev,
+			      void *matcher, void *key,
+			      const struct rte_flow_item *item)
+{
+	uint32_t reg_value = 0;
+	int reg_id;
+	/* 8LSB 0b 11/0000/11, middle 4 bits are reserved. */
+	uint32_t reg_mask = 0;
+	const struct rte_flow_item_conntrack *spec = item->spec;
+	const struct rte_flow_item_conntrack *mask = item->mask;
+	uint32_t flags;
+	struct rte_flow_error error;
+
+	if (!mask)
+		mask = &rte_flow_item_conntrack_mask;
+	if (!spec || !mask->flags)
+		return;
+	flags = spec->flags & mask->flags;
+	/* The conflict should be checked in the validation. */
+	if (flags & RTE_FLOW_CONNTRACK_PKT_STATE_VALID)
+		reg_value |= MLX5_CT_SYNDROME_VALID;
+	if (flags & RTE_FLOW_CONNTRACK_PKT_STATE_CHANGED)
+		reg_value |= MLX5_CT_SYNDROME_STATE_CHANGE;
+	if (flags & RTE_FLOW_CONNTRACK_PKT_STATE_INVALID)
+		reg_value |= MLX5_CT_SYNDROME_INVALID;
+	if (flags & RTE_FLOW_CONNTRACK_PKT_STATE_DISABLED)
+		reg_value |= MLX5_CT_SYNDROME_TRAP;
+	if (flags & RTE_FLOW_CONNTRACK_PKT_STATE_BAD)
+		reg_value |= MLX5_CT_SYNDROME_BAD_PACKET;
+	if (mask->flags & (RTE_FLOW_CONNTRACK_PKT_STATE_VALID |
+			   RTE_FLOW_CONNTRACK_PKT_STATE_INVALID |
+			   RTE_FLOW_CONNTRACK_PKT_STATE_DISABLED))
+		reg_mask |= 0xc0;
+	if (mask->flags & RTE_FLOW_CONNTRACK_PKT_STATE_CHANGED)
+		reg_mask |= MLX5_CT_SYNDROME_STATE_CHANGE;
+	if (mask->flags & RTE_FLOW_CONNTRACK_PKT_STATE_BAD)
+		reg_mask |= MLX5_CT_SYNDROME_BAD_PACKET;
+	/* The REG_C_x value could be saved during startup. */
+	reg_id = mlx5_flow_get_reg_id(dev, MLX5_ASO_CONNTRACK, 0, &error);
+	if (reg_id == REG_NON)
+		return;
+	flow_dv_match_meta_reg(matcher, key, (enum modify_reg)reg_id,
+			       reg_value, reg_mask);
+}
+
 static uint32_t matcher_zero[MLX5_ST_SZ_DW(fte_match_param)] = { 0 };
 
 #define HEADER_IS_ZERO(match_criteria, headers)				     \
@@ -12322,6 +12380,10 @@ flow_dv_translate(struct rte_eth_dev *dev,
 			/* No other protocol should follow eCPRI layer. */
 			last_item = MLX5_FLOW_LAYER_ECPRI;
 			break;
+		case RTE_FLOW_ITEM_TYPE_CONNTRACK:
+			flow_dv_translate_item_aso_ct(dev, match_mask,
+						      match_value, items);
+			break;
 		default:
 			break;
 		}
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v5 13/17] net/mlx5: add CT context update
  2021-05-05  8:05 ` [dpdk-dev] [PATCH v5 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (11 preceding siblings ...)
  2021-05-05  8:05   ` [dpdk-dev] [PATCH v5 12/17] net/mlx5: add translation of CT item Bing Zhao
@ 2021-05-05  8:05   ` Bing Zhao
  2021-05-05  8:05   ` [dpdk-dev] [PATCH v5 14/17] net/mlx5: validation of CT action Bing Zhao
                     ` (3 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  8:05 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

When updating a connection tracking context, two separate parts
could be updated.
First, the direction. This will only update the traffic direction
recorded in the software for flow creation.
Second, the TCP parameters. The hardware context will be updated
via the WQE. This update will be blocked until the hardware status
is updated and ready for the next flow creation.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow_dv.c | 56 +++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index b0858e3df8..6afbbbc4bb 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -13824,6 +13824,60 @@ __flow_dv_action_rss_update(struct rte_eth_dev *dev, uint32_t idx,
 	return ret;
 }
 
+/*
+ * Updates in place conntrack context or direction.
+ * Context update should be synchronized.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] idx
+ *   The conntrack object ID to be updated.
+ * @param[in] update
+ *   Pointer to the structure of information to update.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. Initialized in case of
+ *   error only.
+ *
+ * @return
+ *   0 on success, otherwise negative errno value.
+ */
+static int
+__flow_dv_action_ct_update(struct rte_eth_dev *dev, uint32_t idx,
+			   const struct rte_flow_modify_conntrack *update,
+			   struct rte_flow_error *error)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_action *ct;
+	const struct rte_flow_action_conntrack *new_prf;
+	int ret = 0;
+
+	ct = flow_aso_ct_get_by_idx(dev, idx);
+	if (!ct->refcnt)
+		return rte_flow_error_set(error, ENOMEM,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					  NULL,
+					  "CT object is inactive");
+	new_prf = &update->new_ct;
+	if (update->direction)
+		ct->is_original = !!new_prf->is_original_dir;
+	if (update->state) {
+		ret = mlx5_aso_ct_update_by_wqe(priv->sh, ct, new_prf);
+		if (ret)
+			return rte_flow_error_set(error, EIO,
+					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					NULL,
+					"Failed to send CT context update WQE");
+		/* Block until ready or a failure. */
+		ret = mlx5_aso_ct_available(priv->sh, ct);
+		if (ret)
+			rte_flow_error_set(error, rte_errno,
+					   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					   NULL,
+					   "Timeout to get the CT update");
+	}
+	return ret;
+}
+
 /**
  * Updates in place shared action configuration, lock free,
  * (mutex should be acquired by caller).
@@ -13859,6 +13913,8 @@ flow_dv_action_update(struct rte_eth_dev *dev,
 	case MLX5_INDIRECT_ACTION_TYPE_RSS:
 		action_conf = ((const struct rte_flow_action *)update)->conf;
 		return __flow_dv_action_rss_update(dev, idx, action_conf, err);
+	case MLX5_INDIRECT_ACTION_TYPE_CT:
+		return __flow_dv_action_ct_update(dev, idx, update, err);
 	default:
 		return rte_flow_error_set(err, ENOTSUP,
 					  RTE_FLOW_ERROR_TYPE_ACTION,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v5 14/17] net/mlx5: validation of CT action
  2021-05-05  8:05 ` [dpdk-dev] [PATCH v5 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (12 preceding siblings ...)
  2021-05-05  8:05   ` [dpdk-dev] [PATCH v5 13/17] net/mlx5: add CT context update Bing Zhao
@ 2021-05-05  8:05   ` Bing Zhao
  2021-05-05  8:05   ` [dpdk-dev] [PATCH v5 15/17] net/mlx5: validation of CT item Bing Zhao
                     ` (2 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  8:05 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

The validation of a CT action contains two parts. The first is the
CT action configuration parameters. When creating a CT action
context, some members need to be verified.

The second is that when creating a flow, the DR action of CT should
be validated with other actions and items as well. Currently, only
the TCP protocol supports connection tracking.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.h         |  4 ++
 drivers/net/mlx5/mlx5_flow.c    | 31 +++++++++++++++
 drivers/net/mlx5/mlx5_flow_dv.c | 69 +++++++++++++++++++++++++++++++++
 3 files changed, 104 insertions(+)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index d01a10ea54..36b7f05822 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1611,6 +1611,10 @@ int mlx5_flow_dev_dump(struct rte_eth_dev *dev, struct rte_flow *flow,
 void mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev);
 int mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
 			uint32_t nb_contexts, struct rte_flow_error *error);
+int mlx5_validate_action_ct(struct rte_eth_dev *dev,
+			    const struct rte_flow_action_conntrack *conntrack,
+			    struct rte_flow_error *error);
+
 
 /* mlx5_mp_os.c */
 
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index f36eeae03f..6baaefbaba 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1688,6 +1688,37 @@ mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
 	return 0;
 }
 
+/*
+ * Validate the ASO CT action.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] conntrack
+ *   Pointer to the CT action profile.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_validate_action_ct(struct rte_eth_dev *dev,
+			const struct rte_flow_action_conntrack *conntrack,
+			struct rte_flow_error *error)
+{
+	RTE_SET_USED(dev);
+
+	if (conntrack->state > RTE_FLOW_CONNTRACK_STATE_TIME_WAIT)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "Invalid CT state");
+	if (conntrack->last_index > RTE_FLOW_CONNTRACK_FLAG_RST)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "Invalid last TCP packet flag");
+	return 0;
+}
+
 /**
  * Verify the @p attributes will be correctly understood by the NIC and store
  * them in the @p flow if everything is correct.
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 6afbbbc4bb..f2a2c609e2 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -3400,6 +3400,57 @@ flow_dv_validate_action_raw_encap_decap
 	return 0;
 }
 
+/*
+ * Validate the ASO CT action.
+ *
+ * @param[in] dev
+ *   Pointer to the rte_eth_dev structure.
+ * @param[in] action_flags
+ *   Holds the actions detected until now.
+ * @param[in] item_flags
+ *   The items found in this flow rule.
+ * @param[in] attr
+ *   Pointer to flow attributes.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_validate_action_aso_ct(struct rte_eth_dev *dev,
+			       uint64_t action_flags,
+			       uint64_t item_flags,
+			       const struct rte_flow_attr *attr,
+			       struct rte_flow_error *error)
+{
+	RTE_SET_USED(dev);
+
+	if (attr->group == 0 && !attr->transfer)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					  NULL,
+					  "Only support non-root table");
+	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "CT cannot follow a fate action");
+	if ((action_flags & MLX5_FLOW_ACTION_METER) ||
+	    (action_flags & MLX5_FLOW_ACTION_AGE))
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "Only one ASO action is supported");
+	if (action_flags & MLX5_FLOW_ACTION_ENCAP)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "Encap cannot exist before CT");
+	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP))
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+					  "Not a outer TCP packet");
+	return 0;
+}
+
 /**
  * Match encap_decap resource.
  *
@@ -7205,6 +7256,14 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
 			action_flags |= MLX5_FLOW_ACTION_MODIFY_FIELD;
 			rw_act_num += ret;
 			break;
+		case RTE_FLOW_ACTION_TYPE_CONNTRACK:
+			ret = flow_dv_validate_action_aso_ct(dev, action_flags,
+							     item_flags, attr,
+							     error);
+			if (ret < 0)
+				return ret;
+			action_flags |= MLX5_FLOW_ACTION_CT;
+			break;
 		default:
 			return rte_flow_error_set(error, ENOTSUP,
 						  RTE_FLOW_ERROR_TYPE_ACTION,
@@ -13861,6 +13920,10 @@ __flow_dv_action_ct_update(struct rte_eth_dev *dev, uint32_t idx,
 	if (update->direction)
 		ct->is_original = !!new_prf->is_original_dir;
 	if (update->state) {
+		/* Only validate the profile when it needs to be updated. */
+		ret = mlx5_validate_action_ct(dev, new_prf, error);
+		if (ret)
+			return ret;
 		ret = mlx5_aso_ct_update_by_wqe(priv->sh, ct, new_prf);
 		if (ret)
 			return rte_flow_error_set(error, EIO,
@@ -15732,6 +15795,12 @@ flow_dv_action_validate(struct rte_eth_dev *dev,
 						NULL,
 					     "shared age action not supported");
 		return flow_dv_validate_action_age(0, action, dev, err);
+	case RTE_FLOW_ACTION_TYPE_CONNTRACK:
+		if (!priv->sh->ct_aso_en)
+			return rte_flow_error_set(err, ENOTSUP,
+					RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+					"ASO CT is not supported");
+		return mlx5_validate_action_ct(dev, action->conf, err);
 	default:
 		return rte_flow_error_set(err, ENOTSUP,
 					  RTE_FLOW_ERROR_TYPE_ACTION,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v5 15/17] net/mlx5: validation of CT item
  2021-05-05  8:05 ` [dpdk-dev] [PATCH v5 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (13 preceding siblings ...)
  2021-05-05  8:05   ` [dpdk-dev] [PATCH v5 14/17] net/mlx5: validation of CT action Bing Zhao
@ 2021-05-05  8:05   ` Bing Zhao
  2021-05-05  8:05   ` [dpdk-dev] [PATCH v5 16/17] net/mlx5: add support of CT between two ports Bing Zhao
  2021-05-05  8:06   ` [dpdk-dev] [PATCH v5 17/17] doc: update mlx5 support for conntrack Bing Zhao
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  8:05 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

The item of ASO connection tracking will be translated into the
register value when matching. The validation of this item has no
dependency on other layers, since the flow including this item
should be jumped to from another group. All the layer checks were
already done in the previous groups. Only conflicts between the
state bits should be checked.

It is assumed that the flow with CT item will always work on the
TCP traffic.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow.h    |  3 ++
 drivers/net/mlx5/mlx5_flow_dv.c | 51 +++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index eb0bb42161..238befa2d4 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -147,6 +147,9 @@ enum mlx5_feature_name {
 #define MLX5_FLOW_LAYER_GENEVE_OPT (UINT64_C(1) << 32)
 #define MLX5_FLOW_LAYER_GTP_PSC (UINT64_C(1) << 33)
 
+/* Conntrack item. */
+#define MLX5_FLOW_LAYER_ASO_CT (UINT64_C(1) << 34)
+
 /* Outer Masks. */
 #define MLX5_FLOW_LAYER_OUTER_L3 \
 	(MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index f2a2c609e2..aa0a5acdca 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -2598,6 +2598,51 @@ flow_dv_validate_item_ipv6_frag_ext(const struct rte_flow_item *item,
 				  "specified range not supported");
 }
 
+/*
+ * Validate ASO CT item.
+ *
+ * @param[in] dev
+ *   Pointer to the rte_eth_dev structure.
+ * @param[in] item
+ *   Item specification.
+ * @param[in] item_flags
+ *   Pointer to the bit-fields that hold the items detected until now.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_validate_item_aso_ct(struct rte_eth_dev *dev,
+			     const struct rte_flow_item *item,
+			     uint64_t *item_flags,
+			     struct rte_flow_error *error)
+{
+	const struct rte_flow_item_conntrack *spec = item->spec;
+	const struct rte_flow_item_conntrack *mask = item->mask;
+	RTE_SET_USED(dev);
+	uint32_t flags;
+
+	if (*item_flags & MLX5_FLOW_LAYER_ASO_CT)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ITEM, NULL,
+					  "Only one CT is supported");
+	if (!mask)
+		mask = &rte_flow_item_conntrack_mask;
+	flags = spec->flags & mask->flags;
+	if ((flags & RTE_FLOW_CONNTRACK_PKT_STATE_VALID) &&
+	    ((flags & RTE_FLOW_CONNTRACK_PKT_STATE_INVALID) ||
+	     (flags & RTE_FLOW_CONNTRACK_PKT_STATE_BAD) ||
+	     (flags & RTE_FLOW_CONNTRACK_PKT_STATE_DISABLED)))
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ITEM, NULL,
+					  "Conflict status bits");
+	/* State change also needs to be considered. */
+	*item_flags |= MLX5_FLOW_LAYER_ASO_CT;
+	return 0;
+}
+
 /**
  * Validate the pop VLAN action.
  *
@@ -6696,6 +6741,12 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
 				return ret;
 			last_item = MLX5_FLOW_LAYER_ECPRI;
 			break;
+		case RTE_FLOW_ITEM_TYPE_CONNTRACK:
+			ret = flow_dv_validate_item_aso_ct(dev, items,
+							   &item_flags, error);
+			if (ret < 0)
+				return ret;
+			break;
 		default:
 			return rte_flow_error_set(error, ENOTSUP,
 						  RTE_FLOW_ERROR_TYPE_ITEM,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v5 16/17] net/mlx5: add support of CT between two ports
  2021-05-05  8:05 ` [dpdk-dev] [PATCH v5 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (14 preceding siblings ...)
  2021-05-05  8:05   ` [dpdk-dev] [PATCH v5 15/17] net/mlx5: validation of CT item Bing Zhao
@ 2021-05-05  8:05   ` Bing Zhao
  2021-05-05  8:06   ` [dpdk-dev] [PATCH v5 17/17] doc: update mlx5 support for conntrack Bing Zhao
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  8:05 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

After creating a connection tracking context, it can be used between
two ports. For each port, the flow for one traffic direction will
be created.

The context can only be shared between the owner port and the peer
port that was specified when being created. Only the owner port
can update the context or query it in the current implementation.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow.h    | 57 +++++++++++++++++++++++++-
 drivers/net/mlx5/mlx5_flow_dv.c | 71 +++++++++++++++++++++++++--------
 2 files changed, 110 insertions(+), 18 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 238befa2d4..ddaba40f72 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -48,6 +48,25 @@ enum {
 	MLX5_INDIRECT_ACTION_TYPE_CT,
 };
 
+/* Currently, at most 256 ports and 4M actions are supported. */
+#define MLX5_INDIRECT_ACT_CT_MAX_PORT 0x100
+
+#define MLX5_INDIRECT_ACT_CT_OWNER_SHIFT 22
+#define MLX5_INDIRECT_ACT_CT_OWNER_MASK (MLX5_INDIRECT_ACT_CT_MAX_PORT - 1)
+
+/* 30-31: type, 22-29: owner port, 0-21: index. */
+#define MLX5_INDIRECT_ACT_CT_GEN_IDX(owner, index) \
+	((MLX5_INDIRECT_ACTION_TYPE_CT << MLX5_INDIRECT_ACTION_TYPE_OFFSET) | \
+	 (((owner) & MLX5_INDIRECT_ACT_CT_OWNER_MASK) << \
+	  MLX5_INDIRECT_ACT_CT_OWNER_SHIFT) | (index))
+
+#define MLX5_INDIRECT_ACT_CT_GET_OWNER(index) \
+	(((index) >> MLX5_INDIRECT_ACT_CT_OWNER_SHIFT) & \
+	 MLX5_INDIRECT_ACT_CT_OWNER_MASK)
+
+#define MLX5_INDIRECT_ACT_CT_GET_IDX(index) \
+	((index) & ((1 << MLX5_INDIRECT_ACT_CT_OWNER_SHIFT) - 1))
+
 /* Matches on selected register. */
 struct mlx5_rte_flow_item_tag {
 	enum modify_reg id;
@@ -1304,7 +1323,7 @@ mlx5_aso_meter_by_idx(struct mlx5_priv *priv, uint32_t idx)
 }
 
 /*
- * Get ASO CT action by index.
+ * Get ASO CT action by device and index.
  *
  * @param[in] dev
  *   Pointer to the Ethernet device structure.
@@ -1315,7 +1334,7 @@ mlx5_aso_meter_by_idx(struct mlx5_priv *priv, uint32_t idx)
  *   The specified ASO CT action pointer.
  */
 static inline struct mlx5_aso_ct_action *
-flow_aso_ct_get_by_idx(struct rte_eth_dev *dev, uint32_t idx)
+flow_aso_ct_get_by_dev_idx(struct rte_eth_dev *dev, uint32_t idx)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
@@ -1330,6 +1349,40 @@ flow_aso_ct_get_by_idx(struct rte_eth_dev *dev, uint32_t idx)
 	return &pool->actions[idx % MLX5_ASO_CT_ACTIONS_PER_POOL];
 }
 
+/*
+ * Get ASO CT action by owner & index.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] own_idx
+ *   Index to the ASO CT action and owner port combination.
+ *
+ * @return
+ *   The specified ASO CT action pointer.
+ */
+static inline struct mlx5_aso_ct_action *
+flow_aso_ct_get_by_idx(struct rte_eth_dev *dev, uint32_t own_idx)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_action *ct;
+	uint16_t owner = (uint16_t)MLX5_INDIRECT_ACT_CT_GET_OWNER(own_idx);
+	uint32_t idx = MLX5_INDIRECT_ACT_CT_GET_IDX(own_idx);
+
+	if (owner == PORT_ID(priv)) {
+		ct = flow_aso_ct_get_by_dev_idx(dev, idx);
+	} else {
+		struct rte_eth_dev *owndev = &rte_eth_devices[owner];
+
+		MLX5_ASSERT(owner < RTE_MAX_ETHPORTS);
+		if (dev->data->dev_started != 1)
+			return NULL;
+		ct = flow_aso_ct_get_by_dev_idx(owndev, idx);
+		if (ct->peer != PORT_ID(priv))
+			return NULL;
+	}
+	return ct;
+}
+
 int mlx5_flow_group_to_table(struct rte_eth_dev *dev,
 			     const struct mlx5_flow_tunnel *tunnel,
 			     uint32_t group, uint32_t *table,
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index aa0a5acdca..ca55cff48b 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -11289,7 +11289,7 @@ flow_dv_translate_create_aso_age(struct rte_eth_dev *dev,
 }
 
 /*
- * Release an ASO CT action.
+ * Release an ASO CT action by its own device.
  *
  * @param[in] dev
  *   Pointer to the Ethernet device structure.
@@ -11300,12 +11300,12 @@ flow_dv_translate_create_aso_age(struct rte_eth_dev *dev,
  *   0 when CT action was removed, otherwise the number of references.
  */
 static inline int
-flow_dv_aso_ct_release(struct rte_eth_dev *dev, uint32_t idx)
+flow_dv_aso_ct_dev_release(struct rte_eth_dev *dev, uint32_t idx)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
 	uint32_t ret;
-	struct mlx5_aso_ct_action *ct = flow_aso_ct_get_by_idx(dev, idx);
+	struct mlx5_aso_ct_action *ct = flow_aso_ct_get_by_dev_idx(dev, idx);
 	enum mlx5_aso_ct_state state =
 			__atomic_load_n(&ct->state, __ATOMIC_RELAXED);
 
@@ -11334,7 +11334,21 @@ flow_dv_aso_ct_release(struct rte_eth_dev *dev, uint32_t idx)
 		LIST_INSERT_HEAD(&mng->free_cts, ct, next);
 		rte_spinlock_unlock(&mng->ct_sl);
 	}
-	return ret;
+	return (int)ret;
+}
+
+static inline int
+flow_dv_aso_ct_release(struct rte_eth_dev *dev, uint32_t own_idx)
+{
+	uint16_t owner = (uint16_t)MLX5_INDIRECT_ACT_CT_GET_OWNER(own_idx);
+	uint32_t idx = MLX5_INDIRECT_ACT_CT_GET_IDX(own_idx);
+	struct rte_eth_dev *owndev = &rte_eth_devices[owner];
+	RTE_SET_USED(dev);
+
+	MLX5_ASSERT(owner < RTE_MAX_ETHPORTS);
+	if (dev->data->dev_started != 1)
+		return -1;
+	return flow_dv_aso_ct_dev_release(owndev, idx);
 }
 
 /*
@@ -11486,7 +11500,7 @@ flow_dv_aso_ct_alloc(struct rte_eth_dev *dev, struct rte_flow_error *error)
 		RTE_SET_USED(reg_c);
 #endif
 		if (!ct->dr_action_orig) {
-			flow_dv_aso_ct_release(dev, ct_idx);
+			flow_dv_aso_ct_dev_release(dev, ct_idx);
 			rte_flow_error_set(error, rte_errno,
 					   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
 					   "failed to create ASO CT action");
@@ -11502,7 +11516,7 @@ flow_dv_aso_ct_alloc(struct rte_eth_dev *dev, struct rte_flow_error *error)
 			 reg_c - REG_C_0);
 #endif
 		if (!ct->dr_action_rply) {
-			flow_dv_aso_ct_release(dev, ct_idx);
+			flow_dv_aso_ct_dev_release(dev, ct_idx);
 			rte_flow_error_set(error, rte_errno,
 					   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
 					   "failed to create ASO CT action");
@@ -11544,12 +11558,13 @@ flow_dv_translate_create_conntrack(struct rte_eth_dev *dev,
 		return rte_flow_error_set(error, rte_errno,
 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
 					  "Failed to allocate CT object");
-	ct = flow_aso_ct_get_by_idx(dev, idx);
+	ct = flow_aso_ct_get_by_dev_idx(dev, idx);
 	if (mlx5_aso_ct_update_by_wqe(sh, ct, pro))
 		return rte_flow_error_set(error, EBUSY,
 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
 					  "Failed to update CT");
 	ct->is_original = !!pro->is_original_dir;
+	ct->peer = pro->peer_port;
 	return idx;
 }
 
@@ -11713,7 +11728,7 @@ flow_dv_translate(struct rte_eth_dev *dev,
 		const struct rte_flow_action *found_action = NULL;
 		uint32_t jump_group = 0;
 		struct mlx5_flow_counter *cnt;
-		uint32_t ct_idx;
+		uint32_t owner_idx;
 		struct mlx5_aso_ct_action *ct;
 
 		if (!mlx5_flow_os_action_supported(action_type))
@@ -12189,8 +12204,13 @@ flow_dv_translate(struct rte_eth_dev *dev,
 			action_flags |= MLX5_FLOW_ACTION_MODIFY_FIELD;
 			break;
 		case RTE_FLOW_ACTION_TYPE_CONNTRACK:
-			ct_idx = (uint32_t)(uintptr_t)action->conf;
-			ct = flow_aso_ct_get_by_idx(dev, ct_idx);
+			owner_idx = (uint32_t)(uintptr_t)action->conf;
+			ct = flow_aso_ct_get_by_idx(dev, owner_idx);
+			if (!ct)
+				return rte_flow_error_set(error, EINVAL,
+						RTE_FLOW_ERROR_TYPE_ACTION,
+						NULL,
+						"Failed to get CT object.");
 			if (mlx5_aso_ct_available(priv->sh, ct))
 				return rte_flow_error_set(error, rte_errno,
 						RTE_FLOW_ERROR_TYPE_ACTION,
@@ -12203,7 +12223,7 @@ flow_dv_translate(struct rte_eth_dev *dev,
 				dev_flow->dv.actions[actions_n] =
 							ct->dr_action_rply;
 			flow->indirect_type = MLX5_INDIRECT_ACTION_TYPE_CT;
-			flow->ct = ct_idx;
+			flow->ct = owner_idx;
 			__atomic_fetch_add(&ct->refcnt, 1, __ATOMIC_RELAXED);
 			actions_n++;
 			action_flags |= MLX5_FLOW_ACTION_CT;
@@ -13803,8 +13823,7 @@ flow_dv_action_create(struct rte_eth_dev *dev,
 	case RTE_FLOW_ACTION_TYPE_CONNTRACK:
 		ret = flow_dv_translate_create_conntrack(dev, action->conf,
 							 err);
-		idx = (MLX5_INDIRECT_ACTION_TYPE_CT <<
-		       MLX5_INDIRECT_ACTION_TYPE_OFFSET) | ret;
+		idx = MLX5_INDIRECT_ACT_CT_GEN_IDX(PORT_ID(priv), ret);
 		break;
 	default:
 		rte_flow_error_set(err, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
@@ -13856,7 +13875,9 @@ flow_dv_action_destroy(struct rte_eth_dev *dev,
 		return 0;
 	case MLX5_INDIRECT_ACTION_TYPE_CT:
 		ret = flow_dv_aso_ct_release(dev, idx);
-		if (ret)
+		if (ret < 0)
+			return ret;
+		if (ret > 0)
 			DRV_LOG(DEBUG, "Connection tracking object %u still "
 				"has references %d.", idx, ret);
 		return 0;
@@ -13960,8 +13981,16 @@ __flow_dv_action_ct_update(struct rte_eth_dev *dev, uint32_t idx,
 	struct mlx5_aso_ct_action *ct;
 	const struct rte_flow_action_conntrack *new_prf;
 	int ret = 0;
+	uint16_t owner = (uint16_t)MLX5_INDIRECT_ACT_CT_GET_OWNER(idx);
+	uint32_t dev_idx;
 
-	ct = flow_aso_ct_get_by_idx(dev, idx);
+	if (PORT_ID(priv) != owner)
+		return rte_flow_error_set(error, EACCES,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					  NULL,
+					  "CT object owned by another port");
+	dev_idx = MLX5_INDIRECT_ACT_CT_GET_IDX(idx);
+	ct = flow_aso_ct_get_by_dev_idx(dev, dev_idx);
 	if (!ct->refcnt)
 		return rte_flow_error_set(error, ENOMEM,
 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
@@ -14049,6 +14078,8 @@ flow_dv_action_query(struct rte_eth_dev *dev,
 	uint32_t idx = act_idx & ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_aso_ct_action *ct;
+	uint16_t owner;
+	uint32_t dev_idx;
 
 	switch (type) {
 	case MLX5_INDIRECT_ACTION_TYPE_AGE:
@@ -14063,7 +14094,15 @@ flow_dv_action_query(struct rte_eth_dev *dev,
 			     (&age_param->sec_since_last_hit, __ATOMIC_RELAXED);
 		return 0;
 	case MLX5_INDIRECT_ACTION_TYPE_CT:
-		ct = flow_aso_ct_get_by_idx(dev, idx);
+		owner = (uint16_t)MLX5_INDIRECT_ACT_CT_GET_OWNER(idx);
+		if (owner != PORT_ID(priv))
+			return rte_flow_error_set(error, EACCES,
+					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					NULL,
+					"CT object owned by another port");
+		dev_idx = MLX5_INDIRECT_ACT_CT_GET_IDX(idx);
+		ct = flow_aso_ct_get_by_dev_idx(dev, dev_idx);
+		MLX5_ASSERT(ct);
 		if (!ct->refcnt)
 			return rte_flow_error_set(error, EFAULT,
 					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v5 17/17] doc: update mlx5 support for conntrack
  2021-05-05  8:05 ` [dpdk-dev] [PATCH v5 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (15 preceding siblings ...)
  2021-05-05  8:05   ` [dpdk-dev] [PATCH v5 16/17] net/mlx5: add support of CT between two ports Bing Zhao
@ 2021-05-05  8:06   ` Bing Zhao
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  8:06 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

The release notes and the mlx5 NIC guide are updated to describe the
support and the limitations of connection tracking.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 doc/guides/nics/features/default.ini   |  1 +
 doc/guides/nics/features/mlx5.ini      |  1 +
 doc/guides/nics/mlx5.rst               | 14 ++++++++++++++
 doc/guides/rel_notes/release_21_05.rst |  2 ++
 4 files changed, 18 insertions(+)

diff --git a/doc/guides/nics/features/default.ini b/doc/guides/nics/features/default.ini
index 8046bd121e..0deb4ef547 100644
--- a/doc/guides/nics/features/default.ini
+++ b/doc/guides/nics/features/default.ini
@@ -66,6 +66,7 @@ Module EEPROM dump   =
 Registers dump       =
 LED                  =
 Multiprocess aware   =
+Connection tracking  =
 FreeBSD              =
 Linux                =
 Windows              =
diff --git a/doc/guides/nics/features/mlx5.ini b/doc/guides/nics/features/mlx5.ini
index ddd131da16..45dbe75d07 100644
--- a/doc/guides/nics/features/mlx5.ini
+++ b/doc/guides/nics/features/mlx5.ini
@@ -45,6 +45,7 @@ Stats per queue      = Y
 FW version           = Y
 Module EEPROM dump   = Y
 Multiprocess aware   = Y
+Connection tracking  = Y
 Linux                = Y
 Windows              = P
 ARMv8                = Y
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 2bb4f18a08..238da94118 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -107,6 +107,7 @@ Features
 - 21844 flow priorities for ingress or egress flow groups greater than 0 and for any transfer
   flow group.
 - Flow metering, including meter policy API.
+- Connection tracking.
 
 Limitations
 -----------
@@ -418,6 +419,14 @@ Limitations
      - RED: must be DROP.
   - meter profile packet mode is supported.
 
+- Connection tracking:
+
+  - connection tracking (conntrack) cannot co-exist with an ASO meter or ASO age action in a single flow rule.
+  - Flow rules insertion rate and memory consumption.
+  - software limitation:
+     - ports: a maximal number of 256.
+     - conntrack: a maximal number of 4M.
+
 Statistics
 ----------
 
@@ -1680,6 +1689,11 @@ Supported hardware offloads
    |                       | | rdma-core 35  | | rdma-core 35  |
    |                       | | ConnectX-5    | | ConnectX-5    |
    +-----------------------+-----------------+-----------------+
+   | Connection tracking   | |               | | DPDK 21.05    |
+   |                       | |     N/A       | | OFED 5.3      |
+   |                       | |               | | rdma-core 35  |
+   |                       | |               | | ConnectX-6 Dx |
+   +-----------------------+-----------------+-----------------+
 
 .. table:: Minimal SW/HW versions for shared action offload
    :name: sact
diff --git a/doc/guides/rel_notes/release_21_05.rst b/doc/guides/rel_notes/release_21_05.rst
index efd68e8c7c..4c4c37ef87 100644
--- a/doc/guides/rel_notes/release_21_05.rst
+++ b/doc/guides/rel_notes/release_21_05.rst
@@ -166,6 +166,8 @@ New Features
   * Added support for ASO (Advanced Steering Operation) meter.
   * Added support for ASO metering by PPS (packet per second).
   * Added support for the monitor policy of Power Management API.
+  * Added support for connection tracking action and item as well as context create,
+    destroy, update and query.
 
 * **Updated NXP DPAA driver.**
 
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v6 00/17] conntrack support in mlx5 PMD
  2021-04-27 15:37 [dpdk-dev] [PATCH 00/17] conntrack support in mlx5 PMD Bing Zhao
                   ` (21 preceding siblings ...)
  2021-05-05  8:05 ` [dpdk-dev] [PATCH v5 00/17] conntrack support in mlx5 PMD Bing Zhao
@ 2021-05-05  9:49 ` Bing Zhao
  2021-05-05  9:49   ` [dpdk-dev] [PATCH v6 01/17] common/mlx5: add connection tracking object definition Bing Zhao
                     ` (16 more replies)
  2021-05-05 12:23 ` [dpdk-dev] [PATCH v7 00/17] conntrack support in mlx5 PMD Bing Zhao
  23 siblings, 17 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  9:49 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

This patch set adds the connection tracking offload support in the
mlx5 driver, as well as the corresponding documentation update.
 
---
v2: code bug fixes, commits clean up and doc update
v3: fix error input pointer for CT MR registering
v4: fix typo in commit message of patch 11
v5: adjust mkey to lkey in SQ initialization
v6: rebase and solve the conflict
---

Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>

Bing Zhao (17):
  common/mlx5: add connection tracking object definition
  common/mlx5: add CT offload capability checking
  net/mlx5: use meter color reg for CT
  net/mlx5: initialization of CT management
  common/mlx5: add Dexv CT objects creation
  net/mlx5: add modify support for CT
  net/mlx5: add actions creating for CT
  net/mlx5: close CT management structure
  net/mlx5: add ASO CT query implementation
  net/mlx5: add ASO CT destroy handling
  net/mlx5: add translation of CT action
  net/mlx5: add translation of CT item
  net/mlx5: add CT context update
  net/mlx5: validation of CT action
  net/mlx5: validation of CT item
  net/mlx5: add support of CT between two ports
  doc: update mlx5 support for conntrack

 doc/guides/nics/features/default.ini   |   1 +
 doc/guides/nics/features/mlx5.ini      |   1 +
 doc/guides/nics/mlx5.rst               |  14 +
 doc/guides/rel_notes/release_21_05.rst |   2 +
 drivers/common/mlx5/linux/meson.build  |   2 +
 drivers/common/mlx5/mlx5_devx_cmds.c   |  53 +++
 drivers/common/mlx5/mlx5_devx_cmds.h   |   5 +
 drivers/common/mlx5/mlx5_prm.h         |  88 ++++
 drivers/common/mlx5/version.map        |   1 +
 drivers/net/mlx5/linux/mlx5_os.c       |  13 +
 drivers/net/mlx5/mlx5.c                |  92 ++++
 drivers/net/mlx5/mlx5.h                |  76 ++++
 drivers/net/mlx5/mlx5_flow.c           |  44 +-
 drivers/net/mlx5/mlx5_flow.h           | 101 ++++-
 drivers/net/mlx5/mlx5_flow_aso.c       | 592 ++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_dv.c        | 601 ++++++++++++++++++++++++-
 16 files changed, 1683 insertions(+), 3 deletions(-)

-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v6 01/17] common/mlx5: add connection tracking object definition
  2021-05-05  9:49 ` [dpdk-dev] [PATCH v6 00/17] conntrack support in mlx5 PMD Bing Zhao
@ 2021-05-05  9:49   ` Bing Zhao
  2021-05-05  9:49   ` [dpdk-dev] [PATCH v6 02/17] common/mlx5: add CT offload capability checking Bing Zhao
                     ` (15 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  9:49 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

The structures of ASO connection tracking offload object are added
based on the definitions in the PRM. One CT object context will be
loaded into the cache completely in a reversed order of dwords. The
valid bit should be the MSB of the last dword. This is used for the
conntrack context creation and update, as well as for the query.

The capabilities 2 (HCA_CAP_2) layout is also added. The connection
tracking related capabilities could be queried via the HCA_CAP_2.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/common/mlx5/mlx5_prm.h | 85 ++++++++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)

diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 330101233a..683ab40338 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -1124,6 +1124,7 @@ enum {
 	MLX5_GET_HCA_CAP_OP_MOD_ROCE = 0x4 << 1,
 	MLX5_GET_HCA_CAP_OP_MOD_NIC_FLOW_TABLE = 0x7 << 1,
 	MLX5_GET_HCA_CAP_OP_MOD_VDPA_EMULATION = 0x13 << 1,
+	MLX5_GET_HCA_CAP_OP_MOD_GENERAL_DEVICE_2 = 0x20 << 1,
 };
 
 #define MLX5_GENERAL_OBJ_TYPES_CAP_VIRTQ_NET_Q \
@@ -1692,6 +1693,29 @@ struct mlx5_ifc_flow_table_nic_cap_bits {
 	       ft_field_support_2_nic_receive;
 };
 
+struct mlx5_ifc_cmd_hca_cap_2_bits {
+	u8 reserved_at_0[0x80]; /* End of DW4. */
+	u8 reserved_at_80[0xb];
+	u8 log_max_num_reserved_qpn[0x5];
+	u8 reserved_at_90[0x3];
+	u8 log_reserved_qpn_granularity[0x5];
+	u8 reserved_at_98[0x3];
+	u8 log_reserved_qpn_max_alloc[0x5]; /* End of DW5. */
+	u8 max_reformat_insert_size[0x8];
+	u8 max_reformat_insert_offset[0x8];
+	u8 max_reformat_remove_size[0x8];
+	u8 max_reformat_remove_offset[0x8]; /* End of DW6. */
+	u8 aso_conntrack_reg_id[0x8];
+	u8 reserved_at_c8[0x3];
+	u8 log_conn_track_granularity[0x5];
+	u8 reserved_at_d0[0x3];
+	u8 log_conn_track_max_alloc[0x5];
+	u8 reserved_at_d8[0x3];
+	u8 log_max_conn_track_offload[0x5];
+	u8 reserved_at_e0[0x20]; /* End of DW7. */
+	u8 reserved_at_100[0x700];
+};
+
 union mlx5_ifc_hca_cap_union_bits {
 	struct mlx5_ifc_cmd_hca_cap_bits cmd_hca_cap;
 	struct mlx5_ifc_per_protocol_networking_offload_caps_bits
@@ -2630,6 +2654,67 @@ struct mlx5_ifc_create_flow_meter_aso_in_bits {
 	struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr;
 	struct mlx5_ifc_flow_meter_aso_bits flow_meter_aso;
 };
+
+struct mlx5_ifc_tcp_window_params_bits {
+	u8 max_ack[0x20];
+	u8 max_win[0x20];
+	u8 reply_end[0x20];
+	u8 sent_end[0x20];
+};
+
+struct mlx5_ifc_conn_track_aso_bits {
+	struct mlx5_ifc_tcp_window_params_bits reply_dir; /* End of DW3. */
+	struct mlx5_ifc_tcp_window_params_bits original_dir; /* End of DW7. */
+	u8 last_end[0x20]; /* End of DW8. */
+	u8 last_ack[0x20]; /* End of DW9. */
+	u8 last_seq[0x20]; /* End of DW10. */
+	u8 last_win[0x10];
+	u8 reserved_at_170[0xa];
+	u8 last_dir[0x1];
+	u8 last_index[0x5]; /* End of DW11. */
+	u8 reserved_at_180[0x40]; /* End of DW13. */
+	u8 reply_direction_tcp_scale[0x4];
+	u8 reply_direction_tcp_close_initiated[0x1];
+	u8 reply_direction_tcp_liberal_enabled[0x1];
+	u8 reply_direction_tcp_data_unacked[0x1];
+	u8 reply_direction_tcp_max_ack[0x1];
+	u8 reserved_at_1c8[0x8];
+	u8 original_direction_tcp_scale[0x4];
+	u8 original_direction_tcp_close_initiated[0x1];
+	u8 original_direction_tcp_liberal_enabled[0x1];
+	u8 original_direction_tcp_data_unacked[0x1];
+	u8 original_direction_tcp_max_ack[0x1];
+	u8 reserved_at_1d8[0x8]; /* End of DW14. */
+	u8 valid[0x1];
+	u8 state[0x3];
+	u8 freeze_track[0x1];
+	u8 reserved_at_1e5[0xb];
+	u8 reserved_at_1f0[0x1];
+	u8 connection_assured[0x1];
+	u8 sack_permitted[0x1];
+	u8 challenged_acked[0x1];
+	u8 heartbeat[0x1];
+	u8 max_ack_window[0x3];
+	u8 reserved_at_1f8[0x1];
+	u8 retransmission_counter[0x3];
+	u8 retranmission_limit_exceeded[0x1];
+	u8 retranmission_limit[0x3]; /* End of DW15. */
+};
+
+struct mlx5_ifc_conn_track_offload_bits {
+	u8 modify_field_select[0x40];
+	u8 reserved_at_40[0x40];
+	u8 reserved_at_80[0x8];
+	u8 conn_track_aso_access_pd[0x18];
+	u8 reserved_at_a0[0x160];
+	struct mlx5_ifc_conn_track_aso_bits conn_track_aso;
+};
+
+struct mlx5_ifc_create_conn_track_aso_in_bits {
+	struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr;
+	struct mlx5_ifc_conn_track_offload_bits conn_track_offload;
+};
+
 enum mlx5_access_aso_opc_mod {
 	ASO_OPC_MOD_IPSEC = 0x0,
 	ASO_OPC_MOD_CONNECTION_TRACKING = 0x1,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v6 02/17] common/mlx5: add CT offload capability checking
  2021-05-05  9:49 ` [dpdk-dev] [PATCH v6 00/17] conntrack support in mlx5 PMD Bing Zhao
  2021-05-05  9:49   ` [dpdk-dev] [PATCH v6 01/17] common/mlx5: add connection tracking object definition Bing Zhao
@ 2021-05-05  9:49   ` Bing Zhao
  2021-05-05  9:49   ` [dpdk-dev] [PATCH v6 03/17] net/mlx5: use meter color reg for CT Bing Zhao
                     ` (14 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  9:49 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

During startup, the ASO connection tracking offload capability can
be queried via the HCA_CAP_QUERY command. If the HW doesn't support ASO
CT, the value is 0 by default. In that case, the subsequent
initialization should be skipped and the creation of a CT object
should return a failure directly.

The subsequent CT creation should also check this capability. With
an old driver, a preprocessor macro (HAVE_MLX5_DR_ACTION_ASO_CT)
should be used so that the compilation still passes.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/common/mlx5/linux/meson.build | 2 ++
 drivers/common/mlx5/mlx5_devx_cmds.c  | 3 +++
 drivers/common/mlx5/mlx5_devx_cmds.h  | 1 +
 drivers/common/mlx5/mlx5_prm.h        | 3 +++
 4 files changed, 9 insertions(+)

diff --git a/drivers/common/mlx5/linux/meson.build b/drivers/common/mlx5/linux/meson.build
index 3334bd5cb2..007834a49b 100644
--- a/drivers/common/mlx5/linux/meson.build
+++ b/drivers/common/mlx5/linux/meson.build
@@ -189,6 +189,8 @@ has_sym_args = [
             'MLX5_WQE_UMR_CTRL_FLAG_INLINE' ],
         [ 'HAVE_MLX5_DR_FLOW_DUMP_RULE', 'infiniband/mlx5dv.h',
             'mlx5dv_dump_dr_rule' ],
+        [ 'HAVE_MLX5_DR_ACTION_ASO_CT', 'infiniband/mlx5dv.h',
+            'MLX5DV_DR_ACTION_FLAGS_ASO_CT_DIRECTION_INITIATOR' ],
 ]
 config = configuration_data()
 foreach arg:has_sym_args
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index 1b54c05313..7a0efa59e5 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -783,6 +783,9 @@ mlx5_devx_cmd_query_hca_attr(void *ctx,
 		MLX5_GET(cmd_hca_cap, hcattr, umr_indirect_mkey_disabled);
 	attr->umr_modify_entity_size_disabled =
 		MLX5_GET(cmd_hca_cap, hcattr, umr_modify_entity_size_disabled);
+	attr->ct_offload = !!(MLX5_GET64(cmd_hca_cap, hcattr,
+					 general_obj_types) &
+			      MLX5_GENERAL_OBJ_TYPES_CAP_CONN_TRACK_OFFLOAD);
 	if (attr->qos.sup) {
 		MLX5_SET(query_hca_cap_in, in, op_mod,
 			 MLX5_GET_HCA_CAP_OP_MOD_QOS_CAP |
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h
index 5681e03fee..e6f9b90293 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -137,6 +137,7 @@ struct mlx5_hca_attr {
 	uint32_t qp_ts_format:2;
 	uint32_t regex:1;
 	uint32_t reg_c_preserve:1;
+	uint32_t ct_offload:1; /* General obj type ASO CT offload supported. */
 	uint32_t regexp_num_of_engines;
 	uint32_t log_max_ft_sampler_num:8;
 	uint32_t geneve_tlv_opt;
diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 683ab40338..b385b6f518 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -1139,6 +1139,8 @@ enum {
 			(1ULL << MLX5_GENERAL_OBJ_TYPE_FLOW_METER_ASO)
 #define MLX5_GENERAL_OBJ_TYPES_CAP_GENEVE_TLV_OPT \
 			(1ULL << MLX5_OBJ_TYPE_GENEVE_TLV_OPT)
+#define MLX5_GENERAL_OBJ_TYPES_CAP_CONN_TRACK_OFFLOAD \
+			(1ULL << MLX5_GENERAL_OBJ_TYPE_CONN_TRACK_OFFLOAD)
 
 enum {
 	MLX5_HCA_CAP_OPMOD_GET_MAX   = 0,
@@ -2487,6 +2489,7 @@ enum {
 	MLX5_GENERAL_OBJ_TYPE_FLEX_PARSE_GRAPH = 0x0022,
 	MLX5_GENERAL_OBJ_TYPE_FLOW_METER_ASO = 0x0024,
 	MLX5_GENERAL_OBJ_TYPE_FLOW_HIT_ASO = 0x0025,
+	MLX5_GENERAL_OBJ_TYPE_CONN_TRACK_OFFLOAD = 0x0031,
 };
 
 struct mlx5_ifc_general_obj_in_cmd_hdr_bits {
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v6 03/17] net/mlx5: use meter color reg for CT
  2021-05-05  9:49 ` [dpdk-dev] [PATCH v6 00/17] conntrack support in mlx5 PMD Bing Zhao
  2021-05-05  9:49   ` [dpdk-dev] [PATCH v6 01/17] common/mlx5: add connection tracking object definition Bing Zhao
  2021-05-05  9:49   ` [dpdk-dev] [PATCH v6 02/17] common/mlx5: add CT offload capability checking Bing Zhao
@ 2021-05-05  9:49   ` Bing Zhao
  2021-05-05  9:49   ` [dpdk-dev] [PATCH v6 04/17] net/mlx5: initialization of CT management Bing Zhao
                     ` (13 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  9:49 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

Based on the capacity, 3 registers could be used. Due to the current
register allocation, only one of them, REG_C_3 for the meter color,
can be reused right now.

As a result, no more than one ASO action can be supported in the
same flow.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow.c | 4 +++-
 drivers/net/mlx5/mlx5_flow.h | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index a9c0108ee3..65399cd452 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -760,7 +760,9 @@ mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
 			return priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
 			       REG_C_3;
 	case MLX5_MTR_COLOR:
-	case MLX5_ASO_FLOW_HIT: /* Both features use the same REG_C. */
+	case MLX5_ASO_FLOW_HIT:
+	case MLX5_ASO_CONNTRACK:
+		/* All features use the same REG_C. */
 		MLX5_ASSERT(priv->mtr_color_reg != REG_NON);
 		return priv->mtr_color_reg;
 	case MLX5_COPY_MARK:
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 0fb8f64474..402c829843 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -85,6 +85,7 @@ enum mlx5_feature_name {
 	MLX5_MTR_COLOR,
 	MLX5_MTR_ID,
 	MLX5_ASO_FLOW_HIT,
+	MLX5_ASO_CONNTRACK,
 };
 
 /* Default queue number. */
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v6 04/17] net/mlx5: initialization of CT management
  2021-05-05  9:49 ` [dpdk-dev] [PATCH v6 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (2 preceding siblings ...)
  2021-05-05  9:49   ` [dpdk-dev] [PATCH v6 03/17] net/mlx5: use meter color reg for CT Bing Zhao
@ 2021-05-05  9:49   ` Bing Zhao
  2021-05-05  9:49   ` [dpdk-dev] [PATCH v6 05/17] common/mlx5: add Dexv CT objects creation Bing Zhao
                     ` (12 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  9:49 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

The definitions of ASO connection tracking objects management
structures are added.

For performance reasons, ASO CT objects should be allocated in bulk.
The maximal number per bulk and the granularity can be fetched from
HCA capabilities 2. Right now, a fixed number of 64 objects is used
for each bulk to make the management easier.

The ASO QP for CT is initialized; its SQ will be used for both
modify and query commands.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/linux/mlx5_os.c | 13 +++++++++
 drivers/net/mlx5/mlx5.c          | 36 +++++++++++++++++++++++
 drivers/net/mlx5/mlx5.h          | 50 ++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_aso.c | 50 ++++++++++++++++++++++++++++++++
 4 files changed, 149 insertions(+)

diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 479ee7d8d1..5ac787106d 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1323,6 +1323,19 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 			DRV_LOG(DEBUG, "Flow Hit ASO is supported.");
 		}
 #endif /* HAVE_MLX5_DR_CREATE_ACTION_ASO */
+#if defined(HAVE_MLX5_DR_CREATE_ACTION_ASO) && \
+	defined(HAVE_MLX5_DR_ACTION_ASO_CT)
+		if (config->hca_attr.ct_offload &&
+		    priv->mtr_color_reg == REG_C_3) {
+			err = mlx5_flow_aso_ct_mng_init(sh);
+			if (err) {
+				err = -err;
+				goto error;
+			}
+			DRV_LOG(DEBUG, "CT ASO is supported.");
+			sh->ct_aso_en = 1;
+		}
+#endif /* HAVE_MLX5_DR_CREATE_ACTION_ASO && HAVE_MLX5_DR_ACTION_ASO_CT */
 #if defined(HAVE_MLX5DV_DR) && defined(HAVE_MLX5_DR_CREATE_ACTION_FLOW_SAMPLE)
 		if (config->hca_attr.log_max_ft_sampler_num > 0  &&
 		    config->dv_flow_en) {
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 912b6a33b4..7e83d09fec 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -672,6 +672,42 @@ mlx5_age_event_prepare(struct mlx5_dev_ctx_shared *sh)
 	}
 }
 
+/*
+ * Initialize the ASO connection tracking structure.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_aso_ct_mng_init(struct mlx5_dev_ctx_shared *sh)
+{
+	int err;
+
+	if (sh->ct_mng)
+		return 0;
+	sh->ct_mng = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*sh->ct_mng),
+				 RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+	if (!sh->ct_mng) {
+		DRV_LOG(ERR, "ASO CT management allocation failed.");
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+	err = mlx5_aso_queue_init(sh, ASO_OPC_MOD_CONNECTION_TRACKING);
+	if (err) {
+		mlx5_free(sh->ct_mng);
+		/* rte_errno should be extracted from the failure. */
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+	rte_spinlock_init(&sh->ct_mng->ct_sl);
+	rte_rwlock_init(&sh->ct_mng->resize_rwl);
+	LIST_INIT(&sh->ct_mng->free_cts);
+	return 0;
+}
+
 /**
  * Initialize the flow resources' indexed mempool.
  *
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index b042f37231..0ff7b8c2bc 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -988,6 +988,52 @@ struct mlx5_bond_info {
 	} ports[MLX5_BOND_MAX_PORTS];
 };
 
+/* Number of connection tracking objects per pool: must be a power of 2. */
+#define MLX5_ASO_CT_ACTIONS_PER_POOL 64
+
+/* ASO Conntrack state. */
+enum mlx5_aso_ct_state {
+	ASO_CONNTRACK_FREE, /* Inactive, in the free list. */
+	ASO_CONNTRACK_WAIT, /* WQE sent in the SQ. */
+	ASO_CONNTRACK_READY, /* CQE received w/o error. */
+	ASO_CONNTRACK_QUERY, /* WQE for query sent. */
+	ASO_CONNTRACK_MAX, /* Guard. */
+};
+
+/* Generic ASO connection tracking structure. */
+struct mlx5_aso_ct_action {
+	LIST_ENTRY(mlx5_aso_ct_action) next; /* Pointer to the next ASO CT. */
+	void *dr_action_orig; /* General action object for original dir. */
+	void *dr_action_rply; /* General action object for reply dir. */
+	uint32_t refcnt; /* Action used count in device flows. */
+	uint16_t offset; /* Offset of ASO CT in DevX objects bulk. */
+	uint16_t peer; /* The only peer port index could also use this CT. */
+	enum mlx5_aso_ct_state state; /* ASO CT state. */
+	bool is_original; /* The direction of the DR action to be used. */
+};
+
+/* ASO connection tracking software pool definition. */
+struct mlx5_aso_ct_pool {
+	uint16_t index; /* Pool index in pools array. */
+	struct mlx5_devx_obj *devx_obj;
+	/* The first devx object in the bulk, used for freeing (not yet). */
+	struct mlx5_aso_ct_action actions[MLX5_ASO_CT_ACTIONS_PER_POOL];
+	/* CT action structures bulk. */
+};
+
+LIST_HEAD(aso_ct_list, mlx5_aso_ct_action);
+
+/* Pools management structure for ASO connection tracking pools. */
+struct mlx5_aso_ct_pools_mng {
+	struct mlx5_aso_ct_pool **pools;
+	uint16_t n; /* Total number of pools. */
+	uint16_t next; /* Number of pools in use, index of next free pool. */
+	rte_spinlock_t ct_sl; /* The ASO CT free list lock. */
+	rte_rwlock_t resize_rwl; /* The ASO CT pool resize lock. */
+	struct aso_ct_list free_cts; /* Free ASO CT objects list. */
+	struct mlx5_aso_sq aso_sq; /* ASO queue objects. */
+};
+
 /*
  * Shared Infiniband device context for Master/Representors
  * which belong to same IB device with multiple IB ports.
@@ -1001,6 +1047,7 @@ struct mlx5_dev_ctx_shared {
 	uint32_t sq_ts_format:2; /* SQ timestamp formats supported. */
 	uint32_t qp_ts_format:2; /* QP timestamp formats supported. */
 	uint32_t meter_aso_en:1; /* Flow Meter ASO is supported. */
+	uint32_t ct_aso_en:1; /* Connection Tracking ASO is supported. */
 	uint32_t max_port; /* Maximal IB device port index. */
 	struct mlx5_bond_info bond; /* Bonding information. */
 	void *ctx; /* Verbs/DV/DevX context. */
@@ -1063,6 +1110,8 @@ struct mlx5_dev_ctx_shared {
 	rte_spinlock_t geneve_tlv_opt_sl; /* Lock for geneve tlv resource */
 	struct mlx5_flow_mtr_mng *mtrmng;
 	/* Meter management structure. */
+	struct mlx5_aso_ct_pools_mng *ct_mng;
+	/* Management data for ASO connection tracking. */
 	struct mlx5_dev_shared_port port[]; /* per device port data array. */
 };
 
@@ -1360,6 +1409,7 @@ bool mlx5_flex_parser_ecpri_exist(struct rte_eth_dev *dev);
 int mlx5_flex_parser_ecpri_alloc(struct rte_eth_dev *dev);
 int mlx5_flow_aso_age_mng_init(struct mlx5_dev_ctx_shared *sh);
 int mlx5_aso_flow_mtrs_mng_init(struct mlx5_dev_ctx_shared *sh);
+int mlx5_flow_aso_ct_mng_init(struct mlx5_dev_ctx_shared *sh);
 
 /* mlx5_ethdev.c */
 
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index 300987d0e9..9f2d21b375 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -186,6 +186,43 @@ mlx5_aso_mtr_init_sq(struct mlx5_aso_sq *sq)
 	}
 }
 
+/*
+ * Initialize Send Queue used for ASO connection tracking.
+ *
+ * @param[in] sq
+ *   ASO SQ to initialize.
+ */
+static void
+mlx5_aso_ct_init_sq(struct mlx5_aso_sq *sq)
+{
+	volatile struct mlx5_aso_wqe *restrict wqe;
+	int i;
+	int size = 1 << sq->log_desc_n;
+	uint64_t addr;
+
+	/* All the next fields state should stay constant. */
+	for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
+		wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
+							  (sizeof(*wqe) >> 4));
+		/* One unique MR for the query data. */
+		wqe->aso_cseg.lkey = rte_cpu_to_be_32(sq->mr.lkey);
+		/* Magic number 64 represents the length of an ASO CT obj. */
+		addr = (uint64_t)((uintptr_t)sq->mr.addr + i * 64);
+		wqe->aso_cseg.va_h = rte_cpu_to_be_32((uint32_t)(addr >> 32));
+		wqe->aso_cseg.va_l_r = rte_cpu_to_be_32((uint32_t)addr | 1u);
+		/*
+		 * The values of operand_masks are different for modify
+		 * and query.
+		 * And data_mask may be different for each modification. In
+		 * query, it could be zero and ignored.
+		 * CQE generation is always needed, in order to decide when
+		 * it is available to create the flow or read the data.
+		 */
+		wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
+						   MLX5_COMP_MODE_OFFSET);
+	}
+}
+
 /**
  * Create Send Queue used for ASO access.
  *
@@ -293,6 +330,19 @@ mlx5_aso_queue_init(struct mlx5_dev_ctx_shared *sh,
 			return -1;
 		mlx5_aso_mtr_init_sq(&sh->mtrmng->pools_mng.sq);
 		break;
+	case ASO_OPC_MOD_CONNECTION_TRACKING:
+		/* 64B per object for query. */
+		if (mlx5_aso_reg_mr(sh, 64 * sq_desc_n,
+				    &sh->ct_mng->aso_sq.mr, 0))
+			return -1;
+		if (mlx5_aso_sq_create(sh->ctx, &sh->ct_mng->aso_sq, 0,
+				sh->tx_uar, sh->pdn, MLX5_ASO_QUEUE_LOG_DESC,
+				sh->sq_ts_format)) {
+			mlx5_aso_dereg_mr(sh, &sh->ct_mng->aso_sq.mr);
+			return -1;
+		}
+		mlx5_aso_ct_init_sq(&sh->ct_mng->aso_sq);
+		break;
 	default:
 		DRV_LOG(ERR, "Unknown ASO operation mode");
 		return -1;
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v6 05/17] common/mlx5: add Dexv CT objects creation
  2021-05-05  9:49 ` [dpdk-dev] [PATCH v6 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (3 preceding siblings ...)
  2021-05-05  9:49   ` [dpdk-dev] [PATCH v6 04/17] net/mlx5: initialization of CT management Bing Zhao
@ 2021-05-05  9:49   ` Bing Zhao
  2021-05-05  9:49   ` [dpdk-dev] [PATCH v6 06/17] net/mlx5: add modify support for CT Bing Zhao
                     ` (11 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  9:49 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

Add support for connection tracking ASO object creation via a DevX
command. Right now only bulk creation is supported.

By default, the objects are created with zeroed contents. Before a
single object can be used, it must be modified by posting a WQE to
the ASO CT SQ.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/common/mlx5/mlx5_devx_cmds.c | 50 ++++++++++++++++++++++++++++
 drivers/common/mlx5/mlx5_devx_cmds.h |  4 +++
 drivers/common/mlx5/version.map      |  1 +
 3 files changed, 55 insertions(+)

diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index 7a0efa59e5..3f89796eb4 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -2255,6 +2255,56 @@ mlx5_devx_cmd_create_flow_meter_aso_obj(void *ctx, uint32_t pd,
 	return flow_meter_aso_obj;
 }
 
+/**
+ * Create general object of type CONN_TRACK_OFFLOAD using DevX API.
+ *
+ * @param[in] ctx
+ *   Context returned from mlx5 open_device() glue function.
+ * @param[in] pd
+ *   PD value to associate the CONN_TRACK_OFFLOAD ASO object with.
+ * @param[in] log_obj_size
+ *   Log2 of the number of objects requested in one CONN_TRACK_OFFLOAD
+ *   bulk allocation; it is written to the log_obj_range header field.
+ *
+ * @return
+ *   The DevX object created, NULL otherwise and rte_errno is set.
+ */
+struct mlx5_devx_obj *
+mlx5_devx_cmd_create_conn_track_offload_obj(void *ctx, uint32_t pd,
+					    uint32_t log_obj_size)
+{
+	uint32_t in[MLX5_ST_SZ_DW(create_conn_track_aso_in)] = {0};
+	uint32_t out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
+	struct mlx5_devx_obj *ct_aso_obj;
+	void *ptr;
+
+	ct_aso_obj = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*ct_aso_obj),
+				 0, SOCKET_ID_ANY);
+	if (!ct_aso_obj) {
+		DRV_LOG(ERR, "Failed to allocate CONN_TRACK_OFFLOAD object.");
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+	ptr = MLX5_ADDR_OF(create_conn_track_aso_in, in, hdr);
+	MLX5_SET(general_obj_in_cmd_hdr, ptr, opcode,
+		 MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, ptr, obj_type,
+		 MLX5_GENERAL_OBJ_TYPE_CONN_TRACK_OFFLOAD);
+	MLX5_SET(general_obj_in_cmd_hdr, ptr, log_obj_range, log_obj_size);
+	ptr = MLX5_ADDR_OF(create_conn_track_aso_in, in, conn_track_offload);
+	MLX5_SET(conn_track_offload, ptr, conn_track_aso_access_pd, pd);
+	ct_aso_obj->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in),
+						     out, sizeof(out));
+	if (!ct_aso_obj->obj) {
+		rte_errno = errno; /* Saved first: logging may clobber errno. */
+		DRV_LOG(ERR, "Failed to create CONN_TRACK_OFFLOAD obj by using DevX.");
+		mlx5_free(ct_aso_obj);
+		return NULL;
+	}
+	ct_aso_obj->id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+	return ct_aso_obj;
+}
+
 /**
  * Create general object of type GENEVE TLV option using DevX API.
  *
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h
index e6f9b90293..58dc123778 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -570,6 +570,10 @@ struct mlx5_devx_obj *mlx5_devx_cmd_queue_counter_alloc(void *ctx);
 __rte_internal
 int mlx5_devx_cmd_queue_counter_query(struct mlx5_devx_obj *dcs, int clear,
 				      uint32_t *out_of_buffers);
+__rte_internal
+struct mlx5_devx_obj *mlx5_devx_cmd_create_conn_track_offload_obj(void *ctx,
+					uint32_t pd, uint32_t log_obj_size);
+
 /**
  * Create general object of type FLOW_METER_ASO using DevX API..
  *
diff --git a/drivers/common/mlx5/version.map b/drivers/common/mlx5/version.map
index 18dc96276d..4bbcba5b8e 100644
--- a/drivers/common/mlx5/version.map
+++ b/drivers/common/mlx5/version.map
@@ -13,6 +13,7 @@ INTERNAL {
 	mlx5_dev_to_pci_addr; # WINDOWS_NO_EXPORT
 
 	mlx5_devx_cmd_alloc_pd;
+	mlx5_devx_cmd_create_conn_track_offload_obj;
 	mlx5_devx_cmd_create_cq;
 	mlx5_devx_cmd_create_flex_parser;
 	mlx5_devx_cmd_create_qp;
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v6 06/17] net/mlx5: add modify support for CT
  2021-05-05  9:49 ` [dpdk-dev] [PATCH v6 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (4 preceding siblings ...)
  2021-05-05  9:49   ` [dpdk-dev] [PATCH v6 05/17] common/mlx5: add Dexv CT objects creation Bing Zhao
@ 2021-05-05  9:49   ` Bing Zhao
  2021-05-05  9:49   ` [dpdk-dev] [PATCH v6 07/17] net/mlx5: add actions creating " Bing Zhao
                     ` (10 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  9:49 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

After the connection tracking object bulk is allocated, the contents
of all the objects are filled with zeros by default. Every newly
allocated object must be modified via a WQE operation before it is
used.

In order to reduce the latency for the flow creation, an asynchronous
way is used instead of busy waiting for the CQE to be generated.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.h          |   8 +
 drivers/net/mlx5/mlx5_flow.h     |   2 +
 drivers/net/mlx5/mlx5_flow_aso.c | 252 +++++++++++++++++++++++++++++++
 3 files changed, 262 insertions(+)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 0ff7b8c2bc..96b5cccf19 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -493,6 +493,7 @@ struct mlx5_aso_sq_elem {
 			uint16_t burst_size;
 		};
 		struct mlx5_aso_mtr *mtr;
+		struct mlx5_aso_ct_action *ct;
 	};
 };
 
@@ -1012,6 +1013,10 @@ struct mlx5_aso_ct_action {
 	bool is_original; /* The direction of the DR action to be used. */
 };
 
+/* Set the state of CT action object (c) to (s) with a relaxed atomic store. */
+#define MLX5_ASO_CT_UPDATE_STATE(c, s) \
+	__atomic_store_n(&((c)->state), (s), __ATOMIC_RELAXED)
+
 /* ASO connection tracking software pool definition. */
 struct mlx5_aso_ct_pool {
 	uint16_t index; /* Pool index in pools array. */
@@ -1695,5 +1700,8 @@ int mlx5_aso_meter_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
 		struct mlx5_aso_mtr *mtr);
 int mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh,
 		struct mlx5_aso_mtr *mtr);
+int mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
+			      struct mlx5_aso_ct_action *ct,
+			      const struct rte_flow_action_conntrack *profile);
 
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 402c829843..71b0871bcd 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -843,6 +843,8 @@ struct mlx5_flow {
 #define MLX5_ASO_WQE_CQE_RESPONSE_DELAY 10u
 #define MLX5_MTR_POLL_WQE_CQE_TIMES 100000u
 
+#define MLX5_CT_POLL_WQE_CQE_TIMES MLX5_MTR_POLL_WQE_CQE_TIMES
+
 #define MLX5_MAN_WIDTH 8
 /* Legacy Meter parameter structure. */
 struct mlx5_legacy_flow_meter {
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index 9f2d21b375..fbf6e5ef38 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -887,3 +887,255 @@ mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh,
 			mtr->offset);
 	return -1;
 }
+
+/*
+ * Post a WQE to the ASO CT SQ to modify the context.
+ *
+ * The SQ spinlock is held from the free-slot check until the doorbell has
+ * been rung. Once a slot is available, the CT is moved to the
+ * ASO_CONNTRACK_WAIT state and recorded in the SQ element array before the
+ * WQE is filled from @p profile.
+ *
+ * @param[in] mng
+ *   Pointer to the CT pools management structure.
+ * @param[in] ct
+ *   Pointer to the generic CT structure related to the context.
+ * @param[in] profile
+ *   Pointer to configuration profile.
+ *
+ * @return
+ *   1 on success (WQE number), 0 on failure.
+ */
+static uint16_t
+mlx5_aso_ct_sq_enqueue_single(struct mlx5_aso_ct_pools_mng *mng,
+			      struct mlx5_aso_ct_action *ct,
+			      const struct rte_flow_action_conntrack *profile)
+{
+	volatile struct mlx5_aso_wqe *wqe = NULL;
+	struct mlx5_aso_sq *sq = &mng->aso_sq;
+	uint16_t size = 1 << sq->log_desc_n;
+	uint16_t mask = size - 1;
+	uint16_t res;
+	struct mlx5_aso_ct_pool *pool;
+	void *desg;
+	void *orig_dir;
+	void *reply_dir;
+
+	rte_spinlock_lock(&sq->sqsl);
+	/* The lock prevents other threads from updating the indexes. */
+	/* Free slots: queue size minus the WQEs between tail and head. */
+	res = size - (uint16_t)(sq->head - sq->tail);
+	if (unlikely(!res)) {
+		rte_spinlock_unlock(&sq->sqsl);
+		DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
+		return 0;
+	}
+	wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
+	rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
+	/* Fill next WQE. */
+	MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_WAIT);
+	/* Remember which CT this slot belongs to. */
+	sq->elts[sq->head & mask].ct = ct;
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	/* Each WQE will have a single CT object: pool DevX ID plus offset. */
+	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
+						  ct->offset);
+	wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
+			(ASO_OPC_MOD_CONNECTION_TRACKING <<
+			 WQE_CSEG_OPC_MOD_OFFSET) |
+			sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
+	wqe->aso_cseg.operand_masks = rte_cpu_to_be_32
+			(0u |
+			 (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
+			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
+			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
+			 (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
+	/* NOTE(review): presumably all-ones selects every data byte - confirm. */
+	wqe->aso_cseg.data_mask = UINT64_MAX;
+	/* Strip the volatile qualifier (via uintptr_t) to make compiler happy. */
+	desg = (void *)(uintptr_t)wqe->aso_dseg.data;
+	MLX5_SET(conn_track_aso, desg, valid, 1);
+	MLX5_SET(conn_track_aso, desg, state, profile->state);
+	/* The freeze_track bit is the inverse of the profile's enable flag. */
+	MLX5_SET(conn_track_aso, desg, freeze_track, !profile->enable);
+	MLX5_SET(conn_track_aso, desg, connection_assured,
+		 profile->live_connection);
+	MLX5_SET(conn_track_aso, desg, sack_permitted, profile->selective_ack);
+	MLX5_SET(conn_track_aso, desg, challenged_acked,
+		 profile->challenge_ack_passed);
+	/* Heartbeat, retransmission_counter, retranmission_limit_exceeded: 0 */
+	MLX5_SET(conn_track_aso, desg, heartbeat, 0);
+	MLX5_SET(conn_track_aso, desg, max_ack_window,
+		 profile->max_ack_window);
+	MLX5_SET(conn_track_aso, desg, retransmission_counter, 0);
+	MLX5_SET(conn_track_aso, desg, retranmission_limit_exceeded, 0);
+	MLX5_SET(conn_track_aso, desg, retranmission_limit,
+		 profile->retransmission_limit);
+	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_scale,
+		 profile->reply_dir.scale);
+	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_close_initiated,
+		 profile->reply_dir.close_initiated);
+	/* Both directions will use the same liberal mode. */
+	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_liberal_enabled,
+		 profile->liberal_mode);
+	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_data_unacked,
+		 profile->reply_dir.data_unacked);
+	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_max_ack,
+		 profile->reply_dir.last_ack_seen);
+	MLX5_SET(conn_track_aso, desg, original_direction_tcp_scale,
+		 profile->original_dir.scale);
+	MLX5_SET(conn_track_aso, desg, original_direction_tcp_close_initiated,
+		 profile->original_dir.close_initiated);
+	MLX5_SET(conn_track_aso, desg, original_direction_tcp_liberal_enabled,
+		 profile->liberal_mode);
+	MLX5_SET(conn_track_aso, desg, original_direction_tcp_data_unacked,
+		 profile->original_dir.data_unacked);
+	MLX5_SET(conn_track_aso, desg, original_direction_tcp_max_ack,
+		 profile->original_dir.last_ack_seen);
+	MLX5_SET(conn_track_aso, desg, last_win, profile->last_window);
+	MLX5_SET(conn_track_aso, desg, last_dir, profile->last_direction);
+	MLX5_SET(conn_track_aso, desg, last_index, profile->last_index);
+	MLX5_SET(conn_track_aso, desg, last_seq, profile->last_seq);
+	MLX5_SET(conn_track_aso, desg, last_ack, profile->last_ack);
+	MLX5_SET(conn_track_aso, desg, last_end, profile->last_end);
+	/* Per-direction TCP window parameters, original direction first. */
+	orig_dir = MLX5_ADDR_OF(conn_track_aso, desg, original_dir);
+	MLX5_SET(tcp_window_params, orig_dir, sent_end,
+		 profile->original_dir.sent_end);
+	MLX5_SET(tcp_window_params, orig_dir, reply_end,
+		 profile->original_dir.reply_end);
+	MLX5_SET(tcp_window_params, orig_dir, max_win,
+		 profile->original_dir.max_win);
+	MLX5_SET(tcp_window_params, orig_dir, max_ack,
+		 profile->original_dir.max_ack);
+	reply_dir = MLX5_ADDR_OF(conn_track_aso, desg, reply_dir);
+	MLX5_SET(tcp_window_params, reply_dir, sent_end,
+		 profile->reply_dir.sent_end);
+	MLX5_SET(tcp_window_params, reply_dir, reply_end,
+		 profile->reply_dir.reply_end);
+	MLX5_SET(tcp_window_params, reply_dir, max_win,
+		 profile->reply_dir.max_win);
+	MLX5_SET(tcp_window_params, reply_dir, max_ack,
+		 profile->reply_dir.max_ack);
+	/*
+	 * Publish the WQE: advance the software indexes, update the doorbell
+	 * record and then write the first 8 bytes of the WQE to the UAR, with
+	 * a write barrier between each step.
+	 */
+	sq->head++;
+	sq->pi += 2; /* Each WQE contains 2 WQEBB's. */
+	rte_io_wmb();
+	sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
+	rte_wmb();
+	*sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH. */
+	rte_wmb();
+	rte_spinlock_unlock(&sq->sqsl);
+	return 1;
+}
+
+/*
+ * Update the status field of CTs to indicate ready to be used by flows.
+ * A continuous number of CTs since last update.
+ *
+ * Walks @p num SQ elements starting at sq->tail (wrapping on the queue
+ * size) and marks the CT recorded in each of them as ASO_CONNTRACK_READY.
+ * sq->tail itself is not advanced here. The function returns nothing.
+ *
+ * @param[in] sq
+ *   Pointer to ASO CT SQ.
+ * @param[in] num
+ *   Number of CT structures to be updated.
+ */
+static void
+mlx5_aso_ct_status_update(struct mlx5_aso_sq *sq, uint16_t num)
+{
+	uint16_t size = 1 << sq->log_desc_n;
+	uint16_t mask = size - 1;
+	uint16_t i;
+	struct mlx5_aso_ct_action *ct = NULL;
+	uint16_t idx;
+
+	for (i = 0; i < num; i++) {
+		/* Slot of the i-th element after tail, wrapped to the ring. */
+		idx = (uint16_t)((sq->tail + i) & mask);
+		ct = sq->elts[idx].ct;
+		MLX5_ASSERT(ct);
+		MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_READY);
+	}
+}
+
+/*
+ * Handle completions from WQEs sent to ASO CT.
+ *
+ * Runs entirely under the SQ spinlock. It returns immediately when no WQE is
+ * outstanding (head == tail). Otherwise it consumes CQEs until one still
+ * owned by the hardware is found, marks the CTs of the successfully
+ * completed WQEs as ready, advances sq->tail and updates the CQ doorbell
+ * record.
+ *
+ * @param[in] mng
+ *   Pointer to the CT pools management structure.
+ */
+static void
+mlx5_aso_ct_completion_handle(struct mlx5_aso_ct_pools_mng *mng)
+{
+	struct mlx5_aso_sq *sq = &mng->aso_sq;
+	struct mlx5_aso_cq *cq = &sq->cq;
+	volatile struct mlx5_cqe *restrict cqe;
+	const uint32_t cq_size = 1 << cq->log_desc_n;
+	const uint32_t mask = cq_size - 1;
+	uint32_t idx;
+	uint32_t next_idx;
+	uint16_t max;
+	uint16_t n = 0;
+	int ret;
+
+	rte_spinlock_lock(&sq->sqsl);
+	/* Number of WQEs posted but not yet reaped; nothing to do if zero. */
+	max = (uint16_t)(sq->head - sq->tail);
+	if (unlikely(!max)) {
+		rte_spinlock_unlock(&sq->sqsl);
+		return;
+	}
+	next_idx = cq->cq_ci & mask;
+	do {
+		idx = next_idx;
+		next_idx = (cq->cq_ci + 1) & mask;
+		/* Need to confirm the position of the prefetch. */
+		rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
+		cqe = &cq->cq_obj.cqes[idx];
+		ret = check_cqe(cqe, cq_size, cq->cq_ci);
+		/*
+		 * Be sure owner read is done before any other cookie field or
+		 * opaque field.
+		 */
+		rte_io_rmb();
+		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
+			/* Hardware still owns this CQE: no more completions. */
+			if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
+				break;
+			/* Error CQE: consumed below, but not counted in n. */
+			mlx5_aso_cqe_err_handle(sq);
+		} else {
+			n++;
+		}
+		cq->cq_ci++;
+	} while (1);
+	/*
+	 * NOTE(review): when only error CQEs were consumed (n == 0), cq_ci has
+	 * advanced but neither sq->tail nor the CQ doorbell record is updated
+	 * here - confirm this is intended.
+	 */
+	if (likely(n)) {
+		mlx5_aso_ct_status_update(sq, n);
+		sq->tail += n;
+		rte_io_wmb();
+		cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
+	}
+	rte_spinlock_unlock(&sq->sqsl);
+}
+
+/*
+ * Push a connection tracking context update to the hardware via the ASO SQ.
+ *
+ * Every attempt first handles the pending completions and then tries to
+ * post a single WQE. If the post fails, the caller sleeps for 10us and
+ * tries again, for at most MLX5_CT_POLL_WQE_CQE_TIMES attempts.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ * @param[in] ct
+ *   Pointer to connection tracking offload object.
+ * @param[in] profile
+ *   Pointer to connection tracking TCP parameter.
+ *
+ * @return
+ *   0 on success, -1 on failure.
+ */
+int
+mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
+			  struct mlx5_aso_ct_action *ct,
+			  const struct rte_flow_action_conntrack *profile)
+{
+	struct mlx5_aso_ct_pools_mng *ct_mng = sh->ct_mng;
+	uint32_t attempts_left = MLX5_CT_POLL_WQE_CQE_TIMES;
+	struct mlx5_aso_ct_pool *owner;
+
+	MLX5_ASSERT(ct);
+	for (;;) {
+		mlx5_aso_ct_completion_handle(ct_mng);
+		if (mlx5_aso_ct_sq_enqueue_single(ct_mng, ct, profile) != 0)
+			return 0;
+		/* Waiting for wqe resource. */
+		rte_delay_us_sleep(10u);
+		attempts_left--;
+		if (attempts_left == 0)
+			break;
+	}
+	/* Out of attempts: report which CT of which pool could not be sent. */
+	owner = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	DRV_LOG(ERR, "Fail to send WQE for ASO CT %d in pool %d",
+		ct->offset, owner->index);
+	return -1;
+}
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v6 07/17] net/mlx5: add actions creating for CT
  2021-05-05  9:49 ` [dpdk-dev] [PATCH v6 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (5 preceding siblings ...)
  2021-05-05  9:49   ` [dpdk-dev] [PATCH v6 06/17] net/mlx5: add modify support for CT Bing Zhao
@ 2021-05-05  9:49   ` Bing Zhao
  2021-05-05  9:50   ` [dpdk-dev] [PATCH v6 08/17] net/mlx5: close CT management structure Bing Zhao
                     ` (9 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  9:49 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

Allocating a CT from the management pools and creating the DR actions
for both directions by default.

If there is no available connection tracking action, a new pool will
be created with a fixed size bulk allocation. Right now, all the
resources are controlled by the linked list.

The ASO connection tracking context associated with these actions
needs to be updated via WQE before being used for steering.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.h         |   4 +
 drivers/net/mlx5/mlx5_flow.h    |  28 ++++
 drivers/net/mlx5/mlx5_flow_dv.c | 263 ++++++++++++++++++++++++++++++++
 3 files changed, 295 insertions(+)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 96b5cccf19..0f2a26efc0 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -992,6 +992,10 @@ struct mlx5_bond_info {
 /* Number of connection tracking objects per pool: must be a power of 2. */
 #define MLX5_ASO_CT_ACTIONS_PER_POOL 64
 
+/* Generate incremental and unique CT index from pool and offset. */
+#define MLX5_MAKE_CT_IDX(pool, offset) \
+	((pool) * MLX5_ASO_CT_ACTIONS_PER_POOL + (offset) + 1)
+
 /* ASO Conntrack state. */
 enum mlx5_aso_ct_state {
 	ASO_CONNTRACK_FREE, /* Inactive, in the free list. */
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 71b0871bcd..0d2daa7faf 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -46,6 +46,7 @@ enum {
 	MLX5_INDIRECT_ACTION_TYPE_RSS,
 	MLX5_INDIRECT_ACTION_TYPE_AGE,
 	MLX5_INDIRECT_ACTION_TYPE_COUNT,
+	MLX5_INDIRECT_ACTION_TYPE_CT,
 };
 
 /* Matches on selected register. */
@@ -1317,6 +1318,33 @@ mlx5_validate_integrity_item(const struct rte_flow_item_integrity *item)
 	return (test.value == 0);
 }
 
+/*
+ * Translate a CT index into the ASO CT action it designates.
+ *
+ * The index is 1-based: after subtracting one, the quotient by
+ * MLX5_ASO_CT_ACTIONS_PER_POOL selects the pool and the remainder selects
+ * the action inside that pool.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] idx
+ *   Index to the ASO CT action.
+ *
+ * @return
+ *   The specified ASO CT action pointer.
+ */
+static inline struct mlx5_aso_ct_action *
+flow_aso_ct_get_by_idx(struct rte_eth_dev *dev, uint32_t idx)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pools_mng *ct_mng = priv->sh->ct_mng;
+	const uint32_t zero_based = idx - 1;
+	/* Bit operation AND could be used. */
+	const uint32_t pool_no = zero_based / MLX5_ASO_CT_ACTIONS_PER_POOL;
+	const uint32_t slot = zero_based % MLX5_ASO_CT_ACTIONS_PER_POOL;
+	struct mlx5_aso_ct_pool *owner;
+
+	MLX5_ASSERT(pool_no < ct_mng->n);
+	/* The pools array is read under the resize read lock. */
+	rte_rwlock_read_lock(&ct_mng->resize_rwl);
+	owner = ct_mng->pools[pool_no];
+	rte_rwlock_read_unlock(&ct_mng->resize_rwl);
+	return &owner->actions[slot];
+}
+
 int mlx5_flow_group_to_table(struct rte_eth_dev *dev,
 			     const struct mlx5_flow_tunnel *tunnel,
 			     uint32_t group, uint32_t *table,
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index c6f90e0a89..b3606e895c 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -11515,6 +11515,262 @@ flow_dv_prepare_counter(struct rte_eth_dev *dev,
 	return flow_dv_counter_get_by_idx(dev, flow->counter, NULL);
 }
 
+/*
+ * Drop one reference of an ASO CT action.
+ *
+ * When the last reference goes away, both DR actions (if created) are
+ * destroyed and the CT action is put back at the head of the free list
+ * under the management spinlock.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] idx
+ *   Index of ASO CT action to release.
+ *
+ * @return
+ *   0 when CT action was removed, otherwise the number of references.
+ */
+static inline int
+flow_dv_aso_ct_release(struct rte_eth_dev *dev, uint32_t idx)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pools_mng *ct_mng = priv->sh->ct_mng;
+	struct mlx5_aso_ct_action *ct_act = flow_aso_ct_get_by_idx(dev, idx);
+	uint32_t refs_left;
+
+	refs_left = __atomic_sub_fetch(&ct_act->refcnt, 1, __ATOMIC_RELAXED);
+	/* Still referenced: nothing to tear down yet. */
+	if (refs_left != 0)
+		return refs_left;
+	if (ct_act->dr_action_orig != NULL) {
+#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
+		claim_zero(mlx5_glue->destroy_flow_action
+				(ct_act->dr_action_orig));
+#endif
+		ct_act->dr_action_orig = NULL;
+	}
+	if (ct_act->dr_action_rply != NULL) {
+#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
+		claim_zero(mlx5_glue->destroy_flow_action
+				(ct_act->dr_action_rply));
+#endif
+		ct_act->dr_action_rply = NULL;
+	}
+	rte_spinlock_lock(&ct_mng->ct_sl);
+	LIST_INSERT_HEAD(&ct_mng->free_cts, ct_act, next);
+	rte_spinlock_unlock(&ct_mng->ct_sl);
+	return refs_left;
+}
+
+/*
+ * Grow the array of ASO CT pool pointers by 64 entries.
+ *
+ * A new zeroed array is allocated first; the swap itself (copy of the old
+ * entries, release of the old array, update of the size and the pointer) is
+ * done under the resize write lock.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ *
+ * @return
+ *   0 on success, otherwise negative errno value and rte_errno is set.
+ */
+static int
+flow_dv_aso_ct_pools_resize(struct rte_eth_dev *dev)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pools_mng *ct_mng = priv->sh->ct_mng;
+	/* Magic number now, need a macro. */
+	const uint32_t grow_step = 64;
+	void *prev_array = ct_mng->pools;
+	uint32_t new_cnt = ct_mng->n + grow_step;
+	uint32_t new_bytes = sizeof(struct mlx5_aso_ct_pool *) * new_cnt;
+	void *new_array;
+
+	new_array = mlx5_malloc(MLX5_MEM_ZERO, new_bytes, 0, SOCKET_ID_ANY);
+	if (new_array == NULL) {
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+	rte_rwlock_write_lock(&ct_mng->resize_rwl);
+	/* ASO SQ/QP was already initialized in the startup. */
+	if (prev_array != NULL) {
+		/* Realloc could be an alternative choice. */
+		rte_memcpy(new_array, prev_array,
+			   ct_mng->n * sizeof(struct mlx5_aso_ct_pool *));
+		mlx5_free(prev_array);
+	}
+	ct_mng->n = new_cnt;
+	ct_mng->pools = new_array;
+	rte_rwlock_write_unlock(&ct_mng->resize_rwl);
+	return 0;
+}
+
+/*
+ * Create and initialize a new ASO CT pool.
+ *
+ * A bulk of MLX5_ASO_CT_ACTIONS_PER_POOL conntrack objects is created with
+ * one DevX command, a zeroed pool structure is allocated for it and stored
+ * in the pools array (which is resized first when it is full). Action 0 of
+ * the new pool is handed to the caller through @p ct_free; the remaining
+ * actions are inserted into the free list.
+ *
+ * No lock is taken here: the caller is expected to serialize the access to
+ * the free list.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[out] ct_free
+ *   Where to put the pointer of a new CT action.
+ *
+ * @return
+ *   The CT actions pool pointer and @p ct_free is set on success,
+ *   NULL otherwise and rte_errno is set.
+ */
+static struct mlx5_aso_ct_pool *
+flow_dv_ct_pool_create(struct rte_eth_dev *dev,
+		       struct mlx5_aso_ct_action **ct_free)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	struct mlx5_aso_ct_pool *pool = NULL;
+	struct mlx5_devx_obj *obj = NULL;
+	uint32_t i;
+	uint32_t log_obj_size = rte_log2_u32(MLX5_ASO_CT_ACTIONS_PER_POOL);
+
+	obj = mlx5_devx_cmd_create_conn_track_offload_obj(priv->sh->ctx,
+						priv->sh->pdn, log_obj_size);
+	if (!obj) {
+		/*
+		 * NOTE(review): this replaces whatever rte_errno the DevX
+		 * helper reported with ENODATA - confirm that is intended.
+		 */
+		rte_errno = ENODATA;
+		DRV_LOG(ERR, "Failed to create conn_track_offload_obj using DevX.");
+		return NULL;
+	}
+	pool = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*pool), 0, SOCKET_ID_ANY);
+	if (!pool) {
+		rte_errno = ENOMEM;
+		claim_zero(mlx5_devx_cmd_destroy(obj));
+		return NULL;
+	}
+	pool->devx_obj = obj;
+	pool->index = mng->next;
+	/* Resize pools array if there is no room for the new pool in it. */
+	if (pool->index == mng->n && flow_dv_aso_ct_pools_resize(dev)) {
+		claim_zero(mlx5_devx_cmd_destroy(obj));
+		mlx5_free(pool);
+		return NULL;
+	}
+	mng->pools[pool->index] = pool;
+	mng->next++;
+	/* Assign the first action in the new pool, the rest go to free list. */
+	/* Its offset stays 0 because the pool memory was allocated zeroed. */
+	*ct_free = &pool->actions[0];
+	/* Lock outside, the list operation is safe here. */
+	for (i = 1; i < MLX5_ASO_CT_ACTIONS_PER_POOL; i++) {
+		/* refcnt is 0 when allocating the memory. */
+		pool->actions[i].offset = i;
+		LIST_INSERT_HEAD(&mng->free_cts, &pool->actions[i], next);
+	}
+	return pool;
+}
+
+/*
+ * Allocate a ASO CT action from free list.
+ *
+ * The first entry of the free list is taken under the management spinlock;
+ * when the list is empty a new pool is created and its first action is
+ * used. The reference counter is then set to 1 and the two DR actions
+ * (initiator and responder direction) are created unless they already
+ * exist. If either DR action cannot be created, the CT action is released
+ * again and 0 is returned.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   Index to ASO CT action on success, 0 otherwise and rte_errno is set.
+ */
+static uint32_t
+flow_dv_aso_ct_alloc(struct rte_eth_dev *dev, struct rte_flow_error *error)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pools_mng *mng = priv->sh->ct_mng;
+	struct mlx5_aso_ct_action *ct = NULL;
+	struct mlx5_aso_ct_pool *pool;
+	uint8_t reg_c;
+	uint32_t ct_idx;
+
+	MLX5_ASSERT(mng);
+	/* Only rte_errno is set on this path, @p error is left untouched. */
+	if (!priv->config.devx) {
+		rte_errno = ENOTSUP;
+		return 0;
+	}
+	/* Get a free CT action, if no, a new pool will be created. */
+	rte_spinlock_lock(&mng->ct_sl);
+	ct = LIST_FIRST(&mng->free_cts);
+	if (ct) {
+		LIST_REMOVE(ct, next);
+	} else if (!flow_dv_ct_pool_create(dev, &ct)) {
+		rte_spinlock_unlock(&mng->ct_sl);
+		rte_flow_error_set(error, rte_errno, RTE_FLOW_ERROR_TYPE_ACTION,
+				   NULL, "failed to create ASO CT pool");
+		return 0;
+	}
+	rte_spinlock_unlock(&mng->ct_sl);
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	ct_idx = MLX5_MAKE_CT_IDX(pool->index, ct->offset);
+	/* 0: inactive, 1: created, 2+: used by flows. */
+	__atomic_store_n(&ct->refcnt, 1, __ATOMIC_RELAXED);
+	/*
+	 * NOTE(review): the result is stored in a uint8_t without any check;
+	 * if mlx5_flow_get_reg_id() can report failure with a negative value
+	 * it would be truncated here - confirm against its contract.
+	 */
+	reg_c = mlx5_flow_get_reg_id(dev, MLX5_ASO_CONNTRACK, 0, error);
+	if (!ct->dr_action_orig) {
+#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
+		ct->dr_action_orig = mlx5_glue->dv_create_flow_action_aso
+			(priv->sh->rx_domain, pool->devx_obj->obj,
+			 ct->offset,
+			 MLX5DV_DR_ACTION_FLAGS_ASO_CT_DIRECTION_INITIATOR,
+			 reg_c - REG_C_0);
+#else
+		RTE_SET_USED(reg_c);
+#endif
+		/* Also taken when the DR ASO CT support is compiled out. */
+		if (!ct->dr_action_orig) {
+			flow_dv_aso_ct_release(dev, ct_idx);
+			rte_flow_error_set(error, rte_errno,
+					   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					   "failed to create ASO CT action");
+			return 0;
+		}
+	}
+	if (!ct->dr_action_rply) {
+#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
+		ct->dr_action_rply = mlx5_glue->dv_create_flow_action_aso
+			(priv->sh->rx_domain, pool->devx_obj->obj,
+			 ct->offset,
+			 MLX5DV_DR_ACTION_FLAGS_ASO_CT_DIRECTION_RESPONDER,
+			 reg_c - REG_C_0);
+#endif
+		if (!ct->dr_action_rply) {
+			flow_dv_aso_ct_release(dev, ct_idx);
+			rte_flow_error_set(error, rte_errno,
+					   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					   "failed to create ASO CT action");
+			return 0;
+		}
+	}
+	return ct_idx;
+}
+
+/*
+ * Create a conntrack object with context and actions by using ASO mechanism.
+ *
+ * The function returns an unsigned index where 0 is the only failure value.
+ * rte_flow_error_set() returns a negative errno, so its result must never
+ * be returned from here: it would be converted to a large non-zero value
+ * that the caller cannot tell apart from a valid index.
+ *
+ * @param[in] dev
+ *   Pointer to rte_eth_dev structure.
+ * @param[in] pro
+ *   Pointer to conntrack information profile.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   Index to conntrack object on success, 0 otherwise and @p error is set.
+ */
+static uint32_t
+flow_dv_translate_create_conntrack(struct rte_eth_dev *dev,
+				   const struct rte_flow_action_conntrack *pro,
+				   struct rte_flow_error *error)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
+	struct mlx5_aso_ct_action *ct;
+	uint32_t idx;
+
+	if (!sh->ct_aso_en) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+				   "Connection is not supported");
+		return 0;
+	}
+	idx = flow_dv_aso_ct_alloc(dev, error);
+	if (!idx) {
+		rte_flow_error_set(error, rte_errno,
+				   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+				   "Failed to allocate CT object");
+		return 0;
+	}
+	ct = flow_aso_ct_get_by_idx(dev, idx);
+	if (mlx5_aso_ct_update_by_wqe(sh, ct, pro)) {
+		/*
+		 * The object was allocated above with a single reference;
+		 * drop it so the CT returns to the free list instead of
+		 * leaking when the context cannot be programmed.
+		 */
+		flow_dv_aso_ct_release(dev, idx);
+		rte_flow_error_set(error, EBUSY,
+				   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+				   "Failed to update CT");
+		return 0;
+	}
+	return idx;
+}
+
 /**
  * Fill the flow with DV spec, lock free
  * (mutex should be acquired by caller).
@@ -13729,6 +13985,7 @@ flow_dv_action_create(struct rte_eth_dev *dev,
 {
 	uint32_t idx = 0;
 	uint32_t ret = 0;
+	struct mlx5_priv *priv = dev->data->dev_private;
 
 	switch (action->type) {
 	case RTE_FLOW_ACTION_TYPE_RSS:
@@ -13754,6 +14011,12 @@ flow_dv_action_create(struct rte_eth_dev *dev,
 		idx = (MLX5_INDIRECT_ACTION_TYPE_COUNT <<
 		       MLX5_INDIRECT_ACTION_TYPE_OFFSET) | ret;
 		break;
+	case RTE_FLOW_ACTION_TYPE_CONNTRACK:
+		ret = flow_dv_translate_create_conntrack(dev, action->conf,
+							 err);
+		idx = (MLX5_INDIRECT_ACTION_TYPE_CT <<
+		       MLX5_INDIRECT_ACTION_TYPE_OFFSET) | ret;
+		break;
 	default:
 		rte_flow_error_set(err, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
 				   NULL, "action type not supported");
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v6 08/17] net/mlx5: close CT management structure
  2021-05-05  9:49 ` [dpdk-dev] [PATCH v6 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (6 preceding siblings ...)
  2021-05-05  9:49   ` [dpdk-dev] [PATCH v6 07/17] net/mlx5: add actions creating " Bing Zhao
@ 2021-05-05  9:50   ` Bing Zhao
  2021-05-05  9:50   ` [dpdk-dev] [PATCH v6 09/17] net/mlx5: add ASO CT query implementation Bing Zhao
                     ` (8 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  9:50 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

When the IB shared context is freed while stopping a device, the
ASO connection tracking management structure should also be cleaned
up.

All the DR actions created should be destroyed. The structures need
to be freed and ASO CT QP should be released. In the meanwhile, the
allocated and registered memory region for query should also be
deregistered and then freed.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.c          | 56 ++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_aso.c |  4 +++
 2 files changed, 60 insertions(+)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 7e83d09fec..b610f29a66 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -708,6 +708,60 @@ mlx5_flow_aso_ct_mng_init(struct mlx5_dev_ctx_shared *sh)
 	return 0;
 }
 
+/*
+ * Close and release all the resources of the
+ * ASO connection tracking management structure.
+ *
+ * The ASO CT queue is uninitialized first. Then the pools are walked from
+ * the last created one down to the first: for every action the DR actions
+ * are destroyed, then the DevX object and the pool memory are released.
+ * Finally the pools array and the management structure itself are freed
+ * and sh->ct_mng is reset to NULL.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object to free.
+ */
+static void
+mlx5_flow_aso_ct_mng_close(struct mlx5_dev_ctx_shared *sh)
+{
+	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
+	struct mlx5_aso_ct_pool *ct_pool;
+	struct mlx5_aso_ct_action *ct;
+	uint32_t idx;
+	uint32_t val;
+	uint32_t cnt;
+	int i;
+
+	mlx5_aso_queue_uninit(sh, ASO_OPC_MOD_CONNECTION_TRACKING);
+	idx = mng->next;
+	while (idx--) {
+		cnt = 0;
+		ct_pool = mng->pools[idx];
+		for (i = 0; i < MLX5_ASO_CT_ACTIONS_PER_POOL; i++) {
+			ct = &ct_pool->actions[i];
+			/*
+			 * NOTE(review): an action sitting in the free list
+			 * presumably has refcnt 0, in which case val is 0 and
+			 * the assertion below fires in debug builds - confirm.
+			 */
+			val = __atomic_fetch_sub(&ct->refcnt, 1,
+						 __ATOMIC_RELAXED);
+			MLX5_ASSERT(val == 1);
+			/* More than the creation reference: still in use. */
+			if (val > 1)
+				cnt++;
+#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
+			if (ct->dr_action_orig)
+				claim_zero(mlx5_glue->destroy_flow_action
+							(ct->dr_action_orig));
+			if (ct->dr_action_rply)
+				claim_zero(mlx5_glue->destroy_flow_action
+							(ct->dr_action_rply));
+#endif
+		}
+		claim_zero(mlx5_devx_cmd_destroy(ct_pool->devx_obj));
+		if (cnt) {
+			/* Report the pool index, not the inner loop counter. */
+			DRV_LOG(DEBUG, "%u ASO CT objects are being used in the pool %u",
+				cnt, idx);
+		}
+		mlx5_free(ct_pool);
+		/* in case of failure. */
+		mng->next--;
+	}
+	mlx5_free(mng->pools);
+	mlx5_free(mng);
+	/* Management structure must be cleared to 0s during allocation. */
+	sh->ct_mng = NULL;
+}
+
 /**
  * Initialize the flow resources' indexed mempool.
  *
@@ -1510,6 +1564,8 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	if (priv->mreg_cp_tbl)
 		mlx5_hlist_destroy(priv->mreg_cp_tbl);
 	mlx5_mprq_free_mp(dev);
+	if (priv->sh->ct_mng)
+		mlx5_flow_aso_ct_mng_close(priv->sh);
 	mlx5_os_free_shared_dr(priv);
 	if (priv->rss_conf.rss_key != NULL)
 		mlx5_free(priv->rss_conf.rss_key);
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index fbf6e5ef38..37cb43147a 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -372,6 +372,10 @@ mlx5_aso_queue_uninit(struct mlx5_dev_ctx_shared *sh,
 	case ASO_OPC_MOD_POLICER:
 		sq = &sh->mtrmng->pools_mng.sq;
 		break;
+	case ASO_OPC_MOD_CONNECTION_TRACKING:
+		mlx5_aso_dereg_mr(sh, &sh->ct_mng->aso_sq.mr);
+		sq = &sh->ct_mng->aso_sq;
+		break;
 	default:
 		DRV_LOG(ERR, "Unknown ASO operation mode");
 		return;
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v6 09/17] net/mlx5: add ASO CT query implementation
  2021-05-05  9:49 ` [dpdk-dev] [PATCH v6 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (7 preceding siblings ...)
  2021-05-05  9:50   ` [dpdk-dev] [PATCH v6 08/17] net/mlx5: close CT management structure Bing Zhao
@ 2021-05-05  9:50   ` Bing Zhao
  2021-05-05  9:50   ` [dpdk-dev] [PATCH v6 10/17] net/mlx5: add ASO CT destroy handling Bing Zhao
                     ` (7 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  9:50 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

After the connection tracking context is created and used by the
flows, the context is updated by the HW automatically whenever a
packet passes the CT validation. E.g., the ACK, SEQ, window and
state of the CT can be updated by traffic in both directions.

In order to query the updated contents of this context, a WQE should
be posted to the SQ with a return buffer. The data is filled into the
buffer, and the profile is then filled with the values parsed from it.

The context may be updated while the query command is being executed,
so the result of the query command may not be the latest one.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.h          |  10 +-
 drivers/net/mlx5/mlx5_flow_aso.c | 245 +++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_dv.c  |  19 +++
 3 files changed, 273 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 0f2a26efc0..6d3f89519d 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -493,7 +493,10 @@ struct mlx5_aso_sq_elem {
 			uint16_t burst_size;
 		};
 		struct mlx5_aso_mtr *mtr;
-		struct mlx5_aso_ct_action *ct;
+		struct {
+			struct mlx5_aso_ct_action *ct;
+			char *query_data;
+		};
 	};
 };
 
@@ -1707,5 +1710,10 @@ int mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh,
 int mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
 			      struct mlx5_aso_ct_action *ct,
 			      const struct rte_flow_action_conntrack *profile);
+int mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh,
+			   struct mlx5_aso_ct_action *ct);
+int mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
+			     struct mlx5_aso_ct_action *ct,
+			     struct rte_flow_action_conntrack *profile);
 
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index 37cb43147a..92fa9ede60 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -933,6 +933,7 @@ mlx5_aso_ct_sq_enqueue_single(struct mlx5_aso_ct_pools_mng *mng,
 	/* Fill next WQE. */
 	MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_WAIT);
 	sq->elts[sq->head & mask].ct = ct;
+	sq->elts[sq->head & mask].query_data = NULL;
 	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
 	/* Each WQE will have a single CT object. */
 	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
@@ -1048,9 +1049,95 @@ mlx5_aso_ct_status_update(struct mlx5_aso_sq *sq, uint16_t num)
 		ct = sq->elts[idx].ct;
 		MLX5_ASSERT(ct);
 		MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_READY);
+		if (sq->elts[idx].query_data)
+			rte_memcpy(sq->elts[idx].query_data,
+				   (char *)((uintptr_t)sq->mr.addr + idx * 64),
+				   64);
 	}
 }
 
+/*
+ * Post a WQE to the ASO CT SQ to query the current context.
+ *
+ * @param[in] mng
+ *   Pointer to the CT pools management structure.
+ * @param[in] ct
+ *   Pointer to the generic CT structure related to the context.
+ * @param[in] data
+ *   Pointer to the data area recorded in the SQ element for this WQE.
+ *
+ * @return
+ *   1 if WQE is posted, 0 if CT is waiting or SQ is full, -1 if CT is free.
+ */
+static int
+mlx5_aso_ct_sq_query_single(struct mlx5_aso_ct_pools_mng *mng,
+			    struct mlx5_aso_ct_action *ct, char *data)
+{
+	volatile struct mlx5_aso_wqe *wqe = NULL;
+	struct mlx5_aso_sq *sq = &mng->aso_sq;
+	uint16_t size = 1 << sq->log_desc_n;
+	uint16_t mask = size - 1;
+	uint16_t res;
+	uint16_t wqe_idx;
+	struct mlx5_aso_ct_pool *pool;
+	enum mlx5_aso_ct_state state =
+				__atomic_load_n(&ct->state, __ATOMIC_RELAXED);
+
+	if (state == ASO_CONNTRACK_FREE) {
+		DRV_LOG(ERR, "Fail: No context to query");
+		return -1;
+	} else if (state == ASO_CONNTRACK_WAIT) {
+		return 0;
+	}
+	rte_spinlock_lock(&sq->sqsl);
+	res = size - (uint16_t)(sq->head - sq->tail);
+	if (unlikely(!res)) {
+		rte_spinlock_unlock(&sq->sqsl);
+		DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
+		return 0;
+	}
+	MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_QUERY);
+	wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
+	/* Prefetch the WQE slot following the one being filled. */
+	rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
+	/* Fill next WQE. */
+	wqe_idx = sq->head & mask;
+	sq->elts[wqe_idx].ct = ct;
+	sq->elts[wqe_idx].query_data = data;
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	/* Each WQE will have a single CT object. */
+	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
+						  ct->offset);
+	wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
+			(ASO_OPC_MOD_CONNECTION_TRACKING <<
+			 WQE_CSEG_OPC_MOD_OFFSET) |
+			sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
+	/*
+	 * No write request is required for a query.
+	 * ASO_OPER_LOGICAL_AND and ASO_OP_ALWAYS_FALSE are both 0.
+	 * "BYTEWISE_64BYTE" is needed for a whole context.
+	 * Set to 0 directly to reduce an endian swap. (Modify should rewrite.)
+	 * "data_mask" is ignored.
+	 * Buffer address was already filled during initialization.
+	 */
+	wqe->aso_cseg.operand_masks = rte_cpu_to_be_32(BYTEWISE_64BYTE <<
+					ASO_CSEG_DATA_MASK_MODE_OFFSET);
+	wqe->aso_cseg.data_mask = 0;
+	sq->head++;
+	/*
+	 * Each WQE contains 2 WQEBB's, even though
+	 * data segment is not used in this case.
+	 */
+	sq->pi += 2;
+	rte_io_wmb();
+	sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
+	rte_wmb();
+	*sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH. */
+	rte_wmb();
+	rte_spinlock_unlock(&sq->sqsl);
+	return 1;
+}
+
 /*
  * Handle completions from WQEs sent to ASO CT.
  *
@@ -1143,3 +1230,161 @@ mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
 		ct->offset, pool->index);
 	return -1;
 }
+
+/*
+ * Poll the ASO CT completions until the CT object reaches the READY state.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ * @param[in] ct
+ *   Pointer to connection tracking offload object.
+ *
+ * @return
+ *   0 once the CT object is READY, -1 if the polling attempts run out.
+ */
+int
+mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh,
+		       struct mlx5_aso_ct_action *ct)
+{
+	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
+	uint32_t poll_cqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
+	struct mlx5_aso_ct_pool *pool;
+
+	if (__atomic_load_n(&ct->state, __ATOMIC_RELAXED) ==
+	    ASO_CONNTRACK_READY)
+		return 0;
+	do {
+		mlx5_aso_ct_completion_handle(mng);
+		if (__atomic_load_n(&ct->state, __ATOMIC_RELAXED) ==
+		    ASO_CONNTRACK_READY)
+			return 0;
+		/* CT is not READY yet: sleep before polling again. */
+		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
+	} while (--poll_cqe_times);
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	DRV_LOG(ERR, "Fail to poll CQE for ASO CT %d in pool %d",
+		ct->offset, pool->index);
+	return -1;
+}
+
+/*
+ * Convert the hardware conntrack data format into the profile.
+ *
+ * @param[out] profile
+ *   Pointer to conntrack profile to be filled after query.
+ * @param[in] wdata
+ *   Pointer to data fetched from hardware.
+ */
+static inline void
+mlx5_aso_ct_obj_analyze(struct rte_flow_action_conntrack *profile,
+			char *wdata)
+{
+	void *o_dir = MLX5_ADDR_OF(conn_track_aso, wdata, original_dir);
+	void *r_dir = MLX5_ADDR_OF(conn_track_aso, wdata, reply_dir);
+
+	/* MLX5_GET16 should be taken into consideration. */
+	profile->state = (enum rte_flow_conntrack_state)
+			 MLX5_GET(conn_track_aso, wdata, state);
+	profile->enable = !MLX5_GET(conn_track_aso, wdata, freeze_track);
+	profile->selective_ack = MLX5_GET(conn_track_aso, wdata,
+					  sack_permitted);
+	profile->live_connection = MLX5_GET(conn_track_aso, wdata,
+					    connection_assured);
+	profile->challenge_ack_passed = MLX5_GET(conn_track_aso, wdata,
+						 challenged_acked);
+	profile->max_ack_window = MLX5_GET(conn_track_aso, wdata,
+					   max_ack_window);
+	profile->retransmission_limit = MLX5_GET(conn_track_aso, wdata,
+						 retranmission_limit);
+	profile->last_window = MLX5_GET(conn_track_aso, wdata, last_win);
+	profile->last_direction = MLX5_GET(conn_track_aso, wdata, last_dir);
+	profile->last_index = (enum rte_flow_conntrack_tcp_last_index)
+			      MLX5_GET(conn_track_aso, wdata, last_index);
+	profile->last_seq = MLX5_GET(conn_track_aso, wdata, last_seq);
+	profile->last_ack = MLX5_GET(conn_track_aso, wdata, last_ack);
+	profile->last_end = MLX5_GET(conn_track_aso, wdata, last_end);
+	profile->liberal_mode = MLX5_GET(conn_track_aso, wdata,
+				reply_direction_tcp_liberal_enabled) |
+				MLX5_GET(conn_track_aso, wdata,
+				original_direction_tcp_liberal_enabled);
+	/* The per-direction liberal flags were merged into liberal_mode. */
+	profile->reply_dir.scale = MLX5_GET(conn_track_aso, wdata,
+					    reply_direction_tcp_scale);
+	profile->reply_dir.close_initiated = MLX5_GET(conn_track_aso, wdata,
+					reply_direction_tcp_close_initiated);
+	profile->reply_dir.data_unacked = MLX5_GET(conn_track_aso, wdata,
+					reply_direction_tcp_data_unacked);
+	profile->reply_dir.last_ack_seen = MLX5_GET(conn_track_aso, wdata,
+					reply_direction_tcp_max_ack);
+	profile->reply_dir.sent_end = MLX5_GET(tcp_window_params,
+					       r_dir, sent_end);
+	profile->reply_dir.reply_end = MLX5_GET(tcp_window_params,
+						r_dir, reply_end);
+	profile->reply_dir.max_win = MLX5_GET(tcp_window_params,
+					      r_dir, max_win);
+	profile->reply_dir.max_ack = MLX5_GET(tcp_window_params,
+					      r_dir, max_ack);
+	profile->original_dir.scale = MLX5_GET(conn_track_aso, wdata,
+					       original_direction_tcp_scale);
+	profile->original_dir.close_initiated = MLX5_GET(conn_track_aso, wdata,
+					original_direction_tcp_close_initiated);
+	profile->original_dir.data_unacked = MLX5_GET(conn_track_aso, wdata,
+					original_direction_tcp_data_unacked);
+	profile->original_dir.last_ack_seen = MLX5_GET(conn_track_aso, wdata,
+					original_direction_tcp_max_ack);
+	profile->original_dir.sent_end = MLX5_GET(tcp_window_params,
+						  o_dir, sent_end);
+	profile->original_dir.reply_end = MLX5_GET(tcp_window_params,
+						   o_dir, reply_end);
+	profile->original_dir.max_win = MLX5_GET(tcp_window_params,
+						 o_dir, max_win);
+	profile->original_dir.max_ack = MLX5_GET(tcp_window_params,
+						 o_dir, max_ack);
+}
+
+/*
+ * Query connection tracking information parameters by sending a WQE.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ * @param[in] ct
+ *   Pointer to connection tracking offload object.
+ * @param[out] profile
+ *   Pointer to connection tracking TCP information.
+ *
+ * @return
+ *   0 on success, -1 on failure.
+ */
+int
+mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
+			 struct mlx5_aso_ct_action *ct,
+			 struct rte_flow_action_conntrack *profile)
+{
+	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
+	uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
+	struct mlx5_aso_ct_pool *pool;
+	char out_data[64 * 2];
+	int ret;
+
+	MLX5_ASSERT(ct);
+	do {
+		mlx5_aso_ct_completion_handle(mng);
+		ret = mlx5_aso_ct_sq_query_single(mng, ct, out_data);
+		if (ret < 0)
+			return ret;
+		else if (ret > 0)
+			goto data_handle;
+		/* No WQE was posted (CT busy or SQ full): sleep and retry. */
+		else
+			rte_delay_us_sleep(10u);
+	} while (--poll_wqe_times);
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	DRV_LOG(ERR, "Fail to send WQE for ASO CT %d in pool %d",
+		ct->offset, pool->index);
+	return -1;
+data_handle:
+	ret = mlx5_aso_ct_wait_ready(sh, ct);
+	if (!ret)
+		mlx5_aso_ct_obj_analyze(profile, out_data);
+	return ret;
+}
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index b3606e895c..1ef98da025 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -14809,6 +14809,8 @@ flow_dv_action_query(struct rte_eth_dev *dev,
 	uint32_t act_idx = (uint32_t)(uintptr_t)handle;
 	uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
 	uint32_t idx = act_idx & ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_action *ct;
 
 	switch (type) {
 	case MLX5_INDIRECT_ACTION_TYPE_AGE:
@@ -14824,6 +14826,23 @@ flow_dv_action_query(struct rte_eth_dev *dev,
 		return 0;
 	case MLX5_INDIRECT_ACTION_TYPE_COUNT:
 		return flow_dv_query_count(dev, idx, data, error);
+	case MLX5_INDIRECT_ACTION_TYPE_CT:
+		ct = flow_aso_ct_get_by_idx(dev, idx);
+		if (!ct->refcnt)
+			return rte_flow_error_set(error, EFAULT,
+					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					NULL,
+					"CT object is inactive");
+		((struct rte_flow_action_conntrack *)data)->peer_port =
+							ct->peer;
+		((struct rte_flow_action_conntrack *)data)->is_original_dir =
+							ct->is_original;
+		if (mlx5_aso_ct_query_by_wqe(priv->sh, ct, data))
+			return rte_flow_error_set(error, EIO,
+					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					NULL,
+					"Failed to query CT context");
+		return 0;
 	default:
 		return rte_flow_error_set(error, ENOTSUP,
 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 147+ messages in thread

* [dpdk-dev] [PATCH v6 10/17] net/mlx5: add ASO CT destroy handling
  2021-05-05  9:49 ` [dpdk-dev] [PATCH v6 00/17] conntrack support in mlx5 PMD Bing Zhao
                     ` (8 preceding siblings ...)
  2021-05-05  9:50   ` [dpdk-dev] [PATCH v6 09/17] net/mlx5: add ASO CT query implementation Bing Zhao
@ 2021-05-05  9:50   ` Bing Zhao
  2021-05-05  9:50   ` [dpdk-dev] [PATCH v6 11/17] net/mlx5: add translation of CT action Bing Zhao
                     ` (6 subsequent siblings)
  16 siblings, 0 replies; 147+ messages in thread
From: Bing Zhao @ 2021-05-05  9:50 UTC (permalink / raw)
  To: viacheslavo, matan, thomas; +Cc: dev, orika, rasland

When trying to destroy an ASO connection tracking context, the DR
action created on this context should also be destroyed. Before
inserting the related software object into the management free list,
the reference count should be checked.

Right now, the context object will not be freed to the system and
will be reused directly from the free list.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow_dv.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 1ef98da025..14670d712b 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -11531,9 +11531,