DPDK patches and discussions
 help / color / mirror / Atom feed
* [PATCH 0/4] net/mlx5: support send scheduling for ConnextX-7
@ 2022-02-14  8:56 Viacheslav Ovsiienko
  2022-02-14  8:56 ` [PATCH 1/4] common/mlx5: add send on time capability check Viacheslav Ovsiienko
                   ` (3 more replies)
  0 siblings, 4 replies; 18+ messages in thread
From: Viacheslav Ovsiienko @ 2022-02-14  8:56 UTC (permalink / raw)
  To: dev; +Cc: matan, rasland

The send scheduling capability has been provided since ConnectX-6DX.
An application can register the dynamic field and dynamic flags
in mbuf for the timestamp and specify the desired moment of time
at which the packet should be sent.

The send scheduling feature over ConnectX-6DX uses the complicated
infrastructure with reference Clock Queue and inter-queue synchronizing
operations. Starting with ConnectX-7, a new wait descriptor format is
introduced in which the timestamp can be passed to hardware directly.
The patchset adds support for this new hardware option in PMD.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>

Viacheslav Ovsiienko (4):
  common/mlx5: add send on time capability check
  net/mlx5: configure Tx queue with send on time offload
  net/mlx5: add wait on time support in Tx datapath
  doc: update send scheduling mlx5 feature description

 doc/guides/nics/mlx5.rst               |  5 ++
 doc/guides/rel_notes/release_22_03.rst |  6 +++
 drivers/common/mlx5/mlx5_devx_cmds.c   |  1 +
 drivers/common/mlx5/mlx5_devx_cmds.h   |  1 +
 drivers/common/mlx5/mlx5_prm.h         | 27 +++++++++-
 drivers/net/mlx5/linux/mlx5_verbs.c    |  4 ++
 drivers/net/mlx5/mlx5.h                |  3 ++
 drivers/net/mlx5/mlx5_devx.c           |  2 +
 drivers/net/mlx5/mlx5_tx.h             | 75 +++++++++++++++++++++++---
 drivers/net/mlx5/mlx5_txq.c            | 16 +++++-
 10 files changed, 129 insertions(+), 11 deletions(-)

-- 
2.18.1


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 1/4] common/mlx5: add send on time capability check
  2022-02-14  8:56 [PATCH 0/4] net/mlx5: support send scheduling for ConnextX-7 Viacheslav Ovsiienko
@ 2022-02-14  8:56 ` Viacheslav Ovsiienko
  2022-02-22 10:26   ` [PATCH v2 0/4] net/mlx5: support send scheduling for ConnectX-7 Viacheslav Ovsiienko
  2022-02-24 10:54   ` [PATCH v3 0/3] " Viacheslav Ovsiienko
  2022-02-14  8:56 ` [PATCH 2/4] net/mlx5: configure Tx queue with send on time offload Viacheslav Ovsiienko
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 18+ messages in thread
From: Viacheslav Ovsiienko @ 2022-02-14  8:56 UTC (permalink / raw)
  To: dev; +Cc: matan, rasland

The patch provides a check for the send scheduling on time hardware
capability. With this capability enabled, the hardware is able to handle
Wait WQEs with directly specified timestamp values. No Clock Queue is
needed anymore to handle send scheduling.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
 drivers/common/mlx5/mlx5_devx_cmds.c |  1 +
 drivers/common/mlx5/mlx5_devx_cmds.h |  1 +
 drivers/common/mlx5/mlx5_prm.h       | 27 ++++++++++++++++++++++++++-
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index 2e807a0829..fb55ef96ea 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -962,6 +962,7 @@ mlx5_devx_cmd_query_hca_attr(void *ctx,
 		MLX5_GET(cmd_hca_cap, hcattr, umr_indirect_mkey_disabled);
 	attr->umr_modify_entity_size_disabled =
 		MLX5_GET(cmd_hca_cap, hcattr, umr_modify_entity_size_disabled);
+	attr->wait_on_time = MLX5_GET(cmd_hca_cap, hcattr, wait_on_time);
 	attr->crypto = MLX5_GET(cmd_hca_cap, hcattr, crypto);
 	if (attr->crypto)
 		attr->aes_xts = MLX5_GET(cmd_hca_cap, hcattr, aes_xts);
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h
index 37821b493e..909d91adae 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -201,6 +201,7 @@ struct mlx5_hca_attr {
 	uint32_t scatter_fcs_w_decap_disable:1;
 	uint32_t flow_hit_aso:1; /* General obj type FLOW_HIT_ASO supported. */
 	uint32_t roce:1;
+	uint32_t wait_on_time:1;
 	uint32_t rq_ts_format:2;
 	uint32_t sq_ts_format:2;
 	uint32_t steering_format_version:4;
diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 495b63191a..4ce302b478 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -133,6 +133,19 @@
 #define MLX5_OPCODE_WAIT 0x0fu
 #endif
 
+#define MLX5_OPC_MOD_WAIT_CQ_PI 0u
+#define MLX5_OPC_MOD_WAIT_DATA 1u
+#define MLX5_OPC_MOD_WAIT_TIME 2u
+
+
+#define MLX5_WAIT_COND_INVERT 0x10u
+#define MLX5_WAIT_COND_ALWAYS_TRUE 0u
+#define MLX5_WAIT_COND_EQUAL 1u
+#define MLX5_WAIT_COND_BIGGER 2u
+#define MLX5_WAIT_COND_SMALLER 3u
+#define MLX5_WAIT_COND_CYCLIC_BIGGER 4u
+#define MLX5_WAIT_COND_CYCLIC_SMALLER 5u
+
 #ifndef HAVE_MLX5_OPCODE_ACCESS_ASO
 #define MLX5_OPCODE_ACCESS_ASO 0x2du
 #endif
@@ -348,6 +361,15 @@ struct mlx5_wqe_qseg {
 	uint32_t qpn_cqn;
 } __rte_packed;
 
+struct mlx5_wqe_wseg {
+	uint32_t operation;
+	uint32_t lkey;
+	uint32_t va_high;
+	uint32_t va_low;
+	uint64_t value;
+	uint64_t mask;
+} __rte_packed;
+
 /* The title WQEBB, header of WQE. */
 struct mlx5_wqe {
 	union {
@@ -1659,7 +1681,10 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8 num_vhca_ports[0x8];
 	u8 reserved_at_618[0x6];
 	u8 sw_owner_id[0x1];
-	u8 reserved_at_61f[0x129];
+	u8 reserved_at_61f[0x6C];
+	u8 wait_on_data[0x1];
+	u8 wait_on_time[0x1];
+	u8 reserved_at_68d[0xBB];
 	u8 dma_mmo_qp[0x1];
 	u8 regexp_mmo_qp[0x1];
 	u8 compress_mmo_qp[0x1];
-- 
2.18.1


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 2/4] net/mlx5: configure Tx queue with send on time offload
  2022-02-14  8:56 [PATCH 0/4] net/mlx5: support send scheduling for ConnextX-7 Viacheslav Ovsiienko
  2022-02-14  8:56 ` [PATCH 1/4] common/mlx5: add send on time capability check Viacheslav Ovsiienko
@ 2022-02-14  8:56 ` Viacheslav Ovsiienko
  2022-02-14  8:56 ` [PATCH 3/4] net/mlx5: add wait on time support in Tx datapath Viacheslav Ovsiienko
  2022-02-14  8:56 ` [PATCH 4/4] doc: update send scheduling mlx5 feature description Viacheslav Ovsiienko
  3 siblings, 0 replies; 18+ messages in thread
From: Viacheslav Ovsiienko @ 2022-02-14  8:56 UTC (permalink / raw)
  To: dev; +Cc: matan, rasland

The wait on time configuration flag is copied to the Tx queue
structure due to performance considerations. The timestamp
mask is prepared and stored in the queue structure as well.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
 drivers/net/mlx5/linux/mlx5_verbs.c |  2 ++
 drivers/net/mlx5/mlx5.h             |  3 +++
 drivers/net/mlx5/mlx5_devx.c        |  2 ++
 drivers/net/mlx5/mlx5_tx.h          |  3 +++
 drivers/net/mlx5/mlx5_txq.c         | 11 ++++++++++-
 5 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/linux/mlx5_verbs.c b/drivers/net/mlx5/linux/mlx5_verbs.c
index 2b6eef44a7..80dd0bb6c1 100644
--- a/drivers/net/mlx5/linux/mlx5_verbs.c
+++ b/drivers/net/mlx5/linux/mlx5_verbs.c
@@ -1036,6 +1036,8 @@ mlx5_txq_ibv_obj_new(struct rte_eth_dev *dev, uint16_t idx)
 	txq_data->wqe_pi = 0;
 	txq_data->wqe_comp = 0;
 	txq_data->wqe_thres = txq_data->wqe_s / MLX5_TX_COMP_THRESH_INLINE_DIV;
+	txq_data->wait_on_time = !!(!priv->config.tx_pp &&
+				     priv->config.hca_attr.wait_on_time);
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
 	/*
 	 * If using DevX need to query and store TIS transport domain value.
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 737ad6895c..3983d3aa50 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -341,6 +341,9 @@ struct mlx5_lb_ctx {
 #define MLX5_CNT_ARRAY_IDX(pool, cnt) \
 	((int)(((uint8_t *)(cnt) - (uint8_t *)((pool) + 1)) / \
 	MLX5_CNT_LEN(pool)))
+#define MLX5_TS_MASK_SECS 8ull
+/* Timestamp wrapping in seconds, must be a power of 2. */
+
 /*
  * The pool index and offset of counter in the pool array makes up the
  * counter index. In case the counter is from pool 0 and offset 0, it
diff --git a/drivers/net/mlx5/mlx5_devx.c b/drivers/net/mlx5/mlx5_devx.c
index 91243f684f..c6994e4a75 100644
--- a/drivers/net/mlx5/mlx5_devx.c
+++ b/drivers/net/mlx5/mlx5_devx.c
@@ -1327,6 +1327,8 @@ mlx5_txq_devx_obj_new(struct rte_eth_dev *dev, uint16_t idx)
 	txq_data->qp_num_8s = txq_obj->sq_obj.sq->id << 8;
 	txq_data->db_heu = sh->cdev->config.dbnc == MLX5_TXDB_HEURISTIC;
 	txq_data->db_nc = sh->tx_uar.dbnc;
+	txq_data->wait_on_time = !!(!priv->config.tx_pp &&
+				     priv->config.hca_attr.wait_on_time);
 	/* Change Send Queue state to Ready-to-Send. */
 	ret = mlx5_txq_devx_modify(txq_obj, MLX5_TXQ_MOD_RST2RDY, 0);
 	if (ret) {
diff --git a/drivers/net/mlx5/mlx5_tx.h b/drivers/net/mlx5/mlx5_tx.h
index c4b8271f6f..b50deb8b67 100644
--- a/drivers/net/mlx5/mlx5_tx.h
+++ b/drivers/net/mlx5/mlx5_tx.h
@@ -138,6 +138,8 @@ struct mlx5_txq_data {
 	uint16_t vlan_en:1; /* VLAN insertion in WQE is supported. */
 	uint16_t db_nc:1; /* Doorbell mapped to non-cached region. */
 	uint16_t db_heu:1; /* Doorbell heuristic write barrier. */
+	uint16_t rt_timestamp:1; /* Realtime timestamp format. */
+	uint16_t wait_on_time:1; /* WQE with timestamp is supported. */
 	uint16_t fast_free:1; /* mbuf fast free on Tx is enabled. */
 	uint16_t inlen_send; /* Ordinary send data inline size. */
 	uint16_t inlen_empw; /* eMPW max packet size to inline. */
@@ -157,6 +159,7 @@ struct mlx5_txq_data {
 	volatile uint32_t *cq_db; /* Completion queue doorbell. */
 	uint16_t port_id; /* Port ID of device. */
 	uint16_t idx; /* Queue index. */
+	uint64_t rt_timemask; /* Scheduling timestamp mask. */
 	uint64_t ts_mask; /* Timestamp flag dynamic mask. */
 	int32_t ts_offset; /* Timestamp field dynamic offset. */
 	struct mlx5_dev_ctx_shared *sh; /* Shared context. */
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 4e0bf7af9c..3585546628 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -108,7 +108,7 @@ mlx5_get_tx_port_offloads(struct rte_eth_dev *dev)
 			     RTE_ETH_TX_OFFLOAD_TCP_CKSUM);
 	if (config->tso)
 		offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO;
-	if (config->tx_pp)
+	if (config->tx_pp || config->hca_attr.wait_on_time)
 		offloads |= RTE_ETH_TX_OFFLOAD_SEND_ON_TIMESTAMP;
 	if (config->swp) {
 		if (config->swp & MLX5_SW_PARSING_CSUM_CAP)
@@ -1290,7 +1290,14 @@ mlx5_txq_dynf_timestamp_set(struct rte_eth_dev *dev)
 	int off, nbit;
 	unsigned int i;
 	uint64_t mask = 0;
+	uint64_t ts_mask;
 
+	if (priv->config.rt_timestamp || !priv->config.hca_attr.dev_freq_khz)
+		ts_mask = MLX5_TS_MASK_SECS << 32;
+	else
+		ts_mask = rte_align64pow2(MLX5_TS_MASK_SECS * 1000ull *
+					  priv->config.hca_attr.dev_freq_khz);
+	ts_mask = rte_cpu_to_be_64(ts_mask - 1ull);
 	nbit = rte_mbuf_dynflag_lookup
 				(RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL);
 	off = rte_mbuf_dynfield_lookup
@@ -1304,5 +1311,7 @@ mlx5_txq_dynf_timestamp_set(struct rte_eth_dev *dev)
 		data->sh = sh;
 		data->ts_mask = mask;
 		data->ts_offset = off;
+		data->rt_timestamp = priv->config.rt_timestamp;
+		data->rt_timemask = ts_mask;
 	}
 }
-- 
2.18.1


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 3/4] net/mlx5: add wait on time support in Tx datapath
  2022-02-14  8:56 [PATCH 0/4] net/mlx5: support send scheduling for ConnextX-7 Viacheslav Ovsiienko
  2022-02-14  8:56 ` [PATCH 1/4] common/mlx5: add send on time capability check Viacheslav Ovsiienko
  2022-02-14  8:56 ` [PATCH 2/4] net/mlx5: configure Tx queue with send on time offload Viacheslav Ovsiienko
@ 2022-02-14  8:56 ` Viacheslav Ovsiienko
  2022-02-14  8:56 ` [PATCH 4/4] doc: update send scheduling mlx5 feature description Viacheslav Ovsiienko
  3 siblings, 0 replies; 18+ messages in thread
From: Viacheslav Ovsiienko @ 2022-02-14  8:56 UTC (permalink / raw)
  To: dev; +Cc: matan, rasland

Hardware since ConnectX-7 supports waiting on a
specified moment of time with the newly introduced wait
descriptor. A timestamp can be directly placed
into the descriptor and pushed to the sending queue.
Once the hardware encounters the wait descriptor, the
queue operation is suspended till the specified moment
of time. This patch updates the Tx datapath to handle
this new hardware wait capability.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
 drivers/net/mlx5/linux/mlx5_verbs.c |  4 +-
 drivers/net/mlx5/mlx5_tx.h          | 72 +++++++++++++++++++++++++----
 drivers/net/mlx5/mlx5_txq.c         |  7 ++-
 3 files changed, 72 insertions(+), 11 deletions(-)

diff --git a/drivers/net/mlx5/linux/mlx5_verbs.c b/drivers/net/mlx5/linux/mlx5_verbs.c
index 80dd0bb6c1..5f821c4645 100644
--- a/drivers/net/mlx5/linux/mlx5_verbs.c
+++ b/drivers/net/mlx5/linux/mlx5_verbs.c
@@ -1037,7 +1037,9 @@ mlx5_txq_ibv_obj_new(struct rte_eth_dev *dev, uint16_t idx)
 	txq_data->wqe_comp = 0;
 	txq_data->wqe_thres = txq_data->wqe_s / MLX5_TX_COMP_THRESH_INLINE_DIV;
 	txq_data->wait_on_time = !!(!priv->config.tx_pp &&
-				     priv->config.hca_attr.wait_on_time);
+				     priv->config.hca_attr.wait_on_time &&
+				     txq_data->offloads &
+				     DEV_TX_OFFLOAD_SEND_ON_TIMESTAMP);
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
 	/*
 	 * If using DevX need to query and store TIS transport domain value.
diff --git a/drivers/net/mlx5/mlx5_tx.h b/drivers/net/mlx5/mlx5_tx.h
index b50deb8b67..0adc3f4839 100644
--- a/drivers/net/mlx5/mlx5_tx.h
+++ b/drivers/net/mlx5/mlx5_tx.h
@@ -780,7 +780,7 @@ mlx5_tx_cseg_init(struct mlx5_txq_data *__rte_restrict txq,
  *   compile time and may be used for optimization.
  */
 static __rte_always_inline void
-mlx5_tx_wseg_init(struct mlx5_txq_data *restrict txq,
+mlx5_tx_qseg_init(struct mlx5_txq_data *restrict txq,
 		  struct mlx5_txq_local *restrict loc __rte_unused,
 		  struct mlx5_wqe *restrict wqe,
 		  unsigned int wci,
@@ -795,6 +795,43 @@ mlx5_tx_wseg_init(struct mlx5_txq_data *restrict txq,
 	qs->reserved1 = RTE_BE32(0);
 }
 
+/**
+ * Build the Wait on Time Segment with specified timestamp value.
+ *
+ * @param txq
+ *   Pointer to TX queue structure.
+ * @param loc
+ *   Pointer to burst routine local context.
+ * @param wqe
+ *   Pointer to WQE to fill with built Wait Segment.
+ * @param ts
+ *   Timestamp value to wait.
+ * @param olx
+ *   Configured Tx offloads mask. It is fully defined at
+ *   compile time and may be used for optimization.
+ */
+static __rte_always_inline void
+mlx5_tx_wseg_init(struct mlx5_txq_data *restrict txq,
+		  struct mlx5_txq_local *restrict loc __rte_unused,
+		  struct mlx5_wqe *restrict wqe,
+		  uint64_t ts,
+		  unsigned int olx __rte_unused)
+{
+	struct mlx5_wqe_wseg *ws;
+
+	ws = RTE_PTR_ADD(wqe, MLX5_WSEG_SIZE);
+	ws->operation = rte_cpu_to_be_32(MLX5_WAIT_COND_CYCLIC_BIGGER);
+	ws->lkey = RTE_BE32(0);
+	ws->va_high = RTE_BE32(0);
+	ws->va_low = RTE_BE32(0);
+	if (txq->rt_timestamp) {
+		ts = ts % (uint64_t)NS_PER_S
+		   | (ts / (uint64_t)NS_PER_S) << 32;
+	}
+	ws->value = rte_cpu_to_be_64(ts);
+	ws->mask = txq->rt_timemask;
+}
+
 /**
  * Build the Ethernet Segment without inlined data.
  * Supports Software Parser, Checksums and VLAN insertion Tx offload features.
@@ -1626,9 +1663,9 @@ mlx5_tx_schedule_send(struct mlx5_txq_data *restrict txq,
 {
 	if (MLX5_TXOFF_CONFIG(TXPP) &&
 	    loc->mbuf->ol_flags & txq->ts_mask) {
+		struct mlx5_dev_ctx_shared *sh;
 		struct mlx5_wqe *wqe;
 		uint64_t ts;
-		int32_t wci;
 
 		/*
 		 * Estimate the required space quickly and roughly.
@@ -1640,13 +1677,32 @@ mlx5_tx_schedule_send(struct mlx5_txq_data *restrict txq,
 			return MLX5_TXCMP_CODE_EXIT;
 		/* Convert the timestamp into completion to wait. */
 		ts = *RTE_MBUF_DYNFIELD(loc->mbuf, txq->ts_offset, uint64_t *);
-		wci = mlx5_txpp_convert_tx_ts(txq->sh, ts);
-		if (unlikely(wci < 0))
-			return MLX5_TXCMP_CODE_SINGLE;
-		/* Build the WAIT WQE with specified completion. */
 		wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m);
-		mlx5_tx_cseg_init(txq, loc, wqe, 2, MLX5_OPCODE_WAIT, olx);
-		mlx5_tx_wseg_init(txq, loc, wqe, wci, olx);
+		sh = txq->sh;
+		if (txq->wait_on_time) {
+			/* The wait on time capability should be used. */
+			ts -= sh->txpp.skew;
+			mlx5_tx_cseg_init(txq, loc, wqe,
+					  1 + sizeof(struct mlx5_wqe_wseg) /
+					      MLX5_WSEG_SIZE,
+					  MLX5_OPCODE_WAIT |
+					  MLX5_OPC_MOD_WAIT_TIME << 24, olx);
+			mlx5_tx_wseg_init(txq, loc, wqe, ts, olx);
+		} else {
+			/* Legacy cross-channel operation should be used. */
+			int32_t wci;
+
+			wci = mlx5_txpp_convert_tx_ts(sh, ts);
+			if (unlikely(wci < 0))
+				return MLX5_TXCMP_CODE_SINGLE;
+			/* Build the WAIT WQE with specified completion. */
+			mlx5_tx_cseg_init(txq, loc, wqe,
+					  1 + sizeof(struct mlx5_wqe_qseg) /
+					      MLX5_WSEG_SIZE,
+					  MLX5_OPCODE_WAIT |
+					  MLX5_OPC_MOD_WAIT_CQ_PI << 24, olx);
+			mlx5_tx_qseg_init(txq, loc, wqe, wci, olx);
+		}
 		++txq->wqe_ci;
 		--loc->wqe_free;
 		return MLX5_TXCMP_CODE_MULTI;
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 3585546628..90b71e380f 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -1302,7 +1302,8 @@ mlx5_txq_dynf_timestamp_set(struct rte_eth_dev *dev)
 				(RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL);
 	off = rte_mbuf_dynfield_lookup
 				(RTE_MBUF_DYNFIELD_TIMESTAMP_NAME, NULL);
-	if (nbit >= 0 && off >= 0 && sh->txpp.refcnt)
+	if (nbit >= 0 && off >= 0 &&
+	    (sh->txpp.refcnt || priv->config.hca_attr.wait_on_time))
 		mask = 1ULL << nbit;
 	for (i = 0; i != priv->txqs_n; ++i) {
 		data = (*priv->txqs)[i];
@@ -1312,6 +1313,8 @@ mlx5_txq_dynf_timestamp_set(struct rte_eth_dev *dev)
 		data->ts_mask = mask;
 		data->ts_offset = off;
 		data->rt_timestamp = priv->config.rt_timestamp;
-		data->rt_timemask = ts_mask;
+		data->rt_timemask = (data->offloads &
+				     RTE_ETH_TX_OFFLOAD_SEND_ON_TIMESTAMP) ?
+				     ts_mask : 0;
 	}
 }
-- 
2.18.1


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 4/4] doc: update send scheduling mlx5 feature description
  2022-02-14  8:56 [PATCH 0/4] net/mlx5: support send scheduling for ConnextX-7 Viacheslav Ovsiienko
                   ` (2 preceding siblings ...)
  2022-02-14  8:56 ` [PATCH 3/4] net/mlx5: add wait on time support in Tx datapath Viacheslav Ovsiienko
@ 2022-02-14  8:56 ` Viacheslav Ovsiienko
  3 siblings, 0 replies; 18+ messages in thread
From: Viacheslav Ovsiienko @ 2022-02-14  8:56 UTC (permalink / raw)
  To: dev; +Cc: matan, rasland

Updated:
  - send scheduling feature description for mlx5
  - release notes

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
 doc/guides/nics/mlx5.rst               | 5 +++++
 doc/guides/rel_notes/release_22_03.rst | 6 ++++++
 2 files changed, 11 insertions(+)

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index c3cc0c0f41..6494f4ae39 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -934,6 +934,11 @@ Driver options
   By default (if the ``tx_pp`` is not specified) send scheduling on timestamps
   feature is disabled.
 
+  Starting with ConnectX-7 the capability to schedule traffic directly
+  on the timestamp specified in the descriptor is provided, no extra objects
+  are needed anymore and the scheduling capability is advertised and handled
+  regardless of the ``tx_pp`` parameter presence.
+
 - ``tx_skew`` parameter [int]
 
   The parameter adjusts the send packet scheduling on timestamps and represents
diff --git a/doc/guides/rel_notes/release_22_03.rst b/doc/guides/rel_notes/release_22_03.rst
index ff3095d742..268f2827e2 100644
--- a/doc/guides/rel_notes/release_22_03.rst
+++ b/doc/guides/rel_notes/release_22_03.rst
@@ -109,6 +109,12 @@ New Features
   * Added rte_flow support for matching GENEVE packets.
   * Added rte_flow support for matching eCPRI packets.
 
+* **Updated Mellanox mlx5 driver.**
+
+  Updated the Mellanox mlx5 driver with new features and improvements, including:
+
+  * Added support for ConnectX-7 capability to schedule traffic sending on timestamp.
+
 * **Updated Wangxun ngbe driver.**
 
   * Added support for devices of custom PHY interfaces.
-- 
2.18.1


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v2 0/4] net/mlx5: support send scheduling for ConnectX-7
  2022-02-14  8:56 ` [PATCH 1/4] common/mlx5: add send on time capability check Viacheslav Ovsiienko
@ 2022-02-22 10:26   ` Viacheslav Ovsiienko
  2022-02-22 10:26     ` [PATCH v2 1/4] common/mlx5: add send on time capability check Viacheslav Ovsiienko
                       ` (4 more replies)
  2022-02-24 10:54   ` [PATCH v3 0/3] " Viacheslav Ovsiienko
  1 sibling, 5 replies; 18+ messages in thread
From: Viacheslav Ovsiienko @ 2022-02-22 10:26 UTC (permalink / raw)
  To: dev; +Cc: matan, rasland

The send scheduling capability has been provided since ConnectX-6DX.
An application can register the dynamic field and dynamic flags
in mbuf for the timestamp and specify the desired moment of time
at which the packet should be sent.

The send scheduling feature over ConnectX-6DX uses the complicated
infrastructure with reference Clock Queue and inter-queue synchronizing
operations. Starting with ConnectX-7, a new wait descriptor format is
introduced in which the timestamp can be passed to hardware directly.
The patchset adds support for this new hardware option in PMD.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>

v1: http://patches.dpdk.org/project/dpdk/cover/20220214085655.22648-1-viacheslavo@nvidia.com/
v2: - fix compilation issues
    - rebase

Viacheslav Ovsiienko (4):
  common/mlx5: add send on time capability check
  net/mlx5: configure Tx queue with send on time offload
  net/mlx5: add wait on time support in Tx datapath
  doc: update send scheduling mlx5 feature description

 doc/guides/nics/mlx5.rst               |  5 ++
 doc/guides/rel_notes/release_22_03.rst |  6 +++
 drivers/common/mlx5/mlx5_devx_cmds.c   |  1 +
 drivers/common/mlx5/mlx5_devx_cmds.h   |  1 +
 drivers/common/mlx5/mlx5_prm.h         | 27 +++++++++-
 drivers/net/mlx5/linux/mlx5_verbs.c    |  4 ++
 drivers/net/mlx5/mlx5.h                |  3 ++
 drivers/net/mlx5/mlx5_devx.c           |  2 +
 drivers/net/mlx5/mlx5_tx.h             | 75 +++++++++++++++++++++++---
 drivers/net/mlx5/mlx5_txq.c            | 18 ++++++-
 10 files changed, 131 insertions(+), 11 deletions(-)

-- 
2.18.1


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v2 1/4] common/mlx5: add send on time capability check
  2022-02-22 10:26   ` [PATCH v2 0/4] net/mlx5: support send scheduling for ConnectX-7 Viacheslav Ovsiienko
@ 2022-02-22 10:26     ` Viacheslav Ovsiienko
  2022-02-22 10:26     ` [PATCH v2 2/4] net/mlx5: configure Tx queue with send on time offload Viacheslav Ovsiienko
                       ` (3 subsequent siblings)
  4 siblings, 0 replies; 18+ messages in thread
From: Viacheslav Ovsiienko @ 2022-02-22 10:26 UTC (permalink / raw)
  To: dev; +Cc: matan, rasland

The patch provides a check for the send scheduling on time hardware
capability. With this capability enabled, the hardware is able to handle
Wait WQEs with directly specified timestamp values. No Clock Queue is
needed anymore to handle send scheduling.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
 drivers/common/mlx5/mlx5_devx_cmds.c |  1 +
 drivers/common/mlx5/mlx5_devx_cmds.h |  1 +
 drivers/common/mlx5/mlx5_prm.h       | 27 ++++++++++++++++++++++++++-
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index 2e807a0829..fb55ef96ea 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -962,6 +962,7 @@ mlx5_devx_cmd_query_hca_attr(void *ctx,
 		MLX5_GET(cmd_hca_cap, hcattr, umr_indirect_mkey_disabled);
 	attr->umr_modify_entity_size_disabled =
 		MLX5_GET(cmd_hca_cap, hcattr, umr_modify_entity_size_disabled);
+	attr->wait_on_time = MLX5_GET(cmd_hca_cap, hcattr, wait_on_time);
 	attr->crypto = MLX5_GET(cmd_hca_cap, hcattr, crypto);
 	if (attr->crypto)
 		attr->aes_xts = MLX5_GET(cmd_hca_cap, hcattr, aes_xts);
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h
index 37821b493e..909d91adae 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -201,6 +201,7 @@ struct mlx5_hca_attr {
 	uint32_t scatter_fcs_w_decap_disable:1;
 	uint32_t flow_hit_aso:1; /* General obj type FLOW_HIT_ASO supported. */
 	uint32_t roce:1;
+	uint32_t wait_on_time:1;
 	uint32_t rq_ts_format:2;
 	uint32_t sq_ts_format:2;
 	uint32_t steering_format_version:4;
diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 495b63191a..4ce302b478 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -133,6 +133,19 @@
 #define MLX5_OPCODE_WAIT 0x0fu
 #endif
 
+#define MLX5_OPC_MOD_WAIT_CQ_PI 0u
+#define MLX5_OPC_MOD_WAIT_DATA 1u
+#define MLX5_OPC_MOD_WAIT_TIME 2u
+
+
+#define MLX5_WAIT_COND_INVERT 0x10u
+#define MLX5_WAIT_COND_ALWAYS_TRUE 0u
+#define MLX5_WAIT_COND_EQUAL 1u
+#define MLX5_WAIT_COND_BIGGER 2u
+#define MLX5_WAIT_COND_SMALLER 3u
+#define MLX5_WAIT_COND_CYCLIC_BIGGER 4u
+#define MLX5_WAIT_COND_CYCLIC_SMALLER 5u
+
 #ifndef HAVE_MLX5_OPCODE_ACCESS_ASO
 #define MLX5_OPCODE_ACCESS_ASO 0x2du
 #endif
@@ -348,6 +361,15 @@ struct mlx5_wqe_qseg {
 	uint32_t qpn_cqn;
 } __rte_packed;
 
+struct mlx5_wqe_wseg {
+	uint32_t operation;
+	uint32_t lkey;
+	uint32_t va_high;
+	uint32_t va_low;
+	uint64_t value;
+	uint64_t mask;
+} __rte_packed;
+
 /* The title WQEBB, header of WQE. */
 struct mlx5_wqe {
 	union {
@@ -1659,7 +1681,10 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8 num_vhca_ports[0x8];
 	u8 reserved_at_618[0x6];
 	u8 sw_owner_id[0x1];
-	u8 reserved_at_61f[0x129];
+	u8 reserved_at_61f[0x6C];
+	u8 wait_on_data[0x1];
+	u8 wait_on_time[0x1];
+	u8 reserved_at_68d[0xBB];
 	u8 dma_mmo_qp[0x1];
 	u8 regexp_mmo_qp[0x1];
 	u8 compress_mmo_qp[0x1];
-- 
2.18.1


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v2 2/4] net/mlx5: configure Tx queue with send on time offload
  2022-02-22 10:26   ` [PATCH v2 0/4] net/mlx5: support send scheduling for ConnectX-7 Viacheslav Ovsiienko
  2022-02-22 10:26     ` [PATCH v2 1/4] common/mlx5: add send on time capability check Viacheslav Ovsiienko
@ 2022-02-22 10:26     ` Viacheslav Ovsiienko
  2022-02-22 10:26     ` [PATCH v2 3/4] net/mlx5: add wait on time support in Tx datapath Viacheslav Ovsiienko
                       ` (2 subsequent siblings)
  4 siblings, 0 replies; 18+ messages in thread
From: Viacheslav Ovsiienko @ 2022-02-22 10:26 UTC (permalink / raw)
  To: dev; +Cc: matan, rasland

The wait on time configuration flag is copied to the Tx queue
structure due to performance considerations. The timestamp
mask is prepared and stored in the queue structure as well.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
 drivers/net/mlx5/linux/mlx5_verbs.c |  4 ++++
 drivers/net/mlx5/mlx5.h             |  3 +++
 drivers/net/mlx5/mlx5_devx.c        |  2 ++
 drivers/net/mlx5/mlx5_tx.h          |  3 +++
 drivers/net/mlx5/mlx5_txq.c         | 18 ++++++++++++++++--
 5 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/drivers/net/mlx5/linux/mlx5_verbs.c b/drivers/net/mlx5/linux/mlx5_verbs.c
index 73c44138de..331c61d3c5 100644
--- a/drivers/net/mlx5/linux/mlx5_verbs.c
+++ b/drivers/net/mlx5/linux/mlx5_verbs.c
@@ -1035,6 +1035,10 @@ mlx5_txq_ibv_obj_new(struct rte_eth_dev *dev, uint16_t idx)
 	txq_data->wqe_pi = 0;
 	txq_data->wqe_comp = 0;
 	txq_data->wqe_thres = txq_data->wqe_s / MLX5_TX_COMP_THRESH_INLINE_DIV;
+	txq_data->wait_on_time = !!(!priv->sh->config.tx_pp &&
+				 priv->sh->cdev->config.hca_attr.wait_on_time &&
+				 txq_data->offloads &
+				 RTE_ETH_TX_OFFLOAD_SEND_ON_TIMESTAMP);
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
 	/*
 	 * If using DevX need to query and store TIS transport domain value.
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 35ea3fb47c..4db94bb6e2 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -337,6 +337,9 @@ struct mlx5_lb_ctx {
 #define MLX5_CNT_ARRAY_IDX(pool, cnt) \
 	((int)(((uint8_t *)(cnt) - (uint8_t *)((pool) + 1)) / \
 	MLX5_CNT_LEN(pool)))
+#define MLX5_TS_MASK_SECS 8ull
+/* Timestamp wrapping in seconds, must be a power of 2. */
+
 /*
  * The pool index and offset of counter in the pool array makes up the
  * counter index. In case the counter is from pool 0 and offset 0, it
diff --git a/drivers/net/mlx5/mlx5_devx.c b/drivers/net/mlx5/mlx5_devx.c
index e57787cfec..e178b799fa 100644
--- a/drivers/net/mlx5/mlx5_devx.c
+++ b/drivers/net/mlx5/mlx5_devx.c
@@ -1328,6 +1328,8 @@ mlx5_txq_devx_obj_new(struct rte_eth_dev *dev, uint16_t idx)
 	txq_data->qp_num_8s = txq_obj->sq_obj.sq->id << 8;
 	txq_data->db_heu = sh->cdev->config.dbnc == MLX5_TXDB_HEURISTIC;
 	txq_data->db_nc = sh->tx_uar.dbnc;
+	txq_data->wait_on_time = !!(!sh->config.tx_pp &&
+				    sh->cdev->config.hca_attr.wait_on_time);
 	/* Change Send Queue state to Ready-to-Send. */
 	ret = mlx5_txq_devx_modify(txq_obj, MLX5_TXQ_MOD_RST2RDY, 0);
 	if (ret) {
diff --git a/drivers/net/mlx5/mlx5_tx.h b/drivers/net/mlx5/mlx5_tx.h
index c4b8271f6f..b50deb8b67 100644
--- a/drivers/net/mlx5/mlx5_tx.h
+++ b/drivers/net/mlx5/mlx5_tx.h
@@ -138,6 +138,8 @@ struct mlx5_txq_data {
 	uint16_t vlan_en:1; /* VLAN insertion in WQE is supported. */
 	uint16_t db_nc:1; /* Doorbell mapped to non-cached region. */
 	uint16_t db_heu:1; /* Doorbell heuristic write barrier. */
+	uint16_t rt_timestamp:1; /* Realtime timestamp format. */
+	uint16_t wait_on_time:1; /* WQE with timestamp is supported. */
 	uint16_t fast_free:1; /* mbuf fast free on Tx is enabled. */
 	uint16_t inlen_send; /* Ordinary send data inline size. */
 	uint16_t inlen_empw; /* eMPW max packet size to inline. */
@@ -157,6 +159,7 @@ struct mlx5_txq_data {
 	volatile uint32_t *cq_db; /* Completion queue doorbell. */
 	uint16_t port_id; /* Port ID of device. */
 	uint16_t idx; /* Queue index. */
+	uint64_t rt_timemask; /* Scheduling timestamp mask. */
 	uint64_t ts_mask; /* Timestamp flag dynamic mask. */
 	int32_t ts_offset; /* Timestamp field dynamic offset. */
 	struct mlx5_dev_ctx_shared *sh; /* Shared context. */
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index edbaa50692..f128c3d1a5 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -109,7 +109,8 @@ mlx5_get_tx_port_offloads(struct rte_eth_dev *dev)
 			     RTE_ETH_TX_OFFLOAD_TCP_CKSUM);
 	if (dev_cap->tso)
 		offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO;
-	if (priv->sh->config.tx_pp)
+	if (priv->sh->config.tx_pp ||
+	    priv->sh->cdev->config.hca_attr.wait_on_time)
 		offloads |= RTE_ETH_TX_OFFLOAD_SEND_ON_TIMESTAMP;
 	if (dev_cap->swp) {
 		if (dev_cap->swp & MLX5_SW_PARSING_CSUM_CAP)
@@ -1288,12 +1289,21 @@ mlx5_txq_dynf_timestamp_set(struct rte_eth_dev *dev)
 	int off, nbit;
 	unsigned int i;
 	uint64_t mask = 0;
+	uint64_t ts_mask;
 
+	if (sh->dev_cap.rt_timestamp ||
+	    !sh->cdev->config.hca_attr.dev_freq_khz)
+		ts_mask = MLX5_TS_MASK_SECS << 32;
+	else
+		ts_mask = rte_align64pow2(MLX5_TS_MASK_SECS * 1000ull *
+				sh->cdev->config.hca_attr.dev_freq_khz);
+	ts_mask = rte_cpu_to_be_64(ts_mask - 1ull);
 	nbit = rte_mbuf_dynflag_lookup
 				(RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL);
 	off = rte_mbuf_dynfield_lookup
 				(RTE_MBUF_DYNFIELD_TIMESTAMP_NAME, NULL);
-	if (nbit >= 0 && off >= 0 && sh->txpp.refcnt)
+	if (nbit >= 0 && off >= 0 &&
+	    (sh->txpp.refcnt || priv->sh->cdev->config.hca_attr.wait_on_time))
 		mask = 1ULL << nbit;
 	for (i = 0; i != priv->txqs_n; ++i) {
 		data = (*priv->txqs)[i];
@@ -1302,5 +1312,9 @@ mlx5_txq_dynf_timestamp_set(struct rte_eth_dev *dev)
 		data->sh = sh;
 		data->ts_mask = mask;
 		data->ts_offset = off;
+		data->rt_timestamp = sh->dev_cap.rt_timestamp;
+		data->rt_timemask = (data->offloads &
+				     RTE_ETH_TX_OFFLOAD_SEND_ON_TIMESTAMP) ?
+				     ts_mask : 0;
 	}
 }
-- 
2.18.1


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v2 3/4] net/mlx5: add wait on time support in Tx datapath
  2022-02-22 10:26   ` [PATCH v2 0/4] net/mlx5: support send scheduling for ConnectX-7 Viacheslav Ovsiienko
  2022-02-22 10:26     ` [PATCH v2 1/4] common/mlx5: add send on time capability check Viacheslav Ovsiienko
  2022-02-22 10:26     ` [PATCH v2 2/4] net/mlx5: configure Tx queue with send on time offload Viacheslav Ovsiienko
@ 2022-02-22 10:26     ` Viacheslav Ovsiienko
  2022-02-22 10:26     ` [PATCH v2 4/4] doc: update send scheduling mlx5 feature description Viacheslav Ovsiienko
  2022-02-23 13:13     ` [PATCH v2 0/4] net/mlx5: support send scheduling for ConnectX-7 Raslan Darawsheh
  4 siblings, 0 replies; 18+ messages in thread
From: Viacheslav Ovsiienko @ 2022-02-22 10:26 UTC (permalink / raw)
  To: dev; +Cc: matan, rasland

Starting with ConnectX-7, the hardware supports waiting
for a specified moment of time using the newly introduced
wait descriptor. A timestamp can be placed directly
into the descriptor and pushed to the send queue.
Once the hardware encounters the wait descriptor, the
queue operation is suspended until the specified moment
of time. This patch updates the Tx datapath to handle
this new hardware wait capability.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
 drivers/net/mlx5/mlx5_tx.h | 72 +++++++++++++++++++++++++++++++++-----
 1 file changed, 64 insertions(+), 8 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_tx.h b/drivers/net/mlx5/mlx5_tx.h
index b50deb8b67..0adc3f4839 100644
--- a/drivers/net/mlx5/mlx5_tx.h
+++ b/drivers/net/mlx5/mlx5_tx.h
@@ -780,7 +780,7 @@ mlx5_tx_cseg_init(struct mlx5_txq_data *__rte_restrict txq,
  *   compile time and may be used for optimization.
  */
 static __rte_always_inline void
-mlx5_tx_wseg_init(struct mlx5_txq_data *restrict txq,
+mlx5_tx_qseg_init(struct mlx5_txq_data *restrict txq,
 		  struct mlx5_txq_local *restrict loc __rte_unused,
 		  struct mlx5_wqe *restrict wqe,
 		  unsigned int wci,
@@ -795,6 +795,43 @@ mlx5_tx_wseg_init(struct mlx5_txq_data *restrict txq,
 	qs->reserved1 = RTE_BE32(0);
 }
 
+/**
+ * Build the Wait on Time Segment with specified timestamp value.
+ *
+ * @param txq
+ *   Pointer to TX queue structure.
+ * @param loc
+ *   Pointer to burst routine local context.
+ * @param wqe
+ *   Pointer to WQE to fill with built Wait Segment.
+ * @param ts
+ *   Timestamp value to wait for.
+ * @param olx
+ *   Configured Tx offloads mask. It is fully defined at
+ *   compile time and may be used for optimization.
+ */
+static __rte_always_inline void
+mlx5_tx_wseg_init(struct mlx5_txq_data *restrict txq,
+		  struct mlx5_txq_local *restrict loc __rte_unused,
+		  struct mlx5_wqe *restrict wqe,
+		  uint64_t ts,
+		  unsigned int olx __rte_unused)
+{
+	struct mlx5_wqe_wseg *ws;
+
+	ws = RTE_PTR_ADD(wqe, MLX5_WSEG_SIZE);
+	ws->operation = rte_cpu_to_be_32(MLX5_WAIT_COND_CYCLIC_BIGGER);
+	ws->lkey = RTE_BE32(0);
+	ws->va_high = RTE_BE32(0);
+	ws->va_low = RTE_BE32(0);
+	if (txq->rt_timestamp) {
+		ts = ts % (uint64_t)NS_PER_S
+		   | (ts / (uint64_t)NS_PER_S) << 32;
+	}
+	ws->value = rte_cpu_to_be_64(ts);
+	ws->mask = txq->rt_timemask;
+}
+
 /**
  * Build the Ethernet Segment without inlined data.
  * Supports Software Parser, Checksums and VLAN insertion Tx offload features.
@@ -1626,9 +1663,9 @@ mlx5_tx_schedule_send(struct mlx5_txq_data *restrict txq,
 {
 	if (MLX5_TXOFF_CONFIG(TXPP) &&
 	    loc->mbuf->ol_flags & txq->ts_mask) {
+		struct mlx5_dev_ctx_shared *sh;
 		struct mlx5_wqe *wqe;
 		uint64_t ts;
-		int32_t wci;
 
 		/*
 		 * Estimate the required space quickly and roughly.
@@ -1640,13 +1677,32 @@ mlx5_tx_schedule_send(struct mlx5_txq_data *restrict txq,
 			return MLX5_TXCMP_CODE_EXIT;
 		/* Convert the timestamp into completion to wait. */
 		ts = *RTE_MBUF_DYNFIELD(loc->mbuf, txq->ts_offset, uint64_t *);
-		wci = mlx5_txpp_convert_tx_ts(txq->sh, ts);
-		if (unlikely(wci < 0))
-			return MLX5_TXCMP_CODE_SINGLE;
-		/* Build the WAIT WQE with specified completion. */
 		wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m);
-		mlx5_tx_cseg_init(txq, loc, wqe, 2, MLX5_OPCODE_WAIT, olx);
-		mlx5_tx_wseg_init(txq, loc, wqe, wci, olx);
+		sh = txq->sh;
+		if (txq->wait_on_time) {
+			/* The wait on time capability should be used. */
+			ts -= sh->txpp.skew;
+			mlx5_tx_cseg_init(txq, loc, wqe,
+					  1 + sizeof(struct mlx5_wqe_wseg) /
+					      MLX5_WSEG_SIZE,
+					  MLX5_OPCODE_WAIT |
+					  MLX5_OPC_MOD_WAIT_TIME << 24, olx);
+			mlx5_tx_wseg_init(txq, loc, wqe, ts, olx);
+		} else {
+			/* Legacy cross-channel operation should be used. */
+			int32_t wci;
+
+			wci = mlx5_txpp_convert_tx_ts(sh, ts);
+			if (unlikely(wci < 0))
+				return MLX5_TXCMP_CODE_SINGLE;
+			/* Build the WAIT WQE with specified completion. */
+			mlx5_tx_cseg_init(txq, loc, wqe,
+					  1 + sizeof(struct mlx5_wqe_qseg) /
+					      MLX5_WSEG_SIZE,
+					  MLX5_OPCODE_WAIT |
+					  MLX5_OPC_MOD_WAIT_CQ_PI << 24, olx);
+			mlx5_tx_qseg_init(txq, loc, wqe, wci, olx);
+		}
 		++txq->wqe_ci;
 		--loc->wqe_free;
 		return MLX5_TXCMP_CODE_MULTI;
-- 
2.18.1


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v2 4/4] doc: update send scheduling mlx5 feature description
  2022-02-22 10:26   ` [PATCH v2 0/4] net/mlx5: support send scheduling for ConnectX-7 Viacheslav Ovsiienko
                       ` (2 preceding siblings ...)
  2022-02-22 10:26     ` [PATCH v2 3/4] net/mlx5: add wait on time support in Tx datapath Viacheslav Ovsiienko
@ 2022-02-22 10:26     ` Viacheslav Ovsiienko
  2022-02-23 18:56       ` Ferruh Yigit
  2022-02-23 13:13     ` [PATCH v2 0/4] net/mlx5: support send scheduling for ConnectX-7 Raslan Darawsheh
  4 siblings, 1 reply; 18+ messages in thread
From: Viacheslav Ovsiienko @ 2022-02-22 10:26 UTC (permalink / raw)
  To: dev; +Cc: matan, rasland

Updated:
  - send scheduling feature description for mlx5
  - release notes

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
 doc/guides/nics/mlx5.rst               | 5 +++++
 doc/guides/rel_notes/release_22_03.rst | 6 ++++++
 2 files changed, 11 insertions(+)

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index c3cc0c0f41..6494f4ae39 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -934,6 +934,11 @@ Driver options
   By default (if the ``tx_pp`` is not specified) send scheduling on timestamps
   feature is disabled.
 
+  Starting with ConnectX-7, the capability to schedule traffic directly
+  on the timestamp specified in the descriptor is provided. No extra objects
+  are needed anymore, and the scheduling capability is advertised and handled
+  regardless of the ``tx_pp`` parameter presence.
+
 - ``tx_skew`` parameter [int]
 
   The parameter adjusts the send packet scheduling on timestamps and represents
diff --git a/doc/guides/rel_notes/release_22_03.rst b/doc/guides/rel_notes/release_22_03.rst
index 41923f50e6..12832ca101 100644
--- a/doc/guides/rel_notes/release_22_03.rst
+++ b/doc/guides/rel_notes/release_22_03.rst
@@ -118,6 +118,12 @@ New Features
   * Added PPPoL2TPv2oUDP FDIR distribute packets based on inner IP
     src/dst address and UDP/TCP src/dst port.
 
+* **Updated Mellanox mlx5 driver.**
+
+  Updated the Mellanox mlx5 driver with new features and improvements, including:
+
+  * Added support for the ConnectX-7 capability to schedule traffic sending on timestamp.
+
 * **Updated Wangxun ngbe driver.**
 
   * Added support for devices of custom PHY interfaces.
-- 
2.18.1


^ permalink raw reply	[flat|nested] 18+ messages in thread

* RE: [PATCH v2 0/4] net/mlx5: support send scheduling for ConnectX-7
  2022-02-22 10:26   ` [PATCH v2 0/4] net/mlx5: support send scheduling for ConnectX-7 Viacheslav Ovsiienko
                       ` (3 preceding siblings ...)
  2022-02-22 10:26     ` [PATCH v2 4/4] doc: update send scheduling mlx5 feature description Viacheslav Ovsiienko
@ 2022-02-23 13:13     ` Raslan Darawsheh
  4 siblings, 0 replies; 18+ messages in thread
From: Raslan Darawsheh @ 2022-02-23 13:13 UTC (permalink / raw)
  To: Slava Ovsiienko, dev; +Cc: Matan Azrad

Hi,

> -----Original Message-----
> From: Slava Ovsiienko <viacheslavo@nvidia.com>
> Sent: Tuesday, February 22, 2022 12:27 PM
> To: dev@dpdk.org
> Cc: Matan Azrad <matan@nvidia.com>; Raslan Darawsheh
> <rasland@nvidia.com>
> Subject: [PATCH v2 0/4] net/mlx5: support send scheduling for ConnectX-7
> 
> Since the ConnectX-6DX the send scheduling capability is provided.
> An application can register the dynamic field and dynamic flags in mbuf for
> timestamp and specify the desired moment of time the packet should sent.
> 
> The send scheduling feature over ConnectX-6DX uses the complicated
> infrastructure with reference Clock Queue and inter-queue synchronizing
> operations. Since ConnectX-7 the new wait descriptor format is introduced
> where the timestamp can be promoted to hardware directly.
> The patchset adds support for this new hardware option in PMD.
> 
> Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
> 
> v1: http://patches.dpdk.org/project/dpdk/cover/20220214085655.22648-1-
> viacheslavo@nvidia.com/
> v2: - fix compilation issues
>     - rebase
> 
> Viacheslav Ovsiienko (4):
>   common/mlx5: add send on time capability check
>   net/mlx5: configure Tx queue with send on time offload
>   net/mlx5: add wait on time support in Tx datapath
>   doc: update send scheduling mlx5 feature description
> 
>  doc/guides/nics/mlx5.rst               |  5 ++
>  doc/guides/rel_notes/release_22_03.rst |  6 +++
>  drivers/common/mlx5/mlx5_devx_cmds.c   |  1 +
>  drivers/common/mlx5/mlx5_devx_cmds.h   |  1 +
>  drivers/common/mlx5/mlx5_prm.h         | 27 +++++++++-
>  drivers/net/mlx5/linux/mlx5_verbs.c    |  4 ++
>  drivers/net/mlx5/mlx5.h                |  3 ++
>  drivers/net/mlx5/mlx5_devx.c           |  2 +
>  drivers/net/mlx5/mlx5_tx.h             | 75 +++++++++++++++++++++++---
>  drivers/net/mlx5/mlx5_txq.c            | 18 ++++++-
>  10 files changed, 131 insertions(+), 11 deletions(-)
> 
> --
> 2.18.1

Series applied to next-net-mlx,

Kindest regards,
Raslan Darawsheh

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 4/4] doc: update send scheduling mlx5 feature description
  2022-02-22 10:26     ` [PATCH v2 4/4] doc: update send scheduling mlx5 feature description Viacheslav Ovsiienko
@ 2022-02-23 18:56       ` Ferruh Yigit
  2022-02-24 10:56         ` Slava Ovsiienko
  0 siblings, 1 reply; 18+ messages in thread
From: Ferruh Yigit @ 2022-02-23 18:56 UTC (permalink / raw)
  To: Viacheslav Ovsiienko, dev; +Cc: matan, rasland

On 2/22/2022 10:26 AM, Viacheslav Ovsiienko wrote:
> Updated:
>    - send scheduling feature description for mlx5
>    - release notes
> 
> Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
> ---
>   doc/guides/nics/mlx5.rst               | 5 +++++
>   doc/guides/rel_notes/release_22_03.rst | 6 ++++++
>   2 files changed, 11 insertions(+)
> 

Hi Viacheslav,

Can you please distribute the doc updates to the patches
that are doing the updates in this patch, instead of having
a separate patch for it?

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v3 0/3] net/mlx5: support send scheduling for ConnectX-7
  2022-02-14  8:56 ` [PATCH 1/4] common/mlx5: add send on time capability check Viacheslav Ovsiienko
  2022-02-22 10:26   ` [PATCH v2 0/4] net/mlx5: support send scheduling for ConnectX-7 Viacheslav Ovsiienko
@ 2022-02-24 10:54   ` Viacheslav Ovsiienko
  2022-02-24 10:54     ` [PATCH v3 1/3] common/mlx5: add send on time capability check Viacheslav Ovsiienko
                       ` (3 more replies)
  1 sibling, 4 replies; 18+ messages in thread
From: Viacheslav Ovsiienko @ 2022-02-24 10:54 UTC (permalink / raw)
  To: dev; +Cc: matan, rasland

The send scheduling capability has been provided since ConnectX-6DX.
An application can register the dynamic field and dynamic flags
in mbuf for the timestamp and specify the desired moment of time
at which the packet should be sent.

The send scheduling feature over ConnectX-6DX uses a complicated
infrastructure with a reference Clock Queue and inter-queue synchronizing
operations. Starting with ConnectX-7, a new wait descriptor format is
introduced where the timestamp can be passed to the hardware directly.
The patchset adds support for this new hardware option in the PMD.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>

v1: http://patches.dpdk.org/project/dpdk/cover/20220214085655.22648-1-viacheslavo@nvidia.com/
v2: http://patches.dpdk.org/project/dpdk/cover/20220222102648.4662-1-viacheslavo@nvidia.com/
    - fix compilation issues
    - rebase
v3: - documentation updates moved into the related code patch

Viacheslav Ovsiienko (3):
  common/mlx5: add send on time capability check
  net/mlx5: configure Tx queue with send on time offload
  net/mlx5: add wait on time support in Tx datapath

 doc/guides/nics/mlx5.rst               |  5 ++
 doc/guides/rel_notes/release_22_03.rst |  6 +++
 drivers/common/mlx5/mlx5_devx_cmds.c   |  1 +
 drivers/common/mlx5/mlx5_devx_cmds.h   |  1 +
 drivers/common/mlx5/mlx5_prm.h         | 27 +++++++++-
 drivers/net/mlx5/linux/mlx5_verbs.c    |  4 ++
 drivers/net/mlx5/mlx5.h                |  3 ++
 drivers/net/mlx5/mlx5_devx.c           |  2 +
 drivers/net/mlx5/mlx5_tx.h             | 75 +++++++++++++++++++++++---
 drivers/net/mlx5/mlx5_txq.c            | 18 ++++++-
 10 files changed, 131 insertions(+), 11 deletions(-)

-- 
2.18.1


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v3 1/3] common/mlx5: add send on time capability check
  2022-02-24 10:54   ` [PATCH v3 0/3] " Viacheslav Ovsiienko
@ 2022-02-24 10:54     ` Viacheslav Ovsiienko
  2022-02-24 10:55     ` [PATCH v3 2/3] net/mlx5: configure Tx queue with send on time offload Viacheslav Ovsiienko
                       ` (2 subsequent siblings)
  3 siblings, 0 replies; 18+ messages in thread
From: Viacheslav Ovsiienko @ 2022-02-24 10:54 UTC (permalink / raw)
  To: dev; +Cc: matan, rasland

The patch provides a check for the send-scheduling-on-time hardware
capability. With this capability enabled, the hardware is able to handle
Wait WQEs with directly specified timestamp values. No Clock Queue is
needed anymore to handle send scheduling.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
 drivers/common/mlx5/mlx5_devx_cmds.c |  1 +
 drivers/common/mlx5/mlx5_devx_cmds.h |  1 +
 drivers/common/mlx5/mlx5_prm.h       | 27 ++++++++++++++++++++++++++-
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index 70a430f134..9f1419ded8 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -962,6 +962,7 @@ mlx5_devx_cmd_query_hca_attr(void *ctx,
 		MLX5_GET(cmd_hca_cap, hcattr, umr_indirect_mkey_disabled);
 	attr->umr_modify_entity_size_disabled =
 		MLX5_GET(cmd_hca_cap, hcattr, umr_modify_entity_size_disabled);
+	attr->wait_on_time = MLX5_GET(cmd_hca_cap, hcattr, wait_on_time);
 	attr->crypto = MLX5_GET(cmd_hca_cap, hcattr, crypto);
 	if (attr->crypto)
 		attr->aes_xts = MLX5_GET(cmd_hca_cap, hcattr, aes_xts);
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h
index 4373761c29..1bac18c59d 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -201,6 +201,7 @@ struct mlx5_hca_attr {
 	uint32_t scatter_fcs_w_decap_disable:1;
 	uint32_t flow_hit_aso:1; /* General obj type FLOW_HIT_ASO supported. */
 	uint32_t roce:1;
+	uint32_t wait_on_time:1;
 	uint32_t rq_ts_format:2;
 	uint32_t sq_ts_format:2;
 	uint32_t steering_format_version:4;
diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index b9e39aa717..ce3e47059f 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -133,6 +133,19 @@
 #define MLX5_OPCODE_WAIT 0x0fu
 #endif
 
+#define MLX5_OPC_MOD_WAIT_CQ_PI 0u
+#define MLX5_OPC_MOD_WAIT_DATA 1u
+#define MLX5_OPC_MOD_WAIT_TIME 2u
+
+
+#define MLX5_WAIT_COND_INVERT 0x10u
+#define MLX5_WAIT_COND_ALWAYS_TRUE 0u
+#define MLX5_WAIT_COND_EQUAL 1u
+#define MLX5_WAIT_COND_BIGGER 2u
+#define MLX5_WAIT_COND_SMALLER 3u
+#define MLX5_WAIT_COND_CYCLIC_BIGGER 4u
+#define MLX5_WAIT_COND_CYCLIC_SMALLER 5u
+
 #ifndef HAVE_MLX5_OPCODE_ACCESS_ASO
 #define MLX5_OPCODE_ACCESS_ASO 0x2du
 #endif
@@ -348,6 +361,15 @@ struct mlx5_wqe_qseg {
 	uint32_t qpn_cqn;
 } __rte_packed;
 
+struct mlx5_wqe_wseg {
+	uint32_t operation;
+	uint32_t lkey;
+	uint32_t va_high;
+	uint32_t va_low;
+	uint64_t value;
+	uint64_t mask;
+} __rte_packed;
+
 /* The title WQEBB, header of WQE. */
 struct mlx5_wqe {
 	union {
@@ -1660,7 +1682,10 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8 num_vhca_ports[0x8];
 	u8 reserved_at_618[0x6];
 	u8 sw_owner_id[0x1];
-	u8 reserved_at_61f[0x129];
+	u8 reserved_at_61f[0x6C];
+	u8 wait_on_data[0x1];
+	u8 wait_on_time[0x1];
+	u8 reserved_at_68d[0xBB];
 	u8 dma_mmo_qp[0x1];
 	u8 regexp_mmo_qp[0x1];
 	u8 compress_mmo_qp[0x1];
-- 
2.18.1


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v3 2/3] net/mlx5: configure Tx queue with send on time offload
  2022-02-24 10:54   ` [PATCH v3 0/3] " Viacheslav Ovsiienko
  2022-02-24 10:54     ` [PATCH v3 1/3] common/mlx5: add send on time capability check Viacheslav Ovsiienko
@ 2022-02-24 10:55     ` Viacheslav Ovsiienko
  2022-02-24 10:55     ` [PATCH v3 3/3] net/mlx5: add wait on time support in Tx datapath Viacheslav Ovsiienko
  2022-02-24 12:48     ` [PATCH v3 0/3] net/mlx5: support send scheduling for ConnectX-7 Raslan Darawsheh
  3 siblings, 0 replies; 18+ messages in thread
From: Viacheslav Ovsiienko @ 2022-02-24 10:55 UTC (permalink / raw)
  To: dev; +Cc: matan, rasland

The wait on time configuration flag is copied to the Tx queue
structure due to performance considerations. The timestamp
mask is prepared and stored in the queue structure as well.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
 drivers/net/mlx5/linux/mlx5_verbs.c |  4 ++++
 drivers/net/mlx5/mlx5.h             |  3 +++
 drivers/net/mlx5/mlx5_devx.c        |  2 ++
 drivers/net/mlx5/mlx5_tx.h          |  3 +++
 drivers/net/mlx5/mlx5_txq.c         | 18 ++++++++++++++++--
 5 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/drivers/net/mlx5/linux/mlx5_verbs.c b/drivers/net/mlx5/linux/mlx5_verbs.c
index dfbc5a1e08..b6ba21c216 100644
--- a/drivers/net/mlx5/linux/mlx5_verbs.c
+++ b/drivers/net/mlx5/linux/mlx5_verbs.c
@@ -1035,6 +1035,10 @@ mlx5_txq_ibv_obj_new(struct rte_eth_dev *dev, uint16_t idx)
 	txq_data->wqe_pi = 0;
 	txq_data->wqe_comp = 0;
 	txq_data->wqe_thres = txq_data->wqe_s / MLX5_TX_COMP_THRESH_INLINE_DIV;
+	txq_data->wait_on_time = !!(!priv->sh->config.tx_pp &&
+				 priv->sh->cdev->config.hca_attr.wait_on_time &&
+				 txq_data->offloads &
+				 RTE_ETH_TX_OFFLOAD_SEND_ON_TIMESTAMP);
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
 	/*
 	 * If using DevX need to query and store TIS transport domain value.
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index e7eaacc76f..0f465d0e9e 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -337,6 +337,9 @@ struct mlx5_lb_ctx {
 #define MLX5_CNT_ARRAY_IDX(pool, cnt) \
 	((int)(((uint8_t *)(cnt) - (uint8_t *)((pool) + 1)) / \
 	MLX5_CNT_LEN(pool)))
+#define MLX5_TS_MASK_SECS 8ull
+/* Timestamp wrapping in seconds, must be a power of 2. */
+
 /*
  * The pool index and offset of counter in the pool array makes up the
  * counter index. In case the counter is from pool 0 and offset 0, it
diff --git a/drivers/net/mlx5/mlx5_devx.c b/drivers/net/mlx5/mlx5_devx.c
index f18b18b1a2..a9b8c2a1b7 100644
--- a/drivers/net/mlx5/mlx5_devx.c
+++ b/drivers/net/mlx5/mlx5_devx.c
@@ -1328,6 +1328,8 @@ mlx5_txq_devx_obj_new(struct rte_eth_dev *dev, uint16_t idx)
 	txq_data->qp_num_8s = txq_obj->sq_obj.sq->id << 8;
 	txq_data->db_heu = sh->cdev->config.dbnc == MLX5_SQ_DB_HEURISTIC;
 	txq_data->db_nc = sh->tx_uar.dbnc;
+	txq_data->wait_on_time = !!(!sh->config.tx_pp &&
+				    sh->cdev->config.hca_attr.wait_on_time);
 	/* Change Send Queue state to Ready-to-Send. */
 	ret = mlx5_txq_devx_modify(txq_obj, MLX5_TXQ_MOD_RST2RDY, 0);
 	if (ret) {
diff --git a/drivers/net/mlx5/mlx5_tx.h b/drivers/net/mlx5/mlx5_tx.h
index c4b8271f6f..b50deb8b67 100644
--- a/drivers/net/mlx5/mlx5_tx.h
+++ b/drivers/net/mlx5/mlx5_tx.h
@@ -138,6 +138,8 @@ struct mlx5_txq_data {
 	uint16_t vlan_en:1; /* VLAN insertion in WQE is supported. */
 	uint16_t db_nc:1; /* Doorbell mapped to non-cached region. */
 	uint16_t db_heu:1; /* Doorbell heuristic write barrier. */
+	uint16_t rt_timestamp:1; /* Realtime timestamp format. */
+	uint16_t wait_on_time:1; /* WQE with timestamp is supported. */
 	uint16_t fast_free:1; /* mbuf fast free on Tx is enabled. */
 	uint16_t inlen_send; /* Ordinary send data inline size. */
 	uint16_t inlen_empw; /* eMPW max packet size to inline. */
@@ -157,6 +159,7 @@ struct mlx5_txq_data {
 	volatile uint32_t *cq_db; /* Completion queue doorbell. */
 	uint16_t port_id; /* Port ID of device. */
 	uint16_t idx; /* Queue index. */
+	uint64_t rt_timemask; /* Scheduling timestamp mask. */
 	uint64_t ts_mask; /* Timestamp flag dynamic mask. */
 	int32_t ts_offset; /* Timestamp field dynamic offset. */
 	struct mlx5_dev_ctx_shared *sh; /* Shared context. */
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index edbaa50692..f128c3d1a5 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -109,7 +109,8 @@ mlx5_get_tx_port_offloads(struct rte_eth_dev *dev)
 			     RTE_ETH_TX_OFFLOAD_TCP_CKSUM);
 	if (dev_cap->tso)
 		offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO;
-	if (priv->sh->config.tx_pp)
+	if (priv->sh->config.tx_pp ||
+	    priv->sh->cdev->config.hca_attr.wait_on_time)
 		offloads |= RTE_ETH_TX_OFFLOAD_SEND_ON_TIMESTAMP;
 	if (dev_cap->swp) {
 		if (dev_cap->swp & MLX5_SW_PARSING_CSUM_CAP)
@@ -1288,12 +1289,21 @@ mlx5_txq_dynf_timestamp_set(struct rte_eth_dev *dev)
 	int off, nbit;
 	unsigned int i;
 	uint64_t mask = 0;
+	uint64_t ts_mask;
 
+	if (sh->dev_cap.rt_timestamp ||
+	    !sh->cdev->config.hca_attr.dev_freq_khz)
+		ts_mask = MLX5_TS_MASK_SECS << 32;
+	else
+		ts_mask = rte_align64pow2(MLX5_TS_MASK_SECS * 1000ull *
+				sh->cdev->config.hca_attr.dev_freq_khz);
+	ts_mask = rte_cpu_to_be_64(ts_mask - 1ull);
 	nbit = rte_mbuf_dynflag_lookup
 				(RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL);
 	off = rte_mbuf_dynfield_lookup
 				(RTE_MBUF_DYNFIELD_TIMESTAMP_NAME, NULL);
-	if (nbit >= 0 && off >= 0 && sh->txpp.refcnt)
+	if (nbit >= 0 && off >= 0 &&
+	    (sh->txpp.refcnt || priv->sh->cdev->config.hca_attr.wait_on_time))
 		mask = 1ULL << nbit;
 	for (i = 0; i != priv->txqs_n; ++i) {
 		data = (*priv->txqs)[i];
@@ -1302,5 +1312,9 @@ mlx5_txq_dynf_timestamp_set(struct rte_eth_dev *dev)
 		data->sh = sh;
 		data->ts_mask = mask;
 		data->ts_offset = off;
+		data->rt_timestamp = sh->dev_cap.rt_timestamp;
+		data->rt_timemask = (data->offloads &
+				     RTE_ETH_TX_OFFLOAD_SEND_ON_TIMESTAMP) ?
+				     ts_mask : 0;
 	}
 }
-- 
2.18.1


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v3 3/3] net/mlx5: add wait on time support in Tx datapath
  2022-02-24 10:54   ` [PATCH v3 0/3] " Viacheslav Ovsiienko
  2022-02-24 10:54     ` [PATCH v3 1/3] common/mlx5: add send on time capability check Viacheslav Ovsiienko
  2022-02-24 10:55     ` [PATCH v3 2/3] net/mlx5: configure Tx queue with send on time offload Viacheslav Ovsiienko
@ 2022-02-24 10:55     ` Viacheslav Ovsiienko
  2022-02-24 12:48     ` [PATCH v3 0/3] net/mlx5: support send scheduling for ConnectX-7 Raslan Darawsheh
  3 siblings, 0 replies; 18+ messages in thread
From: Viacheslav Ovsiienko @ 2022-02-24 10:55 UTC (permalink / raw)
  To: dev; +Cc: matan, rasland

Starting with ConnectX-7, the hardware supports waiting
for a specified moment of time using the newly introduced
wait descriptor. A timestamp can be placed directly
into the descriptor and pushed to the send queue.
Once the hardware encounters the wait descriptor, the
queue operation is suspended until the specified moment
of time. This patch updates the Tx datapath to handle
this new hardware wait capability.

The PMD documentation and release notes are updated accordingly.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
 doc/guides/nics/mlx5.rst               |  5 ++
 doc/guides/rel_notes/release_22_03.rst |  6 +++
 drivers/net/mlx5/mlx5_tx.h             | 72 +++++++++++++++++++++++---
 3 files changed, 75 insertions(+), 8 deletions(-)

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 968aac01d2..8956cd1dd8 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -829,6 +829,11 @@ for an additional list of options shared with other mlx5 drivers.
   By default (if the ``tx_pp`` is not specified) send scheduling on timestamps
   feature is disabled.
 
+  Starting with ConnectX-7, the capability to schedule traffic directly
+  on the timestamp specified in the descriptor is provided. No extra objects
+  are needed anymore, and the scheduling capability is advertised and handled
+  regardless of the ``tx_pp`` parameter presence.
+
 - ``tx_skew`` parameter [int]
 
   The parameter adjusts the send packet scheduling on timestamps and represents
diff --git a/doc/guides/rel_notes/release_22_03.rst b/doc/guides/rel_notes/release_22_03.rst
index 74965ebd56..acd56e0a80 100644
--- a/doc/guides/rel_notes/release_22_03.rst
+++ b/doc/guides/rel_notes/release_22_03.rst
@@ -118,6 +118,12 @@ New Features
   * Added PPPoL2TPv2oUDP FDIR distribute packets based on inner IP
     src/dst address and UDP/TCP src/dst port.
 
+* **Updated Mellanox mlx5 driver.**
+
+  Updated the Mellanox mlx5 driver with new features and improvements, including:
+
+  * Added support for the ConnectX-7 capability to schedule traffic sending on timestamp.
+
 * **Updated Wangxun ngbe driver.**
 
   * Added support for devices of custom PHY interfaces.
diff --git a/drivers/net/mlx5/mlx5_tx.h b/drivers/net/mlx5/mlx5_tx.h
index b50deb8b67..0adc3f4839 100644
--- a/drivers/net/mlx5/mlx5_tx.h
+++ b/drivers/net/mlx5/mlx5_tx.h
@@ -780,7 +780,7 @@ mlx5_tx_cseg_init(struct mlx5_txq_data *__rte_restrict txq,
  *   compile time and may be used for optimization.
  */
 static __rte_always_inline void
-mlx5_tx_wseg_init(struct mlx5_txq_data *restrict txq,
+mlx5_tx_qseg_init(struct mlx5_txq_data *restrict txq,
 		  struct mlx5_txq_local *restrict loc __rte_unused,
 		  struct mlx5_wqe *restrict wqe,
 		  unsigned int wci,
@@ -795,6 +795,43 @@ mlx5_tx_wseg_init(struct mlx5_txq_data *restrict txq,
 	qs->reserved1 = RTE_BE32(0);
 }
 
+/**
+ * Build the Wait on Time Segment with specified timestamp value.
+ *
+ * @param txq
+ *   Pointer to TX queue structure.
+ * @param loc
+ *   Pointer to burst routine local context.
+ * @param wqe
+ *   Pointer to WQE to fill with built Wait Segment.
+ * @param ts
+ *   Timestamp value to wait for.
+ * @param olx
+ *   Configured Tx offloads mask. It is fully defined at
+ *   compile time and may be used for optimization.
+ */
+static __rte_always_inline void
+mlx5_tx_wseg_init(struct mlx5_txq_data *restrict txq,
+		  struct mlx5_txq_local *restrict loc __rte_unused,
+		  struct mlx5_wqe *restrict wqe,
+		  uint64_t ts,
+		  unsigned int olx __rte_unused)
+{
+	struct mlx5_wqe_wseg *ws;
+
+	ws = RTE_PTR_ADD(wqe, MLX5_WSEG_SIZE);
+	ws->operation = rte_cpu_to_be_32(MLX5_WAIT_COND_CYCLIC_BIGGER);
+	ws->lkey = RTE_BE32(0);
+	ws->va_high = RTE_BE32(0);
+	ws->va_low = RTE_BE32(0);
+	if (txq->rt_timestamp) {
+		ts = ts % (uint64_t)NS_PER_S
+		   | (ts / (uint64_t)NS_PER_S) << 32;
+	}
+	ws->value = rte_cpu_to_be_64(ts);
+	ws->mask = txq->rt_timemask;
+}
+
 /**
  * Build the Ethernet Segment without inlined data.
  * Supports Software Parser, Checksums and VLAN insertion Tx offload features.
@@ -1626,9 +1663,9 @@ mlx5_tx_schedule_send(struct mlx5_txq_data *restrict txq,
 {
 	if (MLX5_TXOFF_CONFIG(TXPP) &&
 	    loc->mbuf->ol_flags & txq->ts_mask) {
+		struct mlx5_dev_ctx_shared *sh;
 		struct mlx5_wqe *wqe;
 		uint64_t ts;
-		int32_t wci;
 
 		/*
 		 * Estimate the required space quickly and roughly.
@@ -1640,13 +1677,32 @@ mlx5_tx_schedule_send(struct mlx5_txq_data *restrict txq,
 			return MLX5_TXCMP_CODE_EXIT;
 		/* Convert the timestamp into completion to wait. */
 		ts = *RTE_MBUF_DYNFIELD(loc->mbuf, txq->ts_offset, uint64_t *);
-		wci = mlx5_txpp_convert_tx_ts(txq->sh, ts);
-		if (unlikely(wci < 0))
-			return MLX5_TXCMP_CODE_SINGLE;
-		/* Build the WAIT WQE with specified completion. */
 		wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m);
-		mlx5_tx_cseg_init(txq, loc, wqe, 2, MLX5_OPCODE_WAIT, olx);
-		mlx5_tx_wseg_init(txq, loc, wqe, wci, olx);
+		sh = txq->sh;
+		if (txq->wait_on_time) {
+			/* The wait on time capability should be used. */
+			ts -= sh->txpp.skew;
+			mlx5_tx_cseg_init(txq, loc, wqe,
+					  1 + sizeof(struct mlx5_wqe_wseg) /
+					      MLX5_WSEG_SIZE,
+					  MLX5_OPCODE_WAIT |
+					  MLX5_OPC_MOD_WAIT_TIME << 24, olx);
+			mlx5_tx_wseg_init(txq, loc, wqe, ts, olx);
+		} else {
+			/* Legacy cross-channel operation should be used. */
+			int32_t wci;
+
+			wci = mlx5_txpp_convert_tx_ts(sh, ts);
+			if (unlikely(wci < 0))
+				return MLX5_TXCMP_CODE_SINGLE;
+			/* Build the WAIT WQE with specified completion. */
+			mlx5_tx_cseg_init(txq, loc, wqe,
+					  1 + sizeof(struct mlx5_wqe_qseg) /
+					      MLX5_WSEG_SIZE,
+					  MLX5_OPCODE_WAIT |
+					  MLX5_OPC_MOD_WAIT_CQ_PI << 24, olx);
+			mlx5_tx_qseg_init(txq, loc, wqe, wci, olx);
+		}
 		++txq->wqe_ci;
 		--loc->wqe_free;
 		return MLX5_TXCMP_CODE_MULTI;
-- 
2.18.1


^ permalink raw reply	[flat|nested] 18+ messages in thread

* RE: [PATCH v2 4/4] doc: update send scheduling mlx5 feature description
  2022-02-23 18:56       ` Ferruh Yigit
@ 2022-02-24 10:56         ` Slava Ovsiienko
  0 siblings, 0 replies; 18+ messages in thread
From: Slava Ovsiienko @ 2022-02-24 10:56 UTC (permalink / raw)
  To: Ferruh Yigit, dev; +Cc: Matan Azrad, Raslan Darawsheh

> -----Original Message-----
> From: Ferruh Yigit <ferruh.yigit@intel.com>
> Sent: Wednesday, February 23, 2022 20:56
> To: Slava Ovsiienko <viacheslavo@nvidia.com>; dev@dpdk.org
> Cc: Matan Azrad <matan@nvidia.com>; Raslan Darawsheh
> <rasland@nvidia.com>
> Subject: Re: [PATCH v2 4/4] doc: update send scheduling mlx5 feature
> description
> 
> On 2/22/2022 10:26 AM, Viacheslav Ovsiienko wrote:
> > Updated:
> >    - send scheduling feature description for mlx5
> >    - release notes
> >
> > Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
> > ---
> >   doc/guides/nics/mlx5.rst               | 5 +++++
> >   doc/guides/rel_notes/release_22_03.rst | 6 ++++++
> >   2 files changed, 11 insertions(+)
> >
> 
> Hi Viacheslav,
> 
> Can you please distribute the doc updates to the patches that are doing the
> updates in this patch, instead of having a separate patch for it?

Yes, please:
http://patches.dpdk.org/project/dpdk/cover/20220224105501.6549-1-viacheslavo@nvidia.com/

With best regards,
Slava

^ permalink raw reply	[flat|nested] 18+ messages in thread

* RE: [PATCH v3 0/3] net/mlx5: support send scheduling for ConnectX-7
  2022-02-24 10:54   ` [PATCH v3 0/3] " Viacheslav Ovsiienko
                       ` (2 preceding siblings ...)
  2022-02-24 10:55     ` [PATCH v3 3/3] net/mlx5: add wait on time support in Tx datapath Viacheslav Ovsiienko
@ 2022-02-24 12:48     ` Raslan Darawsheh
  3 siblings, 0 replies; 18+ messages in thread
From: Raslan Darawsheh @ 2022-02-24 12:48 UTC (permalink / raw)
  To: Slava Ovsiienko, dev; +Cc: Matan Azrad

Hi,

> -----Original Message-----
> From: Slava Ovsiienko <viacheslavo@nvidia.com>
> Sent: Thursday, February 24, 2022 12:55 PM
> To: dev@dpdk.org
> Cc: Matan Azrad <matan@nvidia.com>; Raslan Darawsheh
> <rasland@nvidia.com>
> Subject: [PATCH v3 0/3] net/mlx5: support send scheduling for ConnectX-7
> 
> Since the ConnectX-6DX the send scheduling capability is provided.
> An application can register the dynamic field and dynamic flags in mbuf for
> timestamp and specify the desired moment of time the packet should be sent.
> 
> The send scheduling feature over ConnectX-6DX uses the complicated
> infrastructure with reference Clock Queue and inter-queue synchronizing
> operations. Since ConnectX-7 the new wait descriptor format is introduced
> where the timestamp can be promoted to hardware directly.
> The patchset adds support for this new hardware option in PMD.
> 
> Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
> 
> v1: http://patches.dpdk.org/project/dpdk/cover/20220214085655.22648-1-
> viacheslavo@nvidia.com/
> v2: http://patches.dpdk.org/project/dpdk/cover/20220222102648.4662-1-
> viacheslavo@nvidia.com/
>     - fix compilation issues
>     - rebase
> v3: documentation patch rearranging
> 
> Viacheslav Ovsiienko (3):
>   common/mlx5: add send on time capability check
>   net/mlx5: configure Tx queue with send on time offload
>   net/mlx5: add wait on time support in Tx datapath
> 
>  doc/guides/nics/mlx5.rst               |  5 ++
>  doc/guides/rel_notes/release_22_03.rst |  6 +++
>  drivers/common/mlx5/mlx5_devx_cmds.c   |  1 +
>  drivers/common/mlx5/mlx5_devx_cmds.h   |  1 +
>  drivers/common/mlx5/mlx5_prm.h         | 27 +++++++++-
>  drivers/net/mlx5/linux/mlx5_verbs.c    |  4 ++
>  drivers/net/mlx5/mlx5.h                |  3 ++
>  drivers/net/mlx5/mlx5_devx.c           |  2 +
>  drivers/net/mlx5/mlx5_tx.h             | 75 +++++++++++++++++++++++---
>  drivers/net/mlx5/mlx5_txq.c            | 18 ++++++-
>  10 files changed, 131 insertions(+), 11 deletions(-)
> 
> --
> 2.18.1

Replaced V2 with this new version,
Series applied to next-net-mlx,

Kindest regards,
Raslan Darawsheh

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2022-02-24 12:48 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-14  8:56 [PATCH 0/4] net/mlx5: support send scheduling for ConnextX-7 Viacheslav Ovsiienko
2022-02-14  8:56 ` [PATCH 1/4] common/mlx5: add send on time capability check Viacheslav Ovsiienko
2022-02-22 10:26   ` [PATCH v2 0/4] net/mlx5: support send scheduling for ConnectX-7 Viacheslav Ovsiienko
2022-02-22 10:26     ` [PATCH v2 1/4] common/mlx5: add send on time capability check Viacheslav Ovsiienko
2022-02-22 10:26     ` [PATCH v2 2/4] net/mlx5: configure Tx queue with send on time offload Viacheslav Ovsiienko
2022-02-22 10:26     ` [PATCH v2 3/4] net/mlx5: add wait on time support in Tx datapath Viacheslav Ovsiienko
2022-02-22 10:26     ` [PATCH v2 4/4] doc: update send scheduling mlx5 feature description Viacheslav Ovsiienko
2022-02-23 18:56       ` Ferruh Yigit
2022-02-24 10:56         ` Slava Ovsiienko
2022-02-23 13:13     ` [PATCH v2 0/4] net/mlx5: support send scheduling for ConnectX-7 Raslan Darawsheh
2022-02-24 10:54   ` [PATCH v3 0/3] " Viacheslav Ovsiienko
2022-02-24 10:54     ` [PATCH v3 1/3] common/mlx5: add send on time capability check Viacheslav Ovsiienko
2022-02-24 10:55     ` [PATCH v3 2/3] net/mlx5: configure Tx queue with send on time offload Viacheslav Ovsiienko
2022-02-24 10:55     ` [PATCH v3 3/3] net/mlx5: add wait on time support in Tx datapath Viacheslav Ovsiienko
2022-02-24 12:48     ` [PATCH v3 0/3] net/mlx5: support send scheduling for ConnectX-7 Raslan Darawsheh
2022-02-14  8:56 ` [PATCH 2/4] net/mlx5: configure Tx queue with send on time offload Viacheslav Ovsiienko
2022-02-14  8:56 ` [PATCH 3/4] net/mlx5: add wait on time support in Tx datapath Viacheslav Ovsiienko
2022-02-14  8:56 ` [PATCH 4/4] doc: update send scheduling mlx5 feature description Viacheslav Ovsiienko

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).