DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH 0/4] net/mlx5: remove Tx descriptor reserved field usage
@ 2020-01-08 16:15 Viacheslav Ovsiienko
  2020-01-08 16:15 ` [dpdk-dev] [PATCH 1/4] net/mlx5: move Tx complete request routine Viacheslav Ovsiienko
                   ` (5 more replies)
  0 siblings, 6 replies; 31+ messages in thread
From: Viacheslav Ovsiienko @ 2020-01-08 16:15 UTC (permalink / raw)
  To: dev; +Cc: matan, rasland, orika

The current Tx datapath implementation in mlx5 PMD uses the
16-bit reserved field within transmit descriptor to store
the indices of the elts array keeping the mbuf pointers to be
freed on transmit completion. On completion PMD fetches the
descriptor index, then fetches the elts array index from
reserved field and frees the mbufs.

The new ConnectX-6DX NIC might use this reserved descriptor
field and existing implementation might not work in intended way.
To resolve this issue the dedicated buffer is introduced to
store indices to instead of descriptor field.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>

Viacheslav Ovsiienko (4):
  net/mlx5: move Tx complete request routine
  net/mlx5: update Tx error handling routine
  net/mlx5: add free on completion queue
  net/mlx5: engage free on completion queue

 drivers/net/mlx5/mlx5_rxtx.c | 153 ++++++++++++++++++++-----------------------
 drivers/net/mlx5/mlx5_rxtx.h |  13 ++--
 drivers/net/mlx5/mlx5_txq.c  |  19 +++++-
 3 files changed, 94 insertions(+), 91 deletions(-)

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH 1/4] net/mlx5: move Tx complete request routine
  2020-01-08 16:15 [dpdk-dev] [PATCH 0/4] net/mlx5: remove Tx descriptor reserved field usage Viacheslav Ovsiienko
@ 2020-01-08 16:15 ` Viacheslav Ovsiienko
  2020-01-08 16:15 ` [dpdk-dev] [PATCH 2/4] net/mlx5: update Tx error handling routine Viacheslav Ovsiienko
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 31+ messages in thread
From: Viacheslav Ovsiienko @ 2020-01-08 16:15 UTC (permalink / raw)
  To: dev; +Cc: matan, rasland, orika

The complete request flag is set once per Tx burst call,
the code of appropriate routine moved to the end of sending
loop. This is preparation step to remove WQE reserved field
usage to store index of elts to free.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
Acked-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/mlx5/mlx5_rxtx.c | 26 ++++----------------------
 1 file changed, 4 insertions(+), 22 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 25a2952..ee6d5fc 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -2145,9 +2145,6 @@ enum mlx5_txcmp_code {
  *   Pointer to TX queue structure.
  * @param loc
  *   Pointer to burst routine local context.
- * @param multi,
- *   Routine is called from multi-segment sending loop,
- *   do not correct the elts_head according to the pkts_copy.
  * @param olx
  *   Configured Tx offloads mask. It is fully defined at
  *   compile time and may be used for optimization.
@@ -2155,13 +2152,12 @@ enum mlx5_txcmp_code {
 static __rte_always_inline void
 mlx5_tx_request_completion(struct mlx5_txq_data *restrict txq,
 			   struct mlx5_txq_local *restrict loc,
-			   bool multi,
 			   unsigned int olx)
 {
 	uint16_t head = txq->elts_head;
 	unsigned int part;
 
-	part = (MLX5_TXOFF_CONFIG(INLINE) || multi) ?
+	part = MLX5_TXOFF_CONFIG(INLINE) ?
 	       0 : loc->pkts_sent - loc->pkts_copy;
 	head += part;
 	if ((uint16_t)(head - txq->elts_comp) >= MLX5_TX_COMP_THRESH ||
@@ -3120,8 +3116,6 @@ enum mlx5_txcmp_code {
 	wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds);
 	txq->wqe_ci += (ds + 3) / 4;
 	loc->wqe_free -= (ds + 3) / 4;
-	/* Request CQE generation if limits are reached. */
-	mlx5_tx_request_completion(txq, loc, true, olx);
 	return MLX5_TXCMP_CODE_MULTI;
 }
 
@@ -3230,8 +3224,6 @@ enum mlx5_txcmp_code {
 	} while (true);
 	txq->wqe_ci += (ds + 3) / 4;
 	loc->wqe_free -= (ds + 3) / 4;
-	/* Request CQE generation if limits are reached. */
-	mlx5_tx_request_completion(txq, loc, true, olx);
 	return MLX5_TXCMP_CODE_MULTI;
 }
 
@@ -3388,8 +3380,6 @@ enum mlx5_txcmp_code {
 	wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds);
 	txq->wqe_ci += (ds + 3) / 4;
 	loc->wqe_free -= (ds + 3) / 4;
-	/* Request CQE generation if limits are reached. */
-	mlx5_tx_request_completion(txq, loc, true, olx);
 	return MLX5_TXCMP_CODE_MULTI;
 }
 
@@ -3599,8 +3589,6 @@ enum mlx5_txcmp_code {
 		--loc->elts_free;
 		++loc->pkts_sent;
 		--pkts_n;
-		/* Request CQE generation if limits are reached. */
-		mlx5_tx_request_completion(txq, loc, false, olx);
 		if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free))
 			return MLX5_TXCMP_CODE_EXIT;
 		loc->mbuf = *pkts++;
@@ -3750,7 +3738,7 @@ enum mlx5_txcmp_code {
 		   struct mlx5_txq_local *restrict loc,
 		   unsigned int ds,
 		   unsigned int slen,
-		   unsigned int olx)
+		   unsigned int olx __rte_unused)
 {
 	assert(!MLX5_TXOFF_CONFIG(INLINE));
 #ifdef MLX5_PMD_SOFT_COUNTERS
@@ -3765,8 +3753,6 @@ enum mlx5_txcmp_code {
 	loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds);
 	txq->wqe_ci += (ds + 3) / 4;
 	loc->wqe_free -= (ds + 3) / 4;
-	/* Request CQE generation if limits are reached. */
-	mlx5_tx_request_completion(txq, loc, false, olx);
 }
 
 /*
@@ -3809,8 +3795,6 @@ enum mlx5_txcmp_code {
 	loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len);
 	txq->wqe_ci += (len + 3) / 4;
 	loc->wqe_free -= (len + 3) / 4;
-	/* Request CQE generation if limits are reached. */
-	mlx5_tx_request_completion(txq, loc, false, olx);
 }
 
 /**
@@ -4011,8 +3995,6 @@ enum mlx5_txcmp_code {
 		txq->wqe_ci += (2 + part + 3) / 4;
 		loc->wqe_free -= (2 + part + 3) / 4;
 		pkts_n -= part;
-		/* Request CQE generation if limits are reached. */
-		mlx5_tx_request_completion(txq, loc, false, olx);
 		if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free))
 			return MLX5_TXCMP_CODE_EXIT;
 		loc->mbuf = *pkts++;
@@ -4496,8 +4478,6 @@ enum mlx5_txcmp_code {
 		}
 		++loc->pkts_sent;
 		--pkts_n;
-		/* Request CQE generation if limits are reached. */
-		mlx5_tx_request_completion(txq, loc, false, olx);
 		if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free))
 			return MLX5_TXCMP_CODE_EXIT;
 		loc->mbuf = *pkts++;
@@ -4776,6 +4756,8 @@ enum mlx5_txcmp_code {
 	/* Take a shortcut if nothing is sent. */
 	if (unlikely(loc.pkts_sent == loc.pkts_loop))
 		goto burst_exit;
+	/* Request CQE generation if limits are reached. */
+	mlx5_tx_request_completion(txq, &loc, olx);
 	/*
 	 * Ring QP doorbell immediately after WQE building completion
 	 * to improve latencies. The pure software related data treatment
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH 2/4] net/mlx5: update Tx error handling routine
  2020-01-08 16:15 [dpdk-dev] [PATCH 0/4] net/mlx5: remove Tx descriptor reserved field usage Viacheslav Ovsiienko
  2020-01-08 16:15 ` [dpdk-dev] [PATCH 1/4] net/mlx5: move Tx complete request routine Viacheslav Ovsiienko
@ 2020-01-08 16:15 ` Viacheslav Ovsiienko
  2020-01-08 16:16 ` [dpdk-dev] [PATCH 3/4] net/mlx5: add free on completion queue Viacheslav Ovsiienko
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 31+ messages in thread
From: Viacheslav Ovsiienko @ 2020-01-08 16:15 UTC (permalink / raw)
  To: dev; +Cc: matan, rasland, orika

This is preparation step, we are going to store the index
of elts to free on completion in the dedicated free on
completion queue, this patch updates the elts freeing routine
and updates Tx error handling routine to be synched with
coming new queue.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
Acked-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/mlx5/mlx5_rxtx.c | 98 +++++++++++++++++++++++---------------------
 drivers/net/mlx5/mlx5_rxtx.h |  4 +-
 drivers/net/mlx5/mlx5_txq.c  |  2 +-
 3 files changed, 54 insertions(+), 50 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index ee6d5fc..b7b40ac 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -654,10 +654,10 @@ enum mlx5_txcmp_code {
  *   Pointer to the error CQE.
  *
  * @return
- *   Negative value if queue recovery failed,
- *   the last Tx buffer element to free otherwise.
+ *   Negative value if queue recovery failed, otherwise
+ *   the error completion entry is handled successfully.
  */
-int
+static int
 mlx5_tx_error_cqe_handle(struct mlx5_txq_data *restrict txq,
 			 volatile struct mlx5_err_cqe *err_cqe)
 {
@@ -701,18 +701,14 @@ enum mlx5_txcmp_code {
 			 */
 			txq->stats.oerrors += ((txq->wqe_ci & wqe_m) -
 						new_wqe_pi) & wqe_m;
-		if (tx_recover_qp(txq_ctrl) == 0) {
-			txq->cq_ci++;
-			/* Release all the remaining buffers. */
-			return txq->elts_head;
+		if (tx_recover_qp(txq_ctrl)) {
+			/* Recovering failed - retry later on the same WQE. */
+			return -1;
 		}
-		/* Recovering failed - try again later on the same WQE. */
-		return -1;
-	} else {
-		txq->cq_ci++;
+		/* Release all the remaining buffers. */
+		txq_free_elts(txq_ctrl);
 	}
-	/* Do not release buffers. */
-	return txq->elts_tail;
+	return 0;
 }
 
 /**
@@ -2034,8 +2030,6 @@ enum mlx5_txcmp_code {
  *   Pointer to TX queue structure.
  * @param valid CQE pointer
  *   if not NULL update txq->wqe_pi and flush the buffers
- * @param itail
- *   if not negative - flush the buffers till this index.
  * @param olx
  *   Configured Tx offloads mask. It is fully defined at
  *   compile time and may be used for optimization.
@@ -2043,25 +2037,18 @@ enum mlx5_txcmp_code {
 static __rte_always_inline void
 mlx5_tx_comp_flush(struct mlx5_txq_data *restrict txq,
 		   volatile struct mlx5_cqe *last_cqe,
-		   int itail,
 		   unsigned int olx __rte_unused)
 {
-	uint16_t tail;
-
 	if (likely(last_cqe != NULL)) {
+		uint16_t tail;
+
 		txq->wqe_pi = rte_be_to_cpu_16(last_cqe->wqe_counter);
 		tail = ((volatile struct mlx5_wqe_cseg *)
 			(txq->wqes + (txq->wqe_pi & txq->wqe_m)))->misc;
-	} else if (itail >= 0) {
-		tail = (uint16_t)itail;
-	} else {
-		return;
-	}
-	rte_compiler_barrier();
-	*txq->cq_db = rte_cpu_to_be_32(txq->cq_ci);
-	if (likely(tail != txq->elts_tail)) {
-		mlx5_tx_free_elts(txq, tail, olx);
-		assert(tail == txq->elts_tail);
+		if (likely(tail != txq->elts_tail)) {
+			mlx5_tx_free_elts(txq, tail, olx);
+			assert(tail == txq->elts_tail);
+		}
 	}
 }
 
@@ -2085,6 +2072,7 @@ enum mlx5_txcmp_code {
 {
 	unsigned int count = MLX5_TX_COMP_MAX_CQE;
 	volatile struct mlx5_cqe *last_cqe = NULL;
+	uint16_t ci = txq->cq_ci;
 	int ret;
 
 	static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be negative value");
@@ -2092,8 +2080,8 @@ enum mlx5_txcmp_code {
 	do {
 		volatile struct mlx5_cqe *cqe;
 
-		cqe = &txq->cqes[txq->cq_ci & txq->cqe_m];
-		ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci);
+		cqe = &txq->cqes[ci & txq->cqe_m];
+		ret = check_cqe(cqe, txq->cqe_s, ci);
 		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
 			if (likely(ret != MLX5_CQE_STATUS_ERR)) {
 				/* No new CQEs in completion queue. */
@@ -2109,31 +2097,49 @@ enum mlx5_txcmp_code {
 			rte_wmb();
 			ret = mlx5_tx_error_cqe_handle
 				(txq, (volatile struct mlx5_err_cqe *)cqe);
+			if (unlikely(ret < 0)) {
+				/*
+				 * Some error occurred on queue error
+				 * handling, we do not advance the index
+				 * here, allowing to retry on next call.
+				 */
+				return;
+			}
 			/*
-			 * Flush buffers, update consuming index
-			 * if recovery succeeded. Otherwise
-			 * just try to recover later.
+			 * We are going to fetch all entries with
+			 * MLX5_CQE_SYNDROME_WR_FLUSH_ERR status.
 			 */
-			last_cqe = NULL;
-			break;
+			++ci;
+			continue;
 		}
 		/* Normal transmit completion. */
-		++txq->cq_ci;
+		++ci;
 		last_cqe = cqe;
 #ifndef NDEBUG
 		if (txq->cq_pi)
 			--txq->cq_pi;
 #endif
-	/*
-	 * We have to restrict the amount of processed CQEs
-	 * in one tx_burst routine call. The CQ may be large
-	 * and many CQEs may be updated by the NIC in one
-	 * transaction. Buffers freeing is time consuming,
-	 * multiple iterations may introduce significant
-	 * latency.
-	 */
-	} while (--count);
-	mlx5_tx_comp_flush(txq, last_cqe, ret, olx);
+		/*
+		 * We have to restrict the amount of processed CQEs
+		 * in one tx_burst routine call. The CQ may be large
+		 * and many CQEs may be updated by the NIC in one
+		 * transaction. Buffers freeing is time consuming,
+		 * multiple iterations may introduce significant
+		 * latency.
+		 */
+		if (--count == 0)
+			break;
+	} while (true);
+	if (likely(ci != txq->cq_ci)) {
+		/*
+		 * Update completion queue consuming index
+		 * and ring doorbell to notify hardware.
+		 */
+		rte_compiler_barrier();
+		txq->cq_ci = ci;
+		*txq->cq_db = rte_cpu_to_be_32(ci);
+		mlx5_tx_comp_flush(txq, last_cqe, olx);
+	}
 }
 
 /**
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index e927343..8a2185a 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -440,6 +440,7 @@ struct mlx5_txq_ctrl *mlx5_txq_hairpin_new
 int mlx5_txq_releasable(struct rte_eth_dev *dev, uint16_t idx);
 int mlx5_txq_verify(struct rte_eth_dev *dev);
 void txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl);
+void txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl);
 uint64_t mlx5_get_tx_port_offloads(struct rte_eth_dev *dev);
 
 /* mlx5_rxtx.c */
@@ -451,9 +452,6 @@ struct mlx5_txq_ctrl *mlx5_txq_hairpin_new
 void mlx5_set_ptype_table(void);
 void mlx5_set_cksum_table(void);
 void mlx5_set_swp_types_table(void);
-__rte_noinline int mlx5_tx_error_cqe_handle
-				(struct mlx5_txq_data *restrict txq,
-				 volatile struct mlx5_err_cqe *err_cqe);
 uint16_t mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n);
 void mlx5_rxq_initialize(struct mlx5_rxq_data *rxq);
 __rte_noinline int mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec);
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 1c4f7e7..abe0947 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -62,7 +62,7 @@
  * @param txq_ctrl
  *   Pointer to TX queue structure.
  */
-static void
+void
 txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl)
 {
 	const uint16_t elts_n = 1 << txq_ctrl->txq.elts_n;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH 3/4] net/mlx5: add free on completion queue
  2020-01-08 16:15 [dpdk-dev] [PATCH 0/4] net/mlx5: remove Tx descriptor reserved field usage Viacheslav Ovsiienko
  2020-01-08 16:15 ` [dpdk-dev] [PATCH 1/4] net/mlx5: move Tx complete request routine Viacheslav Ovsiienko
  2020-01-08 16:15 ` [dpdk-dev] [PATCH 2/4] net/mlx5: update Tx error handling routine Viacheslav Ovsiienko
@ 2020-01-08 16:16 ` Viacheslav Ovsiienko
  2020-01-08 16:16 ` [dpdk-dev] [PATCH 4/4] net/mlx5: engage " Viacheslav Ovsiienko
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 31+ messages in thread
From: Viacheslav Ovsiienko @ 2020-01-08 16:16 UTC (permalink / raw)
  To: dev; +Cc: matan, rasland, orika

The new software manged entity is introduced in Tx datapath
- free on completion queue. This queue keeps the information
how many buffers stored in elts array must freed on send
comletion. Each element of the queue contains transmitting
descriptor index to be in synch with completion entries (in
debug build only) and the index in elts array to free buffers.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
Acked-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/mlx5/mlx5_rxtx.h |  5 +++++
 drivers/net/mlx5/mlx5_txq.c  | 17 +++++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 8a2185a..ee1895b 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -297,6 +297,11 @@ struct mlx5_txq_data {
 	struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */
 	struct mlx5_wqe *wqes; /* Work queue. */
 	struct mlx5_wqe *wqes_end; /* Work queue array limit. */
+#ifdef NDEBUG
+	uint32_t *fcqs; /* Free completion queue. */
+#else
+	uint32_t *fcqs; /* Free completion queue (debug extended). */
+#endif
 	volatile struct mlx5_cqe *cqes; /* Completion queue. */
 	volatile uint32_t *qp_db; /* Work queue doorbell. */
 	volatile uint32_t *cq_db; /* Completion queue doorbell. */
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index abe0947..5e6a605 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -724,6 +724,19 @@ struct mlx5_txq_obj *
 	txq_data->wqe_pi = 0;
 	txq_data->wqe_comp = 0;
 	txq_data->wqe_thres = txq_data->wqe_s / MLX5_TX_COMP_THRESH_INLINE_DIV;
+	txq_data->fcqs = rte_calloc_socket(__func__,
+					   txq_data->cqe_s,
+					   sizeof(*txq_data->fcqs),
+					   RTE_CACHE_LINE_SIZE,
+					   txq_ctrl->socket);
+	if (!txq_data->fcqs) {
+		DRV_LOG(ERR, "port %u Tx queue %u cannot allocate memory (FCQ)",
+			dev->data->port_id, idx);
+		rte_errno = ENOMEM;
+		goto error;
+	}
+	txq_data->fcq_head = 0;
+	txq_data->fcq_tail = 0;
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
 	/*
 	 * If using DevX need to query and store TIS transport domain value.
@@ -772,6 +785,8 @@ struct mlx5_txq_obj *
 		claim_zero(mlx5_glue->destroy_cq(tmpl.cq));
 	if (tmpl.qp)
 		claim_zero(mlx5_glue->destroy_qp(tmpl.qp));
+	if (txq_data && txq_data->fcqs)
+		rte_free(txq_data->fcqs);
 	if (txq_obj)
 		rte_free(txq_obj);
 	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
@@ -826,6 +841,8 @@ struct mlx5_txq_obj *
 		} else {
 			claim_zero(mlx5_glue->destroy_qp(txq_obj->qp));
 			claim_zero(mlx5_glue->destroy_cq(txq_obj->cq));
+				if (txq_obj->txq_ctrl->txq.fcqs)
+					rte_free(txq_obj->txq_ctrl->txq.fcqs);
 		}
 		LIST_REMOVE(txq_obj, next);
 		rte_free(txq_obj);
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH 4/4] net/mlx5: engage free on completion queue
  2020-01-08 16:15 [dpdk-dev] [PATCH 0/4] net/mlx5: remove Tx descriptor reserved field usage Viacheslav Ovsiienko
                   ` (2 preceding siblings ...)
  2020-01-08 16:16 ` [dpdk-dev] [PATCH 3/4] net/mlx5: add free on completion queue Viacheslav Ovsiienko
@ 2020-01-08 16:16 ` Viacheslav Ovsiienko
  2020-01-09 10:56 ` [dpdk-dev] [PATCH v2 0/4] net/mlx5: remove Tx descriptor reserved field usage Viacheslav Ovsiienko
  2020-01-09 17:16 ` [dpdk-dev] [PATCH v3 " Viacheslav Ovsiienko
  5 siblings, 0 replies; 31+ messages in thread
From: Viacheslav Ovsiienko @ 2020-01-08 16:16 UTC (permalink / raw)
  To: dev; +Cc: matan, rasland, orika

The free on completion queue keeps the indices of elts array,
all mbuf stored below this index should be freed on arrival
of normal send completion. In debug version it also contains
an index of completed transmitting descriptor (WQE) to check
queues synchronization.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
Acked-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/mlx5/mlx5_rxtx.c | 33 +++++++++++++++++----------------
 drivers/net/mlx5/mlx5_rxtx.h |  6 ++----
 drivers/net/mlx5/mlx5_txq.c  |  4 ----
 3 files changed, 19 insertions(+), 24 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index b7b40ac..b11c5eb 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -2043,8 +2043,7 @@ enum mlx5_txcmp_code {
 		uint16_t tail;
 
 		txq->wqe_pi = rte_be_to_cpu_16(last_cqe->wqe_counter);
-		tail = ((volatile struct mlx5_wqe_cseg *)
-			(txq->wqes + (txq->wqe_pi & txq->wqe_m)))->misc;
+		tail = txq->fcqs[(txq->cq_ci - 1) & txq->cqe_m];
 		if (likely(tail != txq->elts_tail)) {
 			mlx5_tx_free_elts(txq, tail, olx);
 			assert(tail == txq->elts_tail);
@@ -2095,6 +2094,7 @@ enum mlx5_txcmp_code {
 			 * here, before we might perform SQ reset.
 			 */
 			rte_wmb();
+			txq->cq_ci = ci;
 			ret = mlx5_tx_error_cqe_handle
 				(txq, (volatile struct mlx5_err_cqe *)cqe);
 			if (unlikely(ret < 0)) {
@@ -2108,17 +2108,18 @@ enum mlx5_txcmp_code {
 			/*
 			 * We are going to fetch all entries with
 			 * MLX5_CQE_SYNDROME_WR_FLUSH_ERR status.
+			 * The send queue is supposed to be empty.
 			 */
 			++ci;
+			txq->cq_pi = ci;
+			last_cqe = NULL;
 			continue;
 		}
 		/* Normal transmit completion. */
+		assert(ci != txq->cq_pi);
+		assert((txq->fcqs[ci & txq->cqe_m] >> 16) == cqe->wqe_counter);
 		++ci;
 		last_cqe = cqe;
-#ifndef NDEBUG
-		if (txq->cq_pi)
-			--txq->cq_pi;
-#endif
 		/*
 		 * We have to restrict the amount of processed CQEs
 		 * in one tx_burst routine call. The CQ may be large
@@ -2127,7 +2128,7 @@ enum mlx5_txcmp_code {
 		 * multiple iterations may introduce significant
 		 * latency.
 		 */
-		if (--count == 0)
+		if (likely(--count == 0))
 			break;
 	} while (true);
 	if (likely(ci != txq->cq_ci)) {
@@ -2177,15 +2178,15 @@ enum mlx5_txcmp_code {
 		/* Request unconditional completion on last WQE. */
 		last->cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
 					    MLX5_COMP_MODE_OFFSET);
-		/* Save elts_head in unused "immediate" field of WQE. */
-		last->cseg.misc = head;
-		/*
-		 * A CQE slot must always be available. Count the
-		 * issued CEQ "always" request instead of production
-		 * index due to here can be CQE with errors and
-		 * difference with ci may become inconsistent.
-		 */
-		assert(txq->cqe_s > ++txq->cq_pi);
+		/* Save elts_head in dedicated free on completion queue. */
+#ifdef NDEBUG
+		txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head;
+#else
+		txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head |
+					(last->cseg.opcode >> 8) << 16;
+#endif
+		/* A CQE slot must always be available. */
+		assert((txq->cq_pi - txq->cq_ci) <= txq->cqe_s);
 	}
 }
 
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index ee1895b..e362b4a 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -273,9 +273,7 @@ struct mlx5_txq_data {
 	uint16_t wqe_thres; /* WQE threshold to request completion in CQ. */
 	/* WQ related fields. */
 	uint16_t cq_ci; /* Consumer index for completion queue. */
-#ifndef NDEBUG
-	uint16_t cq_pi; /* Counter of issued CQE "always" requests. */
-#endif
+	uint16_t cq_pi; /* Production index for completion queue. */
 	uint16_t cqe_s; /* Number of CQ elements. */
 	uint16_t cqe_m; /* Mask for CQ indices. */
 	/* CQ related fields. */
@@ -298,7 +296,7 @@ struct mlx5_txq_data {
 	struct mlx5_wqe *wqes; /* Work queue. */
 	struct mlx5_wqe *wqes_end; /* Work queue array limit. */
 #ifdef NDEBUG
-	uint32_t *fcqs; /* Free completion queue. */
+	uint16_t *fcqs; /* Free completion queue. */
 #else
 	uint32_t *fcqs; /* Free completion queue (debug extended). */
 #endif
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 5e6a605..c750082 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -717,9 +717,7 @@ struct mlx5_txq_obj *
 	txq_data->cq_db = cq_info.dbrec;
 	txq_data->cqes = (volatile struct mlx5_cqe *)cq_info.buf;
 	txq_data->cq_ci = 0;
-#ifndef NDEBUG
 	txq_data->cq_pi = 0;
-#endif
 	txq_data->wqe_ci = 0;
 	txq_data->wqe_pi = 0;
 	txq_data->wqe_comp = 0;
@@ -735,8 +733,6 @@ struct mlx5_txq_obj *
 		rte_errno = ENOMEM;
 		goto error;
 	}
-	txq_data->fcq_head = 0;
-	txq_data->fcq_tail = 0;
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
 	/*
 	 * If using DevX need to query and store TIS transport domain value.
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH v2 0/4] net/mlx5: remove Tx descriptor reserved field usage
  2020-01-08 16:15 [dpdk-dev] [PATCH 0/4] net/mlx5: remove Tx descriptor reserved field usage Viacheslav Ovsiienko
                   ` (3 preceding siblings ...)
  2020-01-08 16:16 ` [dpdk-dev] [PATCH 4/4] net/mlx5: engage " Viacheslav Ovsiienko
@ 2020-01-09 10:56 ` Viacheslav Ovsiienko
  2020-01-09 10:56   ` [dpdk-dev] [PATCH v2 1/4] net/mlx5: move Tx complete request routine Viacheslav Ovsiienko
                     ` (5 more replies)
  2020-01-09 17:16 ` [dpdk-dev] [PATCH v3 " Viacheslav Ovsiienko
  5 siblings, 6 replies; 31+ messages in thread
From: Viacheslav Ovsiienko @ 2020-01-09 10:56 UTC (permalink / raw)
  To: dev; +Cc: matan, rasland, orika

The current Tx datapath implementation in mlx5 PMD uses the
16-bit reserved field within transmit descriptor to store
the indices of the elts array keeping the mbuf pointers to be
freed on transmit completion. On completion PMD fetches the
descriptor index, then fetches the elts array index from
reserved field and frees the mbufs.

The new ConnectX-6DX NIC might use this reserved descriptor
field and existing implementation might not work in intended way.
To resolve this issue the dedicated buffer is introduced to
store indices to instead of descriptor field.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>

Viacheslav Ovsiienko (4):
  net/mlx5: move Tx complete request routine
  net/mlx5: update Tx error handling routine
  net/mlx5: add free on completion queue
  net/mlx5: engage free on completion queue

 drivers/net/mlx5/mlx5_rxtx.c | 153 ++++++++++++++++++++-----------------------
 drivers/net/mlx5/mlx5_rxtx.h |  13 ++--
 drivers/net/mlx5/mlx5_txq.c  |  19 +++++-
 3 files changed, 94 insertions(+), 91 deletions(-)

--
v1: http://patches.dpdk.org/cover/64293/
v2: resolve minor compilation per patch issues

1.8.3.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH v2 1/4] net/mlx5: move Tx complete request routine
  2020-01-09 10:56 ` [dpdk-dev] [PATCH v2 0/4] net/mlx5: remove Tx descriptor reserved field usage Viacheslav Ovsiienko
@ 2020-01-09 10:56   ` Viacheslav Ovsiienko
  2020-01-09 10:56   ` [dpdk-dev] [PATCH v2 2/4] net/mlx5: update Tx error handling routine Viacheslav Ovsiienko
                     ` (4 subsequent siblings)
  5 siblings, 0 replies; 31+ messages in thread
From: Viacheslav Ovsiienko @ 2020-01-09 10:56 UTC (permalink / raw)
  To: dev; +Cc: matan, rasland, orika

The complete request flag is set once per Tx burst call,
the code of appropriate routine moved to the end of sending
loop. This is preparation step to remove WQE reserved field
usage to store index of elts to free.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
Acked-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/mlx5/mlx5_rxtx.c | 26 ++++----------------------
 1 file changed, 4 insertions(+), 22 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 25a2952..ee6d5fc 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -2145,9 +2145,6 @@ enum mlx5_txcmp_code {
  *   Pointer to TX queue structure.
  * @param loc
  *   Pointer to burst routine local context.
- * @param multi,
- *   Routine is called from multi-segment sending loop,
- *   do not correct the elts_head according to the pkts_copy.
  * @param olx
  *   Configured Tx offloads mask. It is fully defined at
  *   compile time and may be used for optimization.
@@ -2155,13 +2152,12 @@ enum mlx5_txcmp_code {
 static __rte_always_inline void
 mlx5_tx_request_completion(struct mlx5_txq_data *restrict txq,
 			   struct mlx5_txq_local *restrict loc,
-			   bool multi,
 			   unsigned int olx)
 {
 	uint16_t head = txq->elts_head;
 	unsigned int part;
 
-	part = (MLX5_TXOFF_CONFIG(INLINE) || multi) ?
+	part = MLX5_TXOFF_CONFIG(INLINE) ?
 	       0 : loc->pkts_sent - loc->pkts_copy;
 	head += part;
 	if ((uint16_t)(head - txq->elts_comp) >= MLX5_TX_COMP_THRESH ||
@@ -3120,8 +3116,6 @@ enum mlx5_txcmp_code {
 	wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds);
 	txq->wqe_ci += (ds + 3) / 4;
 	loc->wqe_free -= (ds + 3) / 4;
-	/* Request CQE generation if limits are reached. */
-	mlx5_tx_request_completion(txq, loc, true, olx);
 	return MLX5_TXCMP_CODE_MULTI;
 }
 
@@ -3230,8 +3224,6 @@ enum mlx5_txcmp_code {
 	} while (true);
 	txq->wqe_ci += (ds + 3) / 4;
 	loc->wqe_free -= (ds + 3) / 4;
-	/* Request CQE generation if limits are reached. */
-	mlx5_tx_request_completion(txq, loc, true, olx);
 	return MLX5_TXCMP_CODE_MULTI;
 }
 
@@ -3388,8 +3380,6 @@ enum mlx5_txcmp_code {
 	wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds);
 	txq->wqe_ci += (ds + 3) / 4;
 	loc->wqe_free -= (ds + 3) / 4;
-	/* Request CQE generation if limits are reached. */
-	mlx5_tx_request_completion(txq, loc, true, olx);
 	return MLX5_TXCMP_CODE_MULTI;
 }
 
@@ -3599,8 +3589,6 @@ enum mlx5_txcmp_code {
 		--loc->elts_free;
 		++loc->pkts_sent;
 		--pkts_n;
-		/* Request CQE generation if limits are reached. */
-		mlx5_tx_request_completion(txq, loc, false, olx);
 		if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free))
 			return MLX5_TXCMP_CODE_EXIT;
 		loc->mbuf = *pkts++;
@@ -3750,7 +3738,7 @@ enum mlx5_txcmp_code {
 		   struct mlx5_txq_local *restrict loc,
 		   unsigned int ds,
 		   unsigned int slen,
-		   unsigned int olx)
+		   unsigned int olx __rte_unused)
 {
 	assert(!MLX5_TXOFF_CONFIG(INLINE));
 #ifdef MLX5_PMD_SOFT_COUNTERS
@@ -3765,8 +3753,6 @@ enum mlx5_txcmp_code {
 	loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds);
 	txq->wqe_ci += (ds + 3) / 4;
 	loc->wqe_free -= (ds + 3) / 4;
-	/* Request CQE generation if limits are reached. */
-	mlx5_tx_request_completion(txq, loc, false, olx);
 }
 
 /*
@@ -3809,8 +3795,6 @@ enum mlx5_txcmp_code {
 	loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len);
 	txq->wqe_ci += (len + 3) / 4;
 	loc->wqe_free -= (len + 3) / 4;
-	/* Request CQE generation if limits are reached. */
-	mlx5_tx_request_completion(txq, loc, false, olx);
 }
 
 /**
@@ -4011,8 +3995,6 @@ enum mlx5_txcmp_code {
 		txq->wqe_ci += (2 + part + 3) / 4;
 		loc->wqe_free -= (2 + part + 3) / 4;
 		pkts_n -= part;
-		/* Request CQE generation if limits are reached. */
-		mlx5_tx_request_completion(txq, loc, false, olx);
 		if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free))
 			return MLX5_TXCMP_CODE_EXIT;
 		loc->mbuf = *pkts++;
@@ -4496,8 +4478,6 @@ enum mlx5_txcmp_code {
 		}
 		++loc->pkts_sent;
 		--pkts_n;
-		/* Request CQE generation if limits are reached. */
-		mlx5_tx_request_completion(txq, loc, false, olx);
 		if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free))
 			return MLX5_TXCMP_CODE_EXIT;
 		loc->mbuf = *pkts++;
@@ -4776,6 +4756,8 @@ enum mlx5_txcmp_code {
 	/* Take a shortcut if nothing is sent. */
 	if (unlikely(loc.pkts_sent == loc.pkts_loop))
 		goto burst_exit;
+	/* Request CQE generation if limits are reached. */
+	mlx5_tx_request_completion(txq, &loc, olx);
 	/*
 	 * Ring QP doorbell immediately after WQE building completion
 	 * to improve latencies. The pure software related data treatment
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH v2 2/4] net/mlx5: update Tx error handling routine
  2020-01-09 10:56 ` [dpdk-dev] [PATCH v2 0/4] net/mlx5: remove Tx descriptor reserved field usage Viacheslav Ovsiienko
  2020-01-09 10:56   ` [dpdk-dev] [PATCH v2 1/4] net/mlx5: move Tx complete request routine Viacheslav Ovsiienko
@ 2020-01-09 10:56   ` Viacheslav Ovsiienko
  2020-01-09 10:56   ` [dpdk-dev] [PATCH v2 3/4] net/mlx5: add free on completion queue Viacheslav Ovsiienko
                     ` (3 subsequent siblings)
  5 siblings, 0 replies; 31+ messages in thread
From: Viacheslav Ovsiienko @ 2020-01-09 10:56 UTC (permalink / raw)
  To: dev; +Cc: matan, rasland, orika

This is preparation step, we are going to store the index
of elts to free on completion in the dedicated free on
completion queue, this patch updates the elts freeing routine
and updates Tx error handling routine to be synched with
coming new queue.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
Acked-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/mlx5/mlx5_rxtx.c | 98 +++++++++++++++++++++++---------------------
 drivers/net/mlx5/mlx5_rxtx.h |  4 +-
 drivers/net/mlx5/mlx5_txq.c  |  2 +-
 3 files changed, 54 insertions(+), 50 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index ee6d5fc..b7b40ac 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -654,10 +654,10 @@ enum mlx5_txcmp_code {
  *   Pointer to the error CQE.
  *
  * @return
- *   Negative value if queue recovery failed,
- *   the last Tx buffer element to free otherwise.
+ *   Negative value if queue recovery failed, otherwise
+ *   the error completion entry is handled successfully.
  */
-int
+static int
 mlx5_tx_error_cqe_handle(struct mlx5_txq_data *restrict txq,
 			 volatile struct mlx5_err_cqe *err_cqe)
 {
@@ -701,18 +701,14 @@ enum mlx5_txcmp_code {
 			 */
 			txq->stats.oerrors += ((txq->wqe_ci & wqe_m) -
 						new_wqe_pi) & wqe_m;
-		if (tx_recover_qp(txq_ctrl) == 0) {
-			txq->cq_ci++;
-			/* Release all the remaining buffers. */
-			return txq->elts_head;
+		if (tx_recover_qp(txq_ctrl)) {
+			/* Recovering failed - retry later on the same WQE. */
+			return -1;
 		}
-		/* Recovering failed - try again later on the same WQE. */
-		return -1;
-	} else {
-		txq->cq_ci++;
+		/* Release all the remaining buffers. */
+		txq_free_elts(txq_ctrl);
 	}
-	/* Do not release buffers. */
-	return txq->elts_tail;
+	return 0;
 }
 
 /**
@@ -2034,8 +2030,6 @@ enum mlx5_txcmp_code {
  *   Pointer to TX queue structure.
  * @param valid CQE pointer
  *   if not NULL update txq->wqe_pi and flush the buffers
- * @param itail
- *   if not negative - flush the buffers till this index.
  * @param olx
  *   Configured Tx offloads mask. It is fully defined at
  *   compile time and may be used for optimization.
@@ -2043,25 +2037,18 @@ enum mlx5_txcmp_code {
 static __rte_always_inline void
 mlx5_tx_comp_flush(struct mlx5_txq_data *restrict txq,
 		   volatile struct mlx5_cqe *last_cqe,
-		   int itail,
 		   unsigned int olx __rte_unused)
 {
-	uint16_t tail;
-
 	if (likely(last_cqe != NULL)) {
+		uint16_t tail;
+
 		txq->wqe_pi = rte_be_to_cpu_16(last_cqe->wqe_counter);
 		tail = ((volatile struct mlx5_wqe_cseg *)
 			(txq->wqes + (txq->wqe_pi & txq->wqe_m)))->misc;
-	} else if (itail >= 0) {
-		tail = (uint16_t)itail;
-	} else {
-		return;
-	}
-	rte_compiler_barrier();
-	*txq->cq_db = rte_cpu_to_be_32(txq->cq_ci);
-	if (likely(tail != txq->elts_tail)) {
-		mlx5_tx_free_elts(txq, tail, olx);
-		assert(tail == txq->elts_tail);
+		if (likely(tail != txq->elts_tail)) {
+			mlx5_tx_free_elts(txq, tail, olx);
+			assert(tail == txq->elts_tail);
+		}
 	}
 }
 
@@ -2085,6 +2072,7 @@ enum mlx5_txcmp_code {
 {
 	unsigned int count = MLX5_TX_COMP_MAX_CQE;
 	volatile struct mlx5_cqe *last_cqe = NULL;
+	uint16_t ci = txq->cq_ci;
 	int ret;
 
 	static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be negative value");
@@ -2092,8 +2080,8 @@ enum mlx5_txcmp_code {
 	do {
 		volatile struct mlx5_cqe *cqe;
 
-		cqe = &txq->cqes[txq->cq_ci & txq->cqe_m];
-		ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci);
+		cqe = &txq->cqes[ci & txq->cqe_m];
+		ret = check_cqe(cqe, txq->cqe_s, ci);
 		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
 			if (likely(ret != MLX5_CQE_STATUS_ERR)) {
 				/* No new CQEs in completion queue. */
@@ -2109,31 +2097,49 @@ enum mlx5_txcmp_code {
 			rte_wmb();
 			ret = mlx5_tx_error_cqe_handle
 				(txq, (volatile struct mlx5_err_cqe *)cqe);
+			if (unlikely(ret < 0)) {
+				/*
+				 * Some error occurred on queue error
+				 * handling, we do not advance the index
+				 * here, allowing to retry on next call.
+				 */
+				return;
+			}
 			/*
-			 * Flush buffers, update consuming index
-			 * if recovery succeeded. Otherwise
-			 * just try to recover later.
+			 * We are going to fetch all entries with
+			 * MLX5_CQE_SYNDROME_WR_FLUSH_ERR status.
 			 */
-			last_cqe = NULL;
-			break;
+			++ci;
+			continue;
 		}
 		/* Normal transmit completion. */
-		++txq->cq_ci;
+		++ci;
 		last_cqe = cqe;
 #ifndef NDEBUG
 		if (txq->cq_pi)
 			--txq->cq_pi;
 #endif
-	/*
-	 * We have to restrict the amount of processed CQEs
-	 * in one tx_burst routine call. The CQ may be large
-	 * and many CQEs may be updated by the NIC in one
-	 * transaction. Buffers freeing is time consuming,
-	 * multiple iterations may introduce significant
-	 * latency.
-	 */
-	} while (--count);
-	mlx5_tx_comp_flush(txq, last_cqe, ret, olx);
+		/*
+		 * We have to restrict the amount of processed CQEs
+		 * in one tx_burst routine call. The CQ may be large
+		 * and many CQEs may be updated by the NIC in one
+		 * transaction. Buffers freeing is time consuming,
+		 * multiple iterations may introduce significant
+		 * latency.
+		 */
+		if (--count == 0)
+			break;
+	} while (true);
+	if (likely(ci != txq->cq_ci)) {
+		/*
+		 * Update completion queue consuming index
+		 * and ring doorbell to notify hardware.
+		 */
+		rte_compiler_barrier();
+		txq->cq_ci = ci;
+		*txq->cq_db = rte_cpu_to_be_32(ci);
+		mlx5_tx_comp_flush(txq, last_cqe, olx);
+	}
 }
 
 /**
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index e927343..8a2185a 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -440,6 +440,7 @@ struct mlx5_txq_ctrl *mlx5_txq_hairpin_new
 int mlx5_txq_releasable(struct rte_eth_dev *dev, uint16_t idx);
 int mlx5_txq_verify(struct rte_eth_dev *dev);
 void txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl);
+void txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl);
 uint64_t mlx5_get_tx_port_offloads(struct rte_eth_dev *dev);
 
 /* mlx5_rxtx.c */
@@ -451,9 +452,6 @@ struct mlx5_txq_ctrl *mlx5_txq_hairpin_new
 void mlx5_set_ptype_table(void);
 void mlx5_set_cksum_table(void);
 void mlx5_set_swp_types_table(void);
-__rte_noinline int mlx5_tx_error_cqe_handle
-				(struct mlx5_txq_data *restrict txq,
-				 volatile struct mlx5_err_cqe *err_cqe);
 uint16_t mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n);
 void mlx5_rxq_initialize(struct mlx5_rxq_data *rxq);
 __rte_noinline int mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec);
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 1c4f7e7..abe0947 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -62,7 +62,7 @@
  * @param txq_ctrl
  *   Pointer to TX queue structure.
  */
-static void
+void
 txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl)
 {
 	const uint16_t elts_n = 1 << txq_ctrl->txq.elts_n;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH v2 3/4] net/mlx5: add free on completion queue
  2020-01-09 10:56 ` [dpdk-dev] [PATCH v2 0/4] net/mlx5: remove Tx descriptor reserved field usage Viacheslav Ovsiienko
  2020-01-09 10:56   ` [dpdk-dev] [PATCH v2 1/4] net/mlx5: move Tx complete request routine Viacheslav Ovsiienko
  2020-01-09 10:56   ` [dpdk-dev] [PATCH v2 2/4] net/mlx5: update Tx error handling routine Viacheslav Ovsiienko
@ 2020-01-09 10:56   ` Viacheslav Ovsiienko
  2020-01-09 15:12     ` Ferruh Yigit
  2020-01-09 10:56   ` [dpdk-dev] [PATCH v2 4/4] net/mlx5: engage " Viacheslav Ovsiienko
                     ` (2 subsequent siblings)
  5 siblings, 1 reply; 31+ messages in thread
From: Viacheslav Ovsiienko @ 2020-01-09 10:56 UTC (permalink / raw)
  To: dev; +Cc: matan, rasland, orika

The new software manged entity is introduced in Tx datapath
- free on completion queue. This queue keeps the information
how many buffers stored in elts array must freed on send
comletion. Each element of the queue contains transmitting
descriptor index to be in synch with completion entries (in
debug build only) and the index in elts array to free buffers.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
Acked-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/mlx5/mlx5_rxtx.h |  5 +++++
 drivers/net/mlx5/mlx5_txq.c  | 15 +++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 8a2185a..ee1895b 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -297,6 +297,11 @@ struct mlx5_txq_data {
 	struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */
 	struct mlx5_wqe *wqes; /* Work queue. */
 	struct mlx5_wqe *wqes_end; /* Work queue array limit. */
+#ifdef NDEBUG
+	uint32_t *fcqs; /* Free completion queue. */
+#else
+	uint32_t *fcqs; /* Free completion queue (debug extended). */
+#endif
 	volatile struct mlx5_cqe *cqes; /* Completion queue. */
 	volatile uint32_t *qp_db; /* Work queue doorbell. */
 	volatile uint32_t *cq_db; /* Completion queue doorbell. */
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index abe0947..aee0970 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -724,6 +724,17 @@ struct mlx5_txq_obj *
 	txq_data->wqe_pi = 0;
 	txq_data->wqe_comp = 0;
 	txq_data->wqe_thres = txq_data->wqe_s / MLX5_TX_COMP_THRESH_INLINE_DIV;
+	txq_data->fcqs = rte_calloc_socket(__func__,
+					   txq_data->cqe_s,
+					   sizeof(*txq_data->fcqs),
+					   RTE_CACHE_LINE_SIZE,
+					   txq_ctrl->socket);
+	if (!txq_data->fcqs) {
+		DRV_LOG(ERR, "port %u Tx queue %u cannot allocate memory (FCQ)",
+			dev->data->port_id, idx);
+		rte_errno = ENOMEM;
+		goto error;
+	}
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
 	/*
 	 * If using DevX need to query and store TIS transport domain value.
@@ -772,6 +783,8 @@ struct mlx5_txq_obj *
 		claim_zero(mlx5_glue->destroy_cq(tmpl.cq));
 	if (tmpl.qp)
 		claim_zero(mlx5_glue->destroy_qp(tmpl.qp));
+	if (txq_data && txq_data->fcqs)
+		rte_free(txq_data->fcqs);
 	if (txq_obj)
 		rte_free(txq_obj);
 	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
@@ -826,6 +839,8 @@ struct mlx5_txq_obj *
 		} else {
 			claim_zero(mlx5_glue->destroy_qp(txq_obj->qp));
 			claim_zero(mlx5_glue->destroy_cq(txq_obj->cq));
+				if (txq_obj->txq_ctrl->txq.fcqs)
+					rte_free(txq_obj->txq_ctrl->txq.fcqs);
 		}
 		LIST_REMOVE(txq_obj, next);
 		rte_free(txq_obj);
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH v2 4/4] net/mlx5: engage free on completion queue
  2020-01-09 10:56 ` [dpdk-dev] [PATCH v2 0/4] net/mlx5: remove Tx descriptor reserved field usage Viacheslav Ovsiienko
                     ` (2 preceding siblings ...)
  2020-01-09 10:56   ` [dpdk-dev] [PATCH v2 3/4] net/mlx5: add free on completion queue Viacheslav Ovsiienko
@ 2020-01-09 10:56   ` Viacheslav Ovsiienko
  2020-01-09 15:18     ` Ferruh Yigit
  2020-01-09 14:22   ` [dpdk-dev] [PATCH v2 0/4] net/mlx5: remove Tx descriptor reserved field usage Raslan Darawsheh
  2020-01-13  9:35   ` Raslan Darawsheh
  5 siblings, 1 reply; 31+ messages in thread
From: Viacheslav Ovsiienko @ 2020-01-09 10:56 UTC (permalink / raw)
  To: dev; +Cc: matan, rasland, orika

The free on completion queue keeps the indices of elts array,
all mbuf stored below this index should be freed on arrival
of normal send completion. In debug version it also contains
an index of completed transmitting descriptor (WQE) to check
queues synchronization.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
Acked-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/mlx5/mlx5_rxtx.c | 33 +++++++++++++++++----------------
 drivers/net/mlx5/mlx5_rxtx.h |  6 ++----
 drivers/net/mlx5/mlx5_txq.c  |  2 --
 3 files changed, 19 insertions(+), 22 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index b7b40ac..b11c5eb 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -2043,8 +2043,7 @@ enum mlx5_txcmp_code {
 		uint16_t tail;
 
 		txq->wqe_pi = rte_be_to_cpu_16(last_cqe->wqe_counter);
-		tail = ((volatile struct mlx5_wqe_cseg *)
-			(txq->wqes + (txq->wqe_pi & txq->wqe_m)))->misc;
+		tail = txq->fcqs[(txq->cq_ci - 1) & txq->cqe_m];
 		if (likely(tail != txq->elts_tail)) {
 			mlx5_tx_free_elts(txq, tail, olx);
 			assert(tail == txq->elts_tail);
@@ -2095,6 +2094,7 @@ enum mlx5_txcmp_code {
 			 * here, before we might perform SQ reset.
 			 */
 			rte_wmb();
+			txq->cq_ci = ci;
 			ret = mlx5_tx_error_cqe_handle
 				(txq, (volatile struct mlx5_err_cqe *)cqe);
 			if (unlikely(ret < 0)) {
@@ -2108,17 +2108,18 @@ enum mlx5_txcmp_code {
 			/*
 			 * We are going to fetch all entries with
 			 * MLX5_CQE_SYNDROME_WR_FLUSH_ERR status.
+			 * The send queue is supposed to be empty.
 			 */
 			++ci;
+			txq->cq_pi = ci;
+			last_cqe = NULL;
 			continue;
 		}
 		/* Normal transmit completion. */
+		assert(ci != txq->cq_pi);
+		assert((txq->fcqs[ci & txq->cqe_m] >> 16) == cqe->wqe_counter);
 		++ci;
 		last_cqe = cqe;
-#ifndef NDEBUG
-		if (txq->cq_pi)
-			--txq->cq_pi;
-#endif
 		/*
 		 * We have to restrict the amount of processed CQEs
 		 * in one tx_burst routine call. The CQ may be large
@@ -2127,7 +2128,7 @@ enum mlx5_txcmp_code {
 		 * multiple iterations may introduce significant
 		 * latency.
 		 */
-		if (--count == 0)
+		if (likely(--count == 0))
 			break;
 	} while (true);
 	if (likely(ci != txq->cq_ci)) {
@@ -2177,15 +2178,15 @@ enum mlx5_txcmp_code {
 		/* Request unconditional completion on last WQE. */
 		last->cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
 					    MLX5_COMP_MODE_OFFSET);
-		/* Save elts_head in unused "immediate" field of WQE. */
-		last->cseg.misc = head;
-		/*
-		 * A CQE slot must always be available. Count the
-		 * issued CEQ "always" request instead of production
-		 * index due to here can be CQE with errors and
-		 * difference with ci may become inconsistent.
-		 */
-		assert(txq->cqe_s > ++txq->cq_pi);
+		/* Save elts_head in dedicated free on completion queue. */
+#ifdef NDEBUG
+		txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head;
+#else
+		txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head |
+					(last->cseg.opcode >> 8) << 16;
+#endif
+		/* A CQE slot must always be available. */
+		assert((txq->cq_pi - txq->cq_ci) <= txq->cqe_s);
 	}
 }
 
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index ee1895b..e362b4a 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -273,9 +273,7 @@ struct mlx5_txq_data {
 	uint16_t wqe_thres; /* WQE threshold to request completion in CQ. */
 	/* WQ related fields. */
 	uint16_t cq_ci; /* Consumer index for completion queue. */
-#ifndef NDEBUG
-	uint16_t cq_pi; /* Counter of issued CQE "always" requests. */
-#endif
+	uint16_t cq_pi; /* Production index for completion queue. */
 	uint16_t cqe_s; /* Number of CQ elements. */
 	uint16_t cqe_m; /* Mask for CQ indices. */
 	/* CQ related fields. */
@@ -298,7 +296,7 @@ struct mlx5_txq_data {
 	struct mlx5_wqe *wqes; /* Work queue. */
 	struct mlx5_wqe *wqes_end; /* Work queue array limit. */
 #ifdef NDEBUG
-	uint32_t *fcqs; /* Free completion queue. */
+	uint16_t *fcqs; /* Free completion queue. */
 #else
 	uint32_t *fcqs; /* Free completion queue (debug extended). */
 #endif
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index aee0970..c750082 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -717,9 +717,7 @@ struct mlx5_txq_obj *
 	txq_data->cq_db = cq_info.dbrec;
 	txq_data->cqes = (volatile struct mlx5_cqe *)cq_info.buf;
 	txq_data->cq_ci = 0;
-#ifndef NDEBUG
 	txq_data->cq_pi = 0;
-#endif
 	txq_data->wqe_ci = 0;
 	txq_data->wqe_pi = 0;
 	txq_data->wqe_comp = 0;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH v2 0/4] net/mlx5: remove Tx descriptor reserved field usage
  2020-01-09 10:56 ` [dpdk-dev] [PATCH v2 0/4] net/mlx5: remove Tx descriptor reserved field usage Viacheslav Ovsiienko
                     ` (3 preceding siblings ...)
  2020-01-09 10:56   ` [dpdk-dev] [PATCH v2 4/4] net/mlx5: engage " Viacheslav Ovsiienko
@ 2020-01-09 14:22   ` Raslan Darawsheh
  2020-01-13  9:35   ` Raslan Darawsheh
  5 siblings, 0 replies; 31+ messages in thread
From: Raslan Darawsheh @ 2020-01-09 14:22 UTC (permalink / raw)
  To: Slava Ovsiienko, dev; +Cc: Matan Azrad, Ori Kam

Hi,

Series applied on next-net-mlx,

Kindest regards,
Raslan Darawsheh

> -----Original Message-----
> From: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> Sent: Thursday, January 9, 2020 12:56 PM
> To: dev@dpdk.org
> Cc: Matan Azrad <matan@mellanox.com>; Raslan Darawsheh
> <rasland@mellanox.com>; Ori Kam <orika@mellanox.com>
> Subject: [PATCH v2 0/4] net/mlx5: remove Tx descriptor reserved field usage
> 
> The current Tx datapath implementation in mlx5 PMD uses the 16-bit
> reserved field within transmit descriptor to store the indices of the elts array
> keeping the mbuf pointers to be freed on transmit completion. On
> completion PMD fetches the descriptor index, then fetches the elts array
> index from reserved field and frees the mbufs.
> 
> The new ConnectX-6DX NIC might use this reserved descriptor field and
> existing implementation might not work in intended way.
> To resolve this issue the dedicated buffer is introduced to store indices to
> instead of descriptor field.
> 
> Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> 
> Viacheslav Ovsiienko (4):
>   net/mlx5: move Tx complete request routine
>   net/mlx5: update Tx error handling routine
>   net/mlx5: add free on completion queue
>   net/mlx5: engage free on completion queue
> 
>  drivers/net/mlx5/mlx5_rxtx.c | 153 ++++++++++++++++++++-----------------
> ------
>  drivers/net/mlx5/mlx5_rxtx.h |  13 ++--  drivers/net/mlx5/mlx5_txq.c  |  19
> +++++-
>  3 files changed, 94 insertions(+), 91 deletions(-)
> 
> --
> v1:
> https://eur03.safelinks.protection.outlook.com/?url=http%3A%2F%2Fpatch
> es.dpdk.org%2Fcover%2F64293%2F&amp;data=02%7C01%7Crasland%40mell
> anox.com%7C56986b5d3d3c46f2725b08d794f297e6%7Ca652971c7d2e4d9ba6
> a4d149256f461b%7C0%7C0%7C637141641963098885&amp;sdata=RP4VgjCQlp
> oJc5J38aajK9Rr8twtJ4d%2FSVP2JxM5C98%3D&amp;reserved=0
> v2: resolve minor compilation per patch issues
> 
> 1.8.3.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH v2 3/4] net/mlx5: add free on completion queue
  2020-01-09 10:56   ` [dpdk-dev] [PATCH v2 3/4] net/mlx5: add free on completion queue Viacheslav Ovsiienko
@ 2020-01-09 15:12     ` Ferruh Yigit
  2020-01-09 15:22       ` Slava Ovsiienko
  0 siblings, 1 reply; 31+ messages in thread
From: Ferruh Yigit @ 2020-01-09 15:12 UTC (permalink / raw)
  To: Viacheslav Ovsiienko, dev; +Cc: matan, rasland, orika

On 1/9/2020 10:56 AM, Viacheslav Ovsiienko wrote:
> The new software manged entity is introduced in Tx datapath
> - free on completion queue. This queue keeps the information
> how many buffers stored in elts array must freed on send
> comletion. Each element of the queue contains transmitting
> descriptor index to be in synch with completion entries (in
> debug build only) and the index in elts array to free buffers.
> 
> Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> Acked-by: Matan Azrad <matan@mellanox.com>

<...>

> @@ -297,6 +297,11 @@ struct mlx5_txq_data {
>  	struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */
>  	struct mlx5_wqe *wqes; /* Work queue. */
>  	struct mlx5_wqe *wqes_end; /* Work queue array limit. */
> +#ifdef NDEBUG
> +	uint32_t *fcqs; /* Free completion queue. */
> +#else
> +	uint32_t *fcqs; /* Free completion queue (debug extended). */
> +#endif

Why is the #ifdef required?

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH v2 4/4] net/mlx5: engage free on completion queue
  2020-01-09 10:56   ` [dpdk-dev] [PATCH v2 4/4] net/mlx5: engage " Viacheslav Ovsiienko
@ 2020-01-09 15:18     ` Ferruh Yigit
  2020-01-09 15:27       ` Slava Ovsiienko
  0 siblings, 1 reply; 31+ messages in thread
From: Ferruh Yigit @ 2020-01-09 15:18 UTC (permalink / raw)
  To: Viacheslav Ovsiienko, dev; +Cc: matan, rasland, orika, Thomas Monjalon

On 1/9/2020 10:56 AM, Viacheslav Ovsiienko wrote:
> The free on completion queue keeps the indices of elts array,
> all mbuf stored below this index should be freed on arrival
> of normal send completion. In debug version it also contains
> an index of completed transmitting descriptor (WQE) to check
> queues synchronization.
> 
> Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> Acked-by: Matan Azrad <matan@mellanox.com>

<...>

> @@ -2108,17 +2108,18 @@ enum mlx5_txcmp_code {
>  			/*
>  			 * We are going to fetch all entries with
>  			 * MLX5_CQE_SYNDROME_WR_FLUSH_ERR status.
> +			 * The send queue is supposed to be empty.
>  			 */
>  			++ci;
> +			txq->cq_pi = ci;
> +			last_cqe = NULL;
>  			continue;
>  		}
>  		/* Normal transmit completion. */
> +		assert(ci != txq->cq_pi);
> +		assert((txq->fcqs[ci & txq->cqe_m] >> 16) == cqe->wqe_counter);

And same comments on these as previous patches, we spend some effort to remove
the 'rte_panic' from drivers, this is almost same thing.

I think a driver shouldn't decide to exit whole application, it's effect should
be limited to the driver.

Assert is useful for debug and during development, but not sure having them in
the production code.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH v2 3/4] net/mlx5: add free on completion queue
  2020-01-09 15:12     ` Ferruh Yigit
@ 2020-01-09 15:22       ` Slava Ovsiienko
  0 siblings, 0 replies; 31+ messages in thread
From: Slava Ovsiienko @ 2020-01-09 15:22 UTC (permalink / raw)
  To: Ferruh Yigit, dev; +Cc: Matan Azrad, Raslan Darawsheh, Ori Kam

> -----Original Message-----
> From: Ferruh Yigit <ferruh.yigit@intel.com>
> Sent: Thursday, January 9, 2020 17:12
> To: Slava Ovsiienko <viacheslavo@mellanox.com>; dev@dpdk.org
> Cc: Matan Azrad <matan@mellanox.com>; Raslan Darawsheh
> <rasland@mellanox.com>; Ori Kam <orika@mellanox.com>
> Subject: Re: [dpdk-dev] [PATCH v2 3/4] net/mlx5: add free on completion
> queue
> 
> On 1/9/2020 10:56 AM, Viacheslav Ovsiienko wrote:
> > The new software manged entity is introduced in Tx datapath
> > - free on completion queue. This queue keeps the information how many
> > buffers stored in elts array must freed on send comletion. Each
> > element of the queue contains transmitting descriptor index to be in
> > synch with completion entries (in debug build only) and the index in
> > elts array to free buffers.
> >
> > Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> > Acked-by: Matan Azrad <matan@mellanox.com>
> 
> <...>
> 
> > @@ -297,6 +297,11 @@ struct mlx5_txq_data {
> >  	struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */
> >  	struct mlx5_wqe *wqes; /* Work queue. */
> >  	struct mlx5_wqe *wqes_end; /* Work queue array limit. */
> > +#ifdef NDEBUG
> > +	uint32_t *fcqs; /* Free completion queue. */ #else
> > +	uint32_t *fcqs; /* Free completion queue (debug extended). */ #endif
> 
> Why is the #ifdef required?

It is a misprint, in non-debug version it should be "uint16_t*" to save some memory.
Thanks for the pointing out, will fix.

With best regards, Slava

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH v2 4/4] net/mlx5: engage free on completion queue
  2020-01-09 15:18     ` Ferruh Yigit
@ 2020-01-09 15:27       ` Slava Ovsiienko
  2020-01-09 15:43         ` Ferruh Yigit
  0 siblings, 1 reply; 31+ messages in thread
From: Slava Ovsiienko @ 2020-01-09 15:27 UTC (permalink / raw)
  To: Ferruh Yigit, dev; +Cc: Matan Azrad, Raslan Darawsheh, Ori Kam, Thomas Monjalon

> -----Original Message-----
> From: Ferruh Yigit <ferruh.yigit@intel.com>
> Sent: Thursday, January 9, 2020 17:19
> To: Slava Ovsiienko <viacheslavo@mellanox.com>; dev@dpdk.org
> Cc: Matan Azrad <matan@mellanox.com>; Raslan Darawsheh
> <rasland@mellanox.com>; Ori Kam <orika@mellanox.com>; Thomas
> Monjalon <thomas@monjalon.net>
> Subject: Re: [dpdk-dev] [PATCH v2 4/4] net/mlx5: engage free on completion
> queue
> 
> On 1/9/2020 10:56 AM, Viacheslav Ovsiienko wrote:
> > The free on completion queue keeps the indices of elts array, all mbuf
> > stored below this index should be freed on arrival of normal send
> > completion. In debug version it also contains an index of completed
> > transmitting descriptor (WQE) to check queues synchronization.
> >
> > Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> > Acked-by: Matan Azrad <matan@mellanox.com>
> 
> <...>
> 
> > @@ -2108,17 +2108,18 @@ enum mlx5_txcmp_code {
> >  			/*
> >  			 * We are going to fetch all entries with
> >  			 * MLX5_CQE_SYNDROME_WR_FLUSH_ERR status.
> > +			 * The send queue is supposed to be empty.
> >  			 */
> >  			++ci;
> > +			txq->cq_pi = ci;
> > +			last_cqe = NULL;
> >  			continue;
> >  		}
> >  		/* Normal transmit completion. */
> > +		assert(ci != txq->cq_pi);
> > +		assert((txq->fcqs[ci & txq->cqe_m] >> 16) == cqe-
> >wqe_counter);
> 
> And same comments on these as previous patches, we spend some effort to
> remove the 'rte_panic' from drivers, this is almost same thing.
> 
> I think a driver shouldn't decide to exit whole application, it's effect should be
> limited to the driver.
> 
> Assert is useful for debug and during development, but not sure having them
> in the production code.

IIRC, "assert" is standard C function. Compiled only if there is no NDEBUG defined.
So, assert does exactly what you are saying - provide the debug break
not allowing the bug to evolve. And no this break in production code.

With best regards, Slava


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH v2 4/4] net/mlx5: engage free on completion queue
  2020-01-09 15:27       ` Slava Ovsiienko
@ 2020-01-09 15:43         ` Ferruh Yigit
  2020-01-09 16:22           ` Slava Ovsiienko
  2020-01-17 10:44           ` Slava Ovsiienko
  0 siblings, 2 replies; 31+ messages in thread
From: Ferruh Yigit @ 2020-01-09 15:43 UTC (permalink / raw)
  To: Slava Ovsiienko, dev
  Cc: Matan Azrad, Raslan Darawsheh, Ori Kam, Thomas Monjalon

On 1/9/2020 3:27 PM, Slava Ovsiienko wrote:
>> -----Original Message-----
>> From: Ferruh Yigit <ferruh.yigit@intel.com>
>> Sent: Thursday, January 9, 2020 17:19
>> To: Slava Ovsiienko <viacheslavo@mellanox.com>; dev@dpdk.org
>> Cc: Matan Azrad <matan@mellanox.com>; Raslan Darawsheh
>> <rasland@mellanox.com>; Ori Kam <orika@mellanox.com>; Thomas
>> Monjalon <thomas@monjalon.net>
>> Subject: Re: [dpdk-dev] [PATCH v2 4/4] net/mlx5: engage free on completion
>> queue
>>
>> On 1/9/2020 10:56 AM, Viacheslav Ovsiienko wrote:
>>> The free on completion queue keeps the indices of elts array, all mbuf
>>> stored below this index should be freed on arrival of normal send
>>> completion. In debug version it also contains an index of completed
>>> transmitting descriptor (WQE) to check queues synchronization.
>>>
>>> Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
>>> Acked-by: Matan Azrad <matan@mellanox.com>
>>
>> <...>
>>
>>> @@ -2108,17 +2108,18 @@ enum mlx5_txcmp_code {
>>>  			/*
>>>  			 * We are going to fetch all entries with
>>>  			 * MLX5_CQE_SYNDROME_WR_FLUSH_ERR status.
>>> +			 * The send queue is supposed to be empty.
>>>  			 */
>>>  			++ci;
>>> +			txq->cq_pi = ci;
>>> +			last_cqe = NULL;
>>>  			continue;
>>>  		}
>>>  		/* Normal transmit completion. */
>>> +		assert(ci != txq->cq_pi);
>>> +		assert((txq->fcqs[ci & txq->cqe_m] >> 16) == cqe-
>>> wqe_counter);
>>
>> And same comments on these as previous patches, we spend some effort to
>> remove the 'rte_panic' from drivers, this is almost same thing.
>>
>> I think a driver shouldn't decide to exit whole application, it's effect should be
>> limited to the driver.
>>
>> Assert is useful for debug and during development, but not sure having them
>> in the production code.
> 
> IIRC, "assert" is standard C function. Compiled only if there is no NDEBUG defined.
> So, assert does exactly what you are saying - provide the debug break
> not allowing the bug to evolve. And no this break in production code.
> 

Since mlx driver is using NDEBUG defined, what you said is right indeed. But why
not using RTE_ASSERT to be consistent with rest. There is a specific config
option to control assert (RTE_ENABLE_ASSERT) and anyone using it will get
different behavior with mlx5.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH v2 4/4] net/mlx5: engage free on completion queue
  2020-01-09 15:43         ` Ferruh Yigit
@ 2020-01-09 16:22           ` Slava Ovsiienko
  2020-01-10  9:06             ` Thomas Monjalon
  2020-01-17 10:44           ` Slava Ovsiienko
  1 sibling, 1 reply; 31+ messages in thread
From: Slava Ovsiienko @ 2020-01-09 16:22 UTC (permalink / raw)
  To: Ferruh Yigit, dev; +Cc: Matan Azrad, Raslan Darawsheh, Ori Kam, Thomas Monjalon

> -----Original Message-----
> From: Ferruh Yigit <ferruh.yigit@intel.com>
> Sent: Thursday, January 9, 2020 17:44
> To: Slava Ovsiienko <viacheslavo@mellanox.com>; dev@dpdk.org
> Cc: Matan Azrad <matan@mellanox.com>; Raslan Darawsheh
> <rasland@mellanox.com>; Ori Kam <orika@mellanox.com>; Thomas
> Monjalon <thomas@monjalon.net>
> Subject: Re: [dpdk-dev] [PATCH v2 4/4] net/mlx5: engage free on completion
> queue
> 
> On 1/9/2020 3:27 PM, Slava Ovsiienko wrote:
> >> -----Original Message-----
> >> From: Ferruh Yigit <ferruh.yigit@intel.com>
> >> Sent: Thursday, January 9, 2020 17:19
> >> To: Slava Ovsiienko <viacheslavo@mellanox.com>; dev@dpdk.org
> >> Cc: Matan Azrad <matan@mellanox.com>; Raslan Darawsheh
> >> <rasland@mellanox.com>; Ori Kam <orika@mellanox.com>; Thomas
> Monjalon
> >> <thomas@monjalon.net>
> >> Subject: Re: [dpdk-dev] [PATCH v2 4/4] net/mlx5: engage free on
> >> completion queue
> >>
> >> On 1/9/2020 10:56 AM, Viacheslav Ovsiienko wrote:
> >>> The free on completion queue keeps the indices of elts array, all
> >>> mbuf stored below this index should be freed on arrival of normal
> >>> send completion. In debug version it also contains an index of
> >>> completed transmitting descriptor (WQE) to check queues
> synchronization.
> >>>
> >>> Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> >>> Acked-by: Matan Azrad <matan@mellanox.com>
> >>
> >> <...>
> >>
> >>> @@ -2108,17 +2108,18 @@ enum mlx5_txcmp_code {
> >>>  			/*
> >>>  			 * We are going to fetch all entries with
> >>>  			 * MLX5_CQE_SYNDROME_WR_FLUSH_ERR status.
> >>> +			 * The send queue is supposed to be empty.
> >>>  			 */
> >>>  			++ci;
> >>> +			txq->cq_pi = ci;
> >>> +			last_cqe = NULL;
> >>>  			continue;
> >>>  		}
> >>>  		/* Normal transmit completion. */
> >>> +		assert(ci != txq->cq_pi);
> >>> +		assert((txq->fcqs[ci & txq->cqe_m] >> 16) == cqe-
> >>> wqe_counter);
> >>
> >> And same comments on these as previous patches, we spend some effort
> >> to remove the 'rte_panic' from drivers, this is almost same thing.
> >>
> >> I think a driver shouldn't decide to exit whole application, it's
> >> effect should be limited to the driver.
> >>
> >> Assert is useful for debug and during development, but not sure
> >> having them in the production code.
> >
> > IIRC, "assert" is standard C function. Compiled only if there is no NDEBUG
> defined.
> > So, assert does exactly what you are saying - provide the debug break
> > not allowing the bug to evolve. And no this break in production code.
> >
> 
> Since mlx driver is using NDEBUG defined, what you said is right indeed. But
> why not using RTE_ASSERT to be consistent with rest. There is a specific config
> option to control assert (RTE_ENABLE_ASSERT) and anyone using it will get
> different behavior with mlx5.

We have the dedicated option to control mlx5 debug:
CONFIG_RTE_ENABLE_ASSERT controls the whole DPDK.
CONFIG_RTE_LIBRTE_MLX5_DEBUG controls NDEBUG for mlx5

From my practice - I switch the mlx5 debug option (in the process of the debugging/testing
datapath and checking the resulting performance, by directly defining NDEBUG in mlx5.h and
not reconfiguring/rebuilding the entire DPDK), this fine grained option seems to be useful.

With best regards, Slava



^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH v3 0/4] net/mlx5: remove Tx descriptor reserved field usage
  2020-01-08 16:15 [dpdk-dev] [PATCH 0/4] net/mlx5: remove Tx descriptor reserved field usage Viacheslav Ovsiienko
                   ` (4 preceding siblings ...)
  2020-01-09 10:56 ` [dpdk-dev] [PATCH v2 0/4] net/mlx5: remove Tx descriptor reserved field usage Viacheslav Ovsiienko
@ 2020-01-09 17:16 ` Viacheslav Ovsiienko
  2020-01-09 17:16   ` [dpdk-dev] [PATCH v3 1/4] net/mlx5: move Tx complete request routine Viacheslav Ovsiienko
                     ` (4 more replies)
  5 siblings, 5 replies; 31+ messages in thread
From: Viacheslav Ovsiienko @ 2020-01-09 17:16 UTC (permalink / raw)
  To: dev; +Cc: matan, rasland, orika

The current Tx datapath implementation in mlx5 PMD uses the
16-bit reserved field within transmit descriptor to store
the indices of the elts array keeping the mbuf pointers to be
freed on transmit completion. On completion PMD fetches the
descriptor index, then fetches the elts array index from
reserved field and frees the mbufs.

The new ConnectX-6DX NIC might use this reserved descriptor
field and existing implementation might not work in intended way.
To resolve this issue the dedicated buffer is introduced to
store indices to instead of descriptor field.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>

Viacheslav Ovsiienko (4):
  net/mlx5: move Tx complete request routine
  net/mlx5: update Tx error handling routine
  net/mlx5: add free on completion queue
  net/mlx5: engage free on completion queue

 drivers/net/mlx5/mlx5_rxtx.c | 153 ++++++++++++++++++++-----------------------
 drivers/net/mlx5/mlx5_rxtx.h |  13 ++--
 drivers/net/mlx5/mlx5_txq.c  |  19 +++++-
 3 files changed, 94 insertions(+), 91 deletions(-)

--
v1: - http://patches.dpdk.org/cover/64293/
v2: - http://patches.dpdk.org/cover/64331/
    - resolve minor compilation per patch issues
v3: - change the fcq entry type to uint16_t in non-debug mode

1.8.3.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH v3 1/4] net/mlx5: move Tx complete request routine
  2020-01-09 17:16 ` [dpdk-dev] [PATCH v3 " Viacheslav Ovsiienko
@ 2020-01-09 17:16   ` Viacheslav Ovsiienko
  2020-01-09 17:16   ` [dpdk-dev] [PATCH v3 2/4] net/mlx5: update Tx error handling routine Viacheslav Ovsiienko
                     ` (3 subsequent siblings)
  4 siblings, 0 replies; 31+ messages in thread
From: Viacheslav Ovsiienko @ 2020-01-09 17:16 UTC (permalink / raw)
  To: dev; +Cc: matan, rasland, orika

The complete request flag is set once per Tx burst call,
the code of appropriate routine moved to the end of sending
loop. This is preparation step to remove WQE reserved field
usage to store index of elts to free.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
Acked-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/mlx5/mlx5_rxtx.c | 26 ++++----------------------
 1 file changed, 4 insertions(+), 22 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 25a2952..ee6d5fc 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -2145,9 +2145,6 @@ enum mlx5_txcmp_code {
  *   Pointer to TX queue structure.
  * @param loc
  *   Pointer to burst routine local context.
- * @param multi,
- *   Routine is called from multi-segment sending loop,
- *   do not correct the elts_head according to the pkts_copy.
  * @param olx
  *   Configured Tx offloads mask. It is fully defined at
  *   compile time and may be used for optimization.
@@ -2155,13 +2152,12 @@ enum mlx5_txcmp_code {
 static __rte_always_inline void
 mlx5_tx_request_completion(struct mlx5_txq_data *restrict txq,
 			   struct mlx5_txq_local *restrict loc,
-			   bool multi,
 			   unsigned int olx)
 {
 	uint16_t head = txq->elts_head;
 	unsigned int part;
 
-	part = (MLX5_TXOFF_CONFIG(INLINE) || multi) ?
+	part = MLX5_TXOFF_CONFIG(INLINE) ?
 	       0 : loc->pkts_sent - loc->pkts_copy;
 	head += part;
 	if ((uint16_t)(head - txq->elts_comp) >= MLX5_TX_COMP_THRESH ||
@@ -3120,8 +3116,6 @@ enum mlx5_txcmp_code {
 	wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds);
 	txq->wqe_ci += (ds + 3) / 4;
 	loc->wqe_free -= (ds + 3) / 4;
-	/* Request CQE generation if limits are reached. */
-	mlx5_tx_request_completion(txq, loc, true, olx);
 	return MLX5_TXCMP_CODE_MULTI;
 }
 
@@ -3230,8 +3224,6 @@ enum mlx5_txcmp_code {
 	} while (true);
 	txq->wqe_ci += (ds + 3) / 4;
 	loc->wqe_free -= (ds + 3) / 4;
-	/* Request CQE generation if limits are reached. */
-	mlx5_tx_request_completion(txq, loc, true, olx);
 	return MLX5_TXCMP_CODE_MULTI;
 }
 
@@ -3388,8 +3380,6 @@ enum mlx5_txcmp_code {
 	wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds);
 	txq->wqe_ci += (ds + 3) / 4;
 	loc->wqe_free -= (ds + 3) / 4;
-	/* Request CQE generation if limits are reached. */
-	mlx5_tx_request_completion(txq, loc, true, olx);
 	return MLX5_TXCMP_CODE_MULTI;
 }
 
@@ -3599,8 +3589,6 @@ enum mlx5_txcmp_code {
 		--loc->elts_free;
 		++loc->pkts_sent;
 		--pkts_n;
-		/* Request CQE generation if limits are reached. */
-		mlx5_tx_request_completion(txq, loc, false, olx);
 		if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free))
 			return MLX5_TXCMP_CODE_EXIT;
 		loc->mbuf = *pkts++;
@@ -3750,7 +3738,7 @@ enum mlx5_txcmp_code {
 		   struct mlx5_txq_local *restrict loc,
 		   unsigned int ds,
 		   unsigned int slen,
-		   unsigned int olx)
+		   unsigned int olx __rte_unused)
 {
 	assert(!MLX5_TXOFF_CONFIG(INLINE));
 #ifdef MLX5_PMD_SOFT_COUNTERS
@@ -3765,8 +3753,6 @@ enum mlx5_txcmp_code {
 	loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds);
 	txq->wqe_ci += (ds + 3) / 4;
 	loc->wqe_free -= (ds + 3) / 4;
-	/* Request CQE generation if limits are reached. */
-	mlx5_tx_request_completion(txq, loc, false, olx);
 }
 
 /*
@@ -3809,8 +3795,6 @@ enum mlx5_txcmp_code {
 	loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len);
 	txq->wqe_ci += (len + 3) / 4;
 	loc->wqe_free -= (len + 3) / 4;
-	/* Request CQE generation if limits are reached. */
-	mlx5_tx_request_completion(txq, loc, false, olx);
 }
 
 /**
@@ -4011,8 +3995,6 @@ enum mlx5_txcmp_code {
 		txq->wqe_ci += (2 + part + 3) / 4;
 		loc->wqe_free -= (2 + part + 3) / 4;
 		pkts_n -= part;
-		/* Request CQE generation if limits are reached. */
-		mlx5_tx_request_completion(txq, loc, false, olx);
 		if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free))
 			return MLX5_TXCMP_CODE_EXIT;
 		loc->mbuf = *pkts++;
@@ -4496,8 +4478,6 @@ enum mlx5_txcmp_code {
 		}
 		++loc->pkts_sent;
 		--pkts_n;
-		/* Request CQE generation if limits are reached. */
-		mlx5_tx_request_completion(txq, loc, false, olx);
 		if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free))
 			return MLX5_TXCMP_CODE_EXIT;
 		loc->mbuf = *pkts++;
@@ -4776,6 +4756,8 @@ enum mlx5_txcmp_code {
 	/* Take a shortcut if nothing is sent. */
 	if (unlikely(loc.pkts_sent == loc.pkts_loop))
 		goto burst_exit;
+	/* Request CQE generation if limits are reached. */
+	mlx5_tx_request_completion(txq, &loc, olx);
 	/*
 	 * Ring QP doorbell immediately after WQE building completion
 	 * to improve latencies. The pure software related data treatment
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH v3 2/4] net/mlx5: update Tx error handling routine
  2020-01-09 17:16 ` [dpdk-dev] [PATCH v3 " Viacheslav Ovsiienko
  2020-01-09 17:16   ` [dpdk-dev] [PATCH v3 1/4] net/mlx5: move Tx complete request routine Viacheslav Ovsiienko
@ 2020-01-09 17:16   ` Viacheslav Ovsiienko
  2020-01-09 17:16   ` [dpdk-dev] [PATCH v3 3/4] net/mlx5: add free on completion queue Viacheslav Ovsiienko
                     ` (2 subsequent siblings)
  4 siblings, 0 replies; 31+ messages in thread
From: Viacheslav Ovsiienko @ 2020-01-09 17:16 UTC (permalink / raw)
  To: dev; +Cc: matan, rasland, orika

This is preparation step, we are going to store the index
of elts to free on completion in the dedicated free on
completion queue, this patch updates the elts freeing routine
and updates Tx error handling routine to be synched with
coming new queue.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
Acked-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/mlx5/mlx5_rxtx.c | 98 +++++++++++++++++++++++---------------------
 drivers/net/mlx5/mlx5_rxtx.h |  4 +-
 drivers/net/mlx5/mlx5_txq.c  |  2 +-
 3 files changed, 54 insertions(+), 50 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index ee6d5fc..b7b40ac 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -654,10 +654,10 @@ enum mlx5_txcmp_code {
  *   Pointer to the error CQE.
  *
  * @return
- *   Negative value if queue recovery failed,
- *   the last Tx buffer element to free otherwise.
+ *   Negative value if queue recovery failed, otherwise
+ *   the error completion entry is handled successfully.
  */
-int
+static int
 mlx5_tx_error_cqe_handle(struct mlx5_txq_data *restrict txq,
 			 volatile struct mlx5_err_cqe *err_cqe)
 {
@@ -701,18 +701,14 @@ enum mlx5_txcmp_code {
 			 */
 			txq->stats.oerrors += ((txq->wqe_ci & wqe_m) -
 						new_wqe_pi) & wqe_m;
-		if (tx_recover_qp(txq_ctrl) == 0) {
-			txq->cq_ci++;
-			/* Release all the remaining buffers. */
-			return txq->elts_head;
+		if (tx_recover_qp(txq_ctrl)) {
+			/* Recovering failed - retry later on the same WQE. */
+			return -1;
 		}
-		/* Recovering failed - try again later on the same WQE. */
-		return -1;
-	} else {
-		txq->cq_ci++;
+		/* Release all the remaining buffers. */
+		txq_free_elts(txq_ctrl);
 	}
-	/* Do not release buffers. */
-	return txq->elts_tail;
+	return 0;
 }
 
 /**
@@ -2034,8 +2030,6 @@ enum mlx5_txcmp_code {
  *   Pointer to TX queue structure.
  * @param valid CQE pointer
  *   if not NULL update txq->wqe_pi and flush the buffers
- * @param itail
- *   if not negative - flush the buffers till this index.
  * @param olx
  *   Configured Tx offloads mask. It is fully defined at
  *   compile time and may be used for optimization.
@@ -2043,25 +2037,18 @@ enum mlx5_txcmp_code {
 static __rte_always_inline void
 mlx5_tx_comp_flush(struct mlx5_txq_data *restrict txq,
 		   volatile struct mlx5_cqe *last_cqe,
-		   int itail,
 		   unsigned int olx __rte_unused)
 {
-	uint16_t tail;
-
 	if (likely(last_cqe != NULL)) {
+		uint16_t tail;
+
 		txq->wqe_pi = rte_be_to_cpu_16(last_cqe->wqe_counter);
 		tail = ((volatile struct mlx5_wqe_cseg *)
 			(txq->wqes + (txq->wqe_pi & txq->wqe_m)))->misc;
-	} else if (itail >= 0) {
-		tail = (uint16_t)itail;
-	} else {
-		return;
-	}
-	rte_compiler_barrier();
-	*txq->cq_db = rte_cpu_to_be_32(txq->cq_ci);
-	if (likely(tail != txq->elts_tail)) {
-		mlx5_tx_free_elts(txq, tail, olx);
-		assert(tail == txq->elts_tail);
+		if (likely(tail != txq->elts_tail)) {
+			mlx5_tx_free_elts(txq, tail, olx);
+			assert(tail == txq->elts_tail);
+		}
 	}
 }
 
@@ -2085,6 +2072,7 @@ enum mlx5_txcmp_code {
 {
 	unsigned int count = MLX5_TX_COMP_MAX_CQE;
 	volatile struct mlx5_cqe *last_cqe = NULL;
+	uint16_t ci = txq->cq_ci;
 	int ret;
 
 	static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be negative value");
@@ -2092,8 +2080,8 @@ enum mlx5_txcmp_code {
 	do {
 		volatile struct mlx5_cqe *cqe;
 
-		cqe = &txq->cqes[txq->cq_ci & txq->cqe_m];
-		ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci);
+		cqe = &txq->cqes[ci & txq->cqe_m];
+		ret = check_cqe(cqe, txq->cqe_s, ci);
 		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
 			if (likely(ret != MLX5_CQE_STATUS_ERR)) {
 				/* No new CQEs in completion queue. */
@@ -2109,31 +2097,49 @@ enum mlx5_txcmp_code {
 			rte_wmb();
 			ret = mlx5_tx_error_cqe_handle
 				(txq, (volatile struct mlx5_err_cqe *)cqe);
+			if (unlikely(ret < 0)) {
+				/*
+				 * Some error occurred on queue error
+				 * handling, we do not advance the index
+				 * here, allowing to retry on next call.
+				 */
+				return;
+			}
 			/*
-			 * Flush buffers, update consuming index
-			 * if recovery succeeded. Otherwise
-			 * just try to recover later.
+			 * We are going to fetch all entries with
+			 * MLX5_CQE_SYNDROME_WR_FLUSH_ERR status.
 			 */
-			last_cqe = NULL;
-			break;
+			++ci;
+			continue;
 		}
 		/* Normal transmit completion. */
-		++txq->cq_ci;
+		++ci;
 		last_cqe = cqe;
 #ifndef NDEBUG
 		if (txq->cq_pi)
 			--txq->cq_pi;
 #endif
-	/*
-	 * We have to restrict the amount of processed CQEs
-	 * in one tx_burst routine call. The CQ may be large
-	 * and many CQEs may be updated by the NIC in one
-	 * transaction. Buffers freeing is time consuming,
-	 * multiple iterations may introduce significant
-	 * latency.
-	 */
-	} while (--count);
-	mlx5_tx_comp_flush(txq, last_cqe, ret, olx);
+		/*
+		 * We have to restrict the amount of processed CQEs
+		 * in one tx_burst routine call. The CQ may be large
+		 * and many CQEs may be updated by the NIC in one
+		 * transaction. Buffers freeing is time consuming,
+		 * multiple iterations may introduce significant
+		 * latency.
+		 */
+		if (--count == 0)
+			break;
+	} while (true);
+	if (likely(ci != txq->cq_ci)) {
+		/*
+		 * Update completion queue consuming index
+		 * and ring doorbell to notify hardware.
+		 */
+		rte_compiler_barrier();
+		txq->cq_ci = ci;
+		*txq->cq_db = rte_cpu_to_be_32(ci);
+		mlx5_tx_comp_flush(txq, last_cqe, olx);
+	}
 }
 
 /**
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index e927343..8a2185a 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -440,6 +440,7 @@ struct mlx5_txq_ctrl *mlx5_txq_hairpin_new
 int mlx5_txq_releasable(struct rte_eth_dev *dev, uint16_t idx);
 int mlx5_txq_verify(struct rte_eth_dev *dev);
 void txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl);
+void txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl);
 uint64_t mlx5_get_tx_port_offloads(struct rte_eth_dev *dev);
 
 /* mlx5_rxtx.c */
@@ -451,9 +452,6 @@ struct mlx5_txq_ctrl *mlx5_txq_hairpin_new
 void mlx5_set_ptype_table(void);
 void mlx5_set_cksum_table(void);
 void mlx5_set_swp_types_table(void);
-__rte_noinline int mlx5_tx_error_cqe_handle
-				(struct mlx5_txq_data *restrict txq,
-				 volatile struct mlx5_err_cqe *err_cqe);
 uint16_t mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n);
 void mlx5_rxq_initialize(struct mlx5_rxq_data *rxq);
 __rte_noinline int mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec);
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 1c4f7e7..abe0947 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -62,7 +62,7 @@
  * @param txq_ctrl
  *   Pointer to TX queue structure.
  */
-static void
+void
 txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl)
 {
 	const uint16_t elts_n = 1 << txq_ctrl->txq.elts_n;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH v3 3/4] net/mlx5: add free on completion queue
  2020-01-09 17:16 ` [dpdk-dev] [PATCH v3 " Viacheslav Ovsiienko
  2020-01-09 17:16   ` [dpdk-dev] [PATCH v3 1/4] net/mlx5: move Tx complete request routine Viacheslav Ovsiienko
  2020-01-09 17:16   ` [dpdk-dev] [PATCH v3 2/4] net/mlx5: update Tx error handling routine Viacheslav Ovsiienko
@ 2020-01-09 17:16   ` Viacheslav Ovsiienko
  2020-01-09 17:16   ` [dpdk-dev] [PATCH v3 4/4] net/mlx5: engage " Viacheslav Ovsiienko
  2020-01-13  9:36   ` [dpdk-dev] [PATCH v3 0/4] net/mlx5: remove Tx descriptor reserved field usage Raslan Darawsheh
  4 siblings, 0 replies; 31+ messages in thread
From: Viacheslav Ovsiienko @ 2020-01-09 17:16 UTC (permalink / raw)
  To: dev; +Cc: matan, rasland, orika

The new software manged entity is introduced in Tx datapath
- free on completion queue. This queue keeps the information
how many buffers stored in elts array must freed on send
comletion. Each element of the queue contains transmitting
descriptor index to be in synch with completion entries (in
debug build only) and the index in elts array to free buffers.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
Acked-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/mlx5/mlx5_rxtx.h |  5 +++++
 drivers/net/mlx5/mlx5_txq.c  | 15 +++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 8a2185a..7d1b2fa 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -297,6 +297,11 @@ struct mlx5_txq_data {
 	struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */
 	struct mlx5_wqe *wqes; /* Work queue. */
 	struct mlx5_wqe *wqes_end; /* Work queue array limit. */
+#ifdef NDEBUG
+	uint16_t *fcqs; /* Free completion queue. */
+#else
+	uint32_t *fcqs; /* Free completion queue (debug extended). */
+#endif
 	volatile struct mlx5_cqe *cqes; /* Completion queue. */
 	volatile uint32_t *qp_db; /* Work queue doorbell. */
 	volatile uint32_t *cq_db; /* Completion queue doorbell. */
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index abe0947..aee0970 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -724,6 +724,17 @@ struct mlx5_txq_obj *
 	txq_data->wqe_pi = 0;
 	txq_data->wqe_comp = 0;
 	txq_data->wqe_thres = txq_data->wqe_s / MLX5_TX_COMP_THRESH_INLINE_DIV;
+	txq_data->fcqs = rte_calloc_socket(__func__,
+					   txq_data->cqe_s,
+					   sizeof(*txq_data->fcqs),
+					   RTE_CACHE_LINE_SIZE,
+					   txq_ctrl->socket);
+	if (!txq_data->fcqs) {
+		DRV_LOG(ERR, "port %u Tx queue %u cannot allocate memory (FCQ)",
+			dev->data->port_id, idx);
+		rte_errno = ENOMEM;
+		goto error;
+	}
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
 	/*
 	 * If using DevX need to query and store TIS transport domain value.
@@ -772,6 +783,8 @@ struct mlx5_txq_obj *
 		claim_zero(mlx5_glue->destroy_cq(tmpl.cq));
 	if (tmpl.qp)
 		claim_zero(mlx5_glue->destroy_qp(tmpl.qp));
+	if (txq_data && txq_data->fcqs)
+		rte_free(txq_data->fcqs);
 	if (txq_obj)
 		rte_free(txq_obj);
 	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
@@ -826,6 +839,8 @@ struct mlx5_txq_obj *
 		} else {
 			claim_zero(mlx5_glue->destroy_qp(txq_obj->qp));
 			claim_zero(mlx5_glue->destroy_cq(txq_obj->cq));
+				if (txq_obj->txq_ctrl->txq.fcqs)
+					rte_free(txq_obj->txq_ctrl->txq.fcqs);
 		}
 		LIST_REMOVE(txq_obj, next);
 		rte_free(txq_obj);
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH v3 4/4] net/mlx5: engage free on completion queue
  2020-01-09 17:16 ` [dpdk-dev] [PATCH v3 " Viacheslav Ovsiienko
                     ` (2 preceding siblings ...)
  2020-01-09 17:16   ` [dpdk-dev] [PATCH v3 3/4] net/mlx5: add free on completion queue Viacheslav Ovsiienko
@ 2020-01-09 17:16   ` Viacheslav Ovsiienko
  2020-01-13  9:36   ` [dpdk-dev] [PATCH v3 0/4] net/mlx5: remove Tx descriptor reserved field usage Raslan Darawsheh
  4 siblings, 0 replies; 31+ messages in thread
From: Viacheslav Ovsiienko @ 2020-01-09 17:16 UTC (permalink / raw)
  To: dev; +Cc: matan, rasland, orika

The free on completion queue keeps the indices of elts array,
all mbuf stored below this index should be freed on arrival
of normal send completion. In debug version it also contains
an index of completed transmitting descriptor (WQE) to check
queues synchronization.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
Acked-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/mlx5/mlx5_rxtx.c | 33 +++++++++++++++++----------------
 drivers/net/mlx5/mlx5_rxtx.h |  4 +---
 drivers/net/mlx5/mlx5_txq.c  |  2 --
 3 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index b7b40ac..b11c5eb 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -2043,8 +2043,7 @@ enum mlx5_txcmp_code {
 		uint16_t tail;
 
 		txq->wqe_pi = rte_be_to_cpu_16(last_cqe->wqe_counter);
-		tail = ((volatile struct mlx5_wqe_cseg *)
-			(txq->wqes + (txq->wqe_pi & txq->wqe_m)))->misc;
+		tail = txq->fcqs[(txq->cq_ci - 1) & txq->cqe_m];
 		if (likely(tail != txq->elts_tail)) {
 			mlx5_tx_free_elts(txq, tail, olx);
 			assert(tail == txq->elts_tail);
@@ -2095,6 +2094,7 @@ enum mlx5_txcmp_code {
 			 * here, before we might perform SQ reset.
 			 */
 			rte_wmb();
+			txq->cq_ci = ci;
 			ret = mlx5_tx_error_cqe_handle
 				(txq, (volatile struct mlx5_err_cqe *)cqe);
 			if (unlikely(ret < 0)) {
@@ -2108,17 +2108,18 @@ enum mlx5_txcmp_code {
 			/*
 			 * We are going to fetch all entries with
 			 * MLX5_CQE_SYNDROME_WR_FLUSH_ERR status.
+			 * The send queue is supposed to be empty.
 			 */
 			++ci;
+			txq->cq_pi = ci;
+			last_cqe = NULL;
 			continue;
 		}
 		/* Normal transmit completion. */
+		assert(ci != txq->cq_pi);
+		assert((txq->fcqs[ci & txq->cqe_m] >> 16) == cqe->wqe_counter);
 		++ci;
 		last_cqe = cqe;
-#ifndef NDEBUG
-		if (txq->cq_pi)
-			--txq->cq_pi;
-#endif
 		/*
 		 * We have to restrict the amount of processed CQEs
 		 * in one tx_burst routine call. The CQ may be large
@@ -2127,7 +2128,7 @@ enum mlx5_txcmp_code {
 		 * multiple iterations may introduce significant
 		 * latency.
 		 */
-		if (--count == 0)
+		if (likely(--count == 0))
 			break;
 	} while (true);
 	if (likely(ci != txq->cq_ci)) {
@@ -2177,15 +2178,15 @@ enum mlx5_txcmp_code {
 		/* Request unconditional completion on last WQE. */
 		last->cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
 					    MLX5_COMP_MODE_OFFSET);
-		/* Save elts_head in unused "immediate" field of WQE. */
-		last->cseg.misc = head;
-		/*
-		 * A CQE slot must always be available. Count the
-		 * issued CEQ "always" request instead of production
-		 * index due to here can be CQE with errors and
-		 * difference with ci may become inconsistent.
-		 */
-		assert(txq->cqe_s > ++txq->cq_pi);
+		/* Save elts_head in dedicated free on completion queue. */
+#ifdef NDEBUG
+		txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head;
+#else
+		txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head |
+					(last->cseg.opcode >> 8) << 16;
+#endif
+		/* A CQE slot must always be available. */
+		assert((txq->cq_pi - txq->cq_ci) <= txq->cqe_s);
 	}
 }
 
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 7d1b2fa..e362b4a 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -273,9 +273,7 @@ struct mlx5_txq_data {
 	uint16_t wqe_thres; /* WQE threshold to request completion in CQ. */
 	/* WQ related fields. */
 	uint16_t cq_ci; /* Consumer index for completion queue. */
-#ifndef NDEBUG
-	uint16_t cq_pi; /* Counter of issued CQE "always" requests. */
-#endif
+	uint16_t cq_pi; /* Production index for completion queue. */
 	uint16_t cqe_s; /* Number of CQ elements. */
 	uint16_t cqe_m; /* Mask for CQ indices. */
 	/* CQ related fields. */
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index aee0970..c750082 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -717,9 +717,7 @@ struct mlx5_txq_obj *
 	txq_data->cq_db = cq_info.dbrec;
 	txq_data->cqes = (volatile struct mlx5_cqe *)cq_info.buf;
 	txq_data->cq_ci = 0;
-#ifndef NDEBUG
 	txq_data->cq_pi = 0;
-#endif
 	txq_data->wqe_ci = 0;
 	txq_data->wqe_pi = 0;
 	txq_data->wqe_comp = 0;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH v2 4/4] net/mlx5: engage free on completion queue
  2020-01-09 16:22           ` Slava Ovsiienko
@ 2020-01-10  9:06             ` Thomas Monjalon
  2020-01-10  9:28               ` Slava Ovsiienko
  0 siblings, 1 reply; 31+ messages in thread
From: Thomas Monjalon @ 2020-01-10  9:06 UTC (permalink / raw)
  To: Slava Ovsiienko; +Cc: Ferruh Yigit, dev, Matan Azrad, Raslan Darawsheh, Ori Kam

09/01/2020 17:22, Slava Ovsiienko:
> From: Ferruh Yigit <ferruh.yigit@intel.com>
> > On 1/9/2020 3:27 PM, Slava Ovsiienko wrote:
> > > From: Ferruh Yigit <ferruh.yigit@intel.com>
> > >> On 1/9/2020 10:56 AM, Viacheslav Ovsiienko wrote:
> > >>> +		assert(ci != txq->cq_pi);
> > >>> +		assert((txq->fcqs[ci & txq->cqe_m] >> 16) == cqe-
> > >>> wqe_counter);
> > >>
> > >> And same comments on these as previous patches, we spend some effort
> > >> to remove the 'rte_panic' from drivers, this is almost same thing.
> > >>
> > >> I think a driver shouldn't decide to exit whole application, it's
> > >> effect should be limited to the driver.
> > >>
> > >> Assert is useful for debug and during development, but not sure
> > >> having them in the production code.
> > >
> > > IIRC, "assert" is standard C function. Compiled only if there is no NDEBUG
> > defined.
> > > So, assert does exactly what you are saying - provide the debug break
> > > not allowing the bug to evolve. And no this break in production code.
> > >
> > 
> > Since mlx driver is using NDEBUG defined, what you said is right indeed. But
> > why not using RTE_ASSERT to be consistent with rest. There is a specific config
> > option to control assert (RTE_ENABLE_ASSERT) and anyone using it will get
> > different behavior with mlx5.
> 
> We have the dedicated option to control mlx5 debug:
> CONFIG_RTE_ENABLE_ASSERT controls the whole DPDK.

No, it controls the whole DPDK except mlx PMDs.

> CONFIG_RTE_LIBRTE_MLX5_DEBUG controls NDEBUG for mlx5
> 
> From my practice - I switch the mlx5 debug option (in the process of the debugging/testing
> datapath and checking the resulting performance, by directly defining NDEBUG in mlx5.h and
> not reconfiguring/rebuilding the entire DPDK), this fine grained option seems to be useful.

I don't like having mlx PMDs behave differently.
It make things difficult for newcomers.
And with meson, such options are cleaned up.



^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH v2 4/4] net/mlx5: engage free on completion queue
  2020-01-10  9:06             ` Thomas Monjalon
@ 2020-01-10  9:28               ` Slava Ovsiienko
  2020-01-10  9:34                 ` Thomas Monjalon
  0 siblings, 1 reply; 31+ messages in thread
From: Slava Ovsiienko @ 2020-01-10  9:28 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: Ferruh Yigit, dev, Matan Azrad, Raslan Darawsheh, Ori Kam

> -----Original Message-----
> From: Thomas Monjalon <thomas@monjalon.net>
> Sent: Friday, January 10, 2020 11:07
> To: Slava Ovsiienko <viacheslavo@mellanox.com>
> Cc: Ferruh Yigit <ferruh.yigit@intel.com>; dev@dpdk.org; Matan Azrad
> <matan@mellanox.com>; Raslan Darawsheh <rasland@mellanox.com>; Ori
> Kam <orika@mellanox.com>
> Subject: Re: [dpdk-dev] [PATCH v2 4/4] net/mlx5: engage free on completion
> queue
> 
> 09/01/2020 17:22, Slava Ovsiienko:
> > From: Ferruh Yigit <ferruh.yigit@intel.com>
> > > On 1/9/2020 3:27 PM, Slava Ovsiienko wrote:
> > > > From: Ferruh Yigit <ferruh.yigit@intel.com>
> > > >> On 1/9/2020 10:56 AM, Viacheslav Ovsiienko wrote:
> > > >>> +		assert(ci != txq->cq_pi);
> > > >>> +		assert((txq->fcqs[ci & txq->cqe_m] >> 16) == cqe-
> > > >>> wqe_counter);
> > > >>
> > > >> And same comments on these as previous patches, we spend some
> > > >> effort to remove the 'rte_panic' from drivers, this is almost same thing.
> > > >>
> > > >> I think a driver shouldn't decide to exit whole application, it's
> > > >> effect should be limited to the driver.
> > > >>
> > > >> Assert is useful for debug and during development, but not sure
> > > >> having them in the production code.
> > > >
> > > > IIRC, "assert" is standard C function. Compiled only if there is
> > > > no NDEBUG
> > > defined.
> > > > So, assert does exactly what you are saying - provide the debug
> > > > break not allowing the bug to evolve. And no this break in production
> code.
> > > >
> > >
> > > Since mlx driver is using NDEBUG defined, what you said is right
> > > indeed. But why not using RTE_ASSERT to be consistent with rest.
> > > There is a specific config option to control assert
> > > (RTE_ENABLE_ASSERT) and anyone using it will get different behavior with
> mlx5.
> >
> > We have the dedicated option to control mlx5 debug:
> > CONFIG_RTE_ENABLE_ASSERT controls the whole DPDK.
> 
> No, it controls the whole DPDK except mlx PMDs.
> 
> > CONFIG_RTE_LIBRTE_MLX5_DEBUG controls NDEBUG for mlx5
> >
> > From my practice - I switch the mlx5 debug option (in the process of
> > the debugging/testing datapath and checking the resulting performance,
> > by directly defining NDEBUG in mlx5.h and not reconfiguring/rebuilding the
> entire DPDK), this fine grained option seems to be useful.
> 
> I don't like having mlx PMDs behave differently.
> It make things difficult for newcomers.
> And with meson, such options are cleaned up.

Do you mean we should eliminate NDEBUG usage and convert it to some explicit "MLX5_NDEBUG"
(and convert "assert" to "MLX5_ASSERT") ? 

With best regards, Slava



^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH v2 4/4] net/mlx5: engage free on completion queue
  2020-01-10  9:28               ` Slava Ovsiienko
@ 2020-01-10  9:34                 ` Thomas Monjalon
  2020-01-10  9:55                   ` Slava Ovsiienko
  0 siblings, 1 reply; 31+ messages in thread
From: Thomas Monjalon @ 2020-01-10  9:34 UTC (permalink / raw)
  To: Slava Ovsiienko; +Cc: Ferruh Yigit, dev, Matan Azrad, Raslan Darawsheh, Ori Kam

10/01/2020 10:28, Slava Ovsiienko:
> From: Thomas Monjalon <thomas@monjalon.net>
> > 09/01/2020 17:22, Slava Ovsiienko:
> > > From: Ferruh Yigit <ferruh.yigit@intel.com>
> > > > On 1/9/2020 3:27 PM, Slava Ovsiienko wrote:
> > > > > From: Ferruh Yigit <ferruh.yigit@intel.com>
> > > > >> On 1/9/2020 10:56 AM, Viacheslav Ovsiienko wrote:
> > > > >>> +		assert(ci != txq->cq_pi);
> > > > >>> +		assert((txq->fcqs[ci & txq->cqe_m] >> 16) == cqe-
> > > > >>> wqe_counter);
> > > > >>
> > > > >> And same comments on these as previous patches, we spend some
> > > > >> effort to remove the 'rte_panic' from drivers, this is almost same thing.
> > > > >>
> > > > >> I think a driver shouldn't decide to exit whole application, it's
> > > > >> effect should be limited to the driver.
> > > > >>
> > > > >> Assert is useful for debug and during development, but not sure
> > > > >> having them in the production code.
> > > > >
> > > > > IIRC, "assert" is standard C function. Compiled only if there is
> > > > > no NDEBUG
> > > > defined.
> > > > > So, assert does exactly what you are saying - provide the debug
> > > > > break not allowing the bug to evolve. And no this break in production
> > code.
> > > > >
> > > >
> > > > Since mlx driver is using NDEBUG defined, what you said is right
> > > > indeed. But why not using RTE_ASSERT to be consistent with rest.
> > > > There is a specific config option to control assert
> > > > (RTE_ENABLE_ASSERT) and anyone using it will get different behavior with
> > mlx5.
> > >
> > > We have the dedicated option to control mlx5 debug:
> > > CONFIG_RTE_ENABLE_ASSERT controls the whole DPDK.
> > 
> > No, it controls the whole DPDK except mlx PMDs.
> > 
> > > CONFIG_RTE_LIBRTE_MLX5_DEBUG controls NDEBUG for mlx5
> > >
> > > From my practice - I switch the mlx5 debug option (in the process of
> > > the debugging/testing datapath and checking the resulting performance,
> > > by directly defining NDEBUG in mlx5.h and not reconfiguring/rebuilding the
> > entire DPDK), this fine grained option seems to be useful.
> > 
> > I don't like having mlx PMDs behave differently.
> > It make things difficult for newcomers.
> > And with meson, such options are cleaned up.
> 
> Do you mean we should eliminate NDEBUG usage and convert it to some explicit "MLX5_NDEBUG"
> (and convert "assert" to "MLX5_ASSERT") ? 

I mean we should use RTE_ASSERT in mlx5, as it is already done in some files.



^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH v2 4/4] net/mlx5: engage free on completion queue
  2020-01-10  9:34                 ` Thomas Monjalon
@ 2020-01-10  9:55                   ` Slava Ovsiienko
  2020-01-10 13:11                     ` Thomas Monjalon
  0 siblings, 1 reply; 31+ messages in thread
From: Slava Ovsiienko @ 2020-01-10  9:55 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: Ferruh Yigit, dev, Matan Azrad, Raslan Darawsheh, Ori Kam

> -----Original Message-----
> From: Thomas Monjalon <thomas@monjalon.net>
> Sent: Friday, January 10, 2020 11:34
> To: Slava Ovsiienko <viacheslavo@mellanox.com>
> Cc: Ferruh Yigit <ferruh.yigit@intel.com>; dev@dpdk.org; Matan Azrad
> <matan@mellanox.com>; Raslan Darawsheh <rasland@mellanox.com>; Ori
> Kam <orika@mellanox.com>
> Subject: Re: [dpdk-dev] [PATCH v2 4/4] net/mlx5: engage free on completion
> queue
> 
> 10/01/2020 10:28, Slava Ovsiienko:
> > From: Thomas Monjalon <thomas@monjalon.net>
> > > 09/01/2020 17:22, Slava Ovsiienko:
> > > > From: Ferruh Yigit <ferruh.yigit@intel.com>
> > > > > On 1/9/2020 3:27 PM, Slava Ovsiienko wrote:
> > > > > > From: Ferruh Yigit <ferruh.yigit@intel.com>
> > > > > >> On 1/9/2020 10:56 AM, Viacheslav Ovsiienko wrote:
> > > > > >>> +		assert(ci != txq->cq_pi);
> > > > > >>> +		assert((txq->fcqs[ci & txq->cqe_m] >> 16) == cqe-
> > > > > >>> wqe_counter);
> > > > > >>
> > > > > >> And same comments on these as previous patches, we spend some
> > > > > >> effort to remove the 'rte_panic' from drivers, this is almost same
> thing.
> > > > > >>
> > > > > >> I think a driver shouldn't decide to exit whole application,
> > > > > >> it's effect should be limited to the driver.
> > > > > >>
> > > > > >> Assert is useful for debug and during development, but not
> > > > > >> sure having them in the production code.
> > > > > >
> > > > > > IIRC, "assert" is standard C function. Compiled only if there
> > > > > > is no NDEBUG
> > > > > defined.
> > > > > > So, assert does exactly what you are saying - provide the
> > > > > > debug break not allowing the bug to evolve. And no this break
> > > > > > in production
> > > code.
> > > > > >
> > > > >
> > > > > Since mlx driver is using NDEBUG defined, what you said is right
> > > > > indeed. But why not using RTE_ASSERT to be consistent with rest.
> > > > > There is a specific config option to control assert
> > > > > (RTE_ENABLE_ASSERT) and anyone using it will get different
> > > > > behavior with
> > > mlx5.
> > > >
> > > > We have the dedicated option to control mlx5 debug:
> > > > CONFIG_RTE_ENABLE_ASSERT controls the whole DPDK.
> > >
> > > No, it controls the whole DPDK except mlx PMDs.
> > >
> > > > CONFIG_RTE_LIBRTE_MLX5_DEBUG controls NDEBUG for mlx5
> > > >
> > > > From my practice - I switch the mlx5 debug option (in the process
> > > > of the debugging/testing datapath and checking the resulting
> > > > performance, by directly defining NDEBUG in mlx5.h and not
> > > > reconfiguring/rebuilding the
> > > entire DPDK), this fine grained option seems to be useful.
> > >
> > > I don't like having mlx PMDs behave differently.
> > > It make things difficult for newcomers.
> > > And with meson, such options are cleaned up.
> >
> > Do you mean we should eliminate NDEBUG usage and convert it to some
> explicit "MLX5_NDEBUG"
> > (and convert "assert" to "MLX5_ASSERT") ?
> 
> I mean we should use RTE_ASSERT in mlx5, as it is already done in some files.
> 
This would make not possible to engage asserts  in mlx5 module only.
It is a question of structuring/layering, not "different behavior".
As for me - it is very nice to have fine grained debug control option,
and I use this feature actively, it just saves my time. Also, it seems 
these options are implemented in many other PMDs
(with its own xxx_ASSERTs).

With best regards, Slava

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH v2 4/4] net/mlx5: engage free on completion queue
  2020-01-10  9:55                   ` Slava Ovsiienko
@ 2020-01-10 13:11                     ` Thomas Monjalon
  2020-01-10 13:42                       ` Slava Ovsiienko
  0 siblings, 1 reply; 31+ messages in thread
From: Thomas Monjalon @ 2020-01-10 13:11 UTC (permalink / raw)
  To: Slava Ovsiienko; +Cc: Ferruh Yigit, dev, Matan Azrad, Raslan Darawsheh, Ori Kam

10/01/2020 10:55, Slava Ovsiienko:
> From: Thomas Monjalon <thomas@monjalon.net>
> > 10/01/2020 10:28, Slava Ovsiienko:
> > > From: Thomas Monjalon <thomas@monjalon.net>
> > > > 09/01/2020 17:22, Slava Ovsiienko:
> > > > > From: Ferruh Yigit <ferruh.yigit@intel.com>
> > > > > > On 1/9/2020 3:27 PM, Slava Ovsiienko wrote:
> > > > > > > From: Ferruh Yigit <ferruh.yigit@intel.com>
> > > > > > >> On 1/9/2020 10:56 AM, Viacheslav Ovsiienko wrote:
> > > > > > >>> +		assert(ci != txq->cq_pi);
> > > > > > >>> +		assert((txq->fcqs[ci & txq->cqe_m] >> 16) == cqe-
> > > > > > >>> wqe_counter);
> > > > > > >>
> > > > > > >> And same comments on these as previous patches, we spend some
> > > > > > >> effort to remove the 'rte_panic' from drivers, this is almost same
> > thing.
> > > > > > >>
> > > > > > >> I think a driver shouldn't decide to exit whole application,
> > > > > > >> it's effect should be limited to the driver.
> > > > > > >>
> > > > > > >> Assert is useful for debug and during development, but not
> > > > > > >> sure having them in the production code.
> > > > > > >
> > > > > > > IIRC, "assert" is standard C function. Compiled only if there
> > > > > > > is no NDEBUG
> > > > > > defined.
> > > > > > > So, assert does exactly what you are saying - provide the
> > > > > > > debug break not allowing the bug to evolve. And no this break
> > > > > > > in production
> > > > code.
> > > > > > >
> > > > > >
> > > > > > Since mlx driver is using NDEBUG defined, what you said is right
> > > > > > indeed. But why not using RTE_ASSERT to be consistent with rest.
> > > > > > There is a specific config option to control assert
> > > > > > (RTE_ENABLE_ASSERT) and anyone using it will get different
> > > > > > behavior with
> > > > mlx5.
> > > > >
> > > > > We have the dedicated option to control mlx5 debug:
> > > > > CONFIG_RTE_ENABLE_ASSERT controls the whole DPDK.
> > > >
> > > > No, it controls the whole DPDK except mlx PMDs.
> > > >
> > > > > CONFIG_RTE_LIBRTE_MLX5_DEBUG controls NDEBUG for mlx5
> > > > >
> > > > > From my practice - I switch the mlx5 debug option (in the process
> > > > > of the debugging/testing datapath and checking the resulting
> > > > > performance, by directly defining NDEBUG in mlx5.h and not
> > > > > reconfiguring/rebuilding the
> > > > entire DPDK), this fine grained option seems to be useful.
> > > >
> > > > I don't like having mlx PMDs behave differently.
> > > > It make things difficult for newcomers.
> > > > And with meson, such options are cleaned up.
> > >
> > > Do you mean we should eliminate NDEBUG usage and convert it to some
> > explicit "MLX5_NDEBUG"
> > > (and convert "assert" to "MLX5_ASSERT") ?
> > 
> > I mean we should use RTE_ASSERT in mlx5, as it is already done in some files.
> > 
> This would make not possible to engage asserts  in mlx5 module only.
> It is a question of structuring/layering, not "different behavior".
> As for me - it is very nice to have fine grained debug control option,
> and I use this feature actively, it just saves my time. Also, it seems 
> these options are implemented in many other PMDs
> (with its own xxx_ASSERTs).

I disagree, it is not nice. It makes it more complicate to use.
Can you imagine every file having its own tools and configs
in a project? As a maintainer, my role is to make things simpler
for everyone in general so we can know easily how things work.

About time saving, I also disagree. If you enable assert for the whole project
during all your development, it is a good practice which does not cost any time.

About other PMDs, they must be fixed.



^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH v2 4/4] net/mlx5: engage free on completion queue
  2020-01-10 13:11                     ` Thomas Monjalon
@ 2020-01-10 13:42                       ` Slava Ovsiienko
  0 siblings, 0 replies; 31+ messages in thread
From: Slava Ovsiienko @ 2020-01-10 13:42 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: Ferruh Yigit, dev, Matan Azrad, Raslan Darawsheh, Ori Kam

> -----Original Message-----
> From: Thomas Monjalon <thomas@monjalon.net>
> Sent: Friday, January 10, 2020 15:11
> To: Slava Ovsiienko <viacheslavo@mellanox.com>
> Cc: Ferruh Yigit <ferruh.yigit@intel.com>; dev@dpdk.org; Matan Azrad
> <matan@mellanox.com>; Raslan Darawsheh <rasland@mellanox.com>; Ori
> Kam <orika@mellanox.com>
> Subject: Re: [dpdk-dev] [PATCH v2 4/4] net/mlx5: engage free on completion
> queue
> 
> 10/01/2020 10:55, Slava Ovsiienko:
> > From: Thomas Monjalon <thomas@monjalon.net>
> > > 10/01/2020 10:28, Slava Ovsiienko:
> > > > From: Thomas Monjalon <thomas@monjalon.net>
> > > > > 09/01/2020 17:22, Slava Ovsiienko:
> > > > > > From: Ferruh Yigit <ferruh.yigit@intel.com>
> > > > > > > On 1/9/2020 3:27 PM, Slava Ovsiienko wrote:
> > > > > > > > From: Ferruh Yigit <ferruh.yigit@intel.com>
> > > > > > > >> On 1/9/2020 10:56 AM, Viacheslav Ovsiienko wrote:
> > > > > > > >>> +		assert(ci != txq->cq_pi);
> > > > > > > >>> +		assert((txq->fcqs[ci & txq->cqe_m] >> 16) ==
> cqe-
> > > > > > > >>> wqe_counter);
> > > > > > > >>
> > > > > > > >> And same comments on these as previous patches, we spend
> > > > > > > >> some effort to remove the 'rte_panic' from drivers, this
> > > > > > > >> is almost same
> > > thing.
> > > > > > > >>
> > > > > > > >> I think a driver shouldn't decide to exit whole
> > > > > > > >> application, it's effect should be limited to the driver.
> > > > > > > >>
> > > > > > > >> Assert is useful for debug and during development, but
> > > > > > > >> not sure having them in the production code.
> > > > > > > >
> > > > > > > > IIRC, "assert" is standard C function. Compiled only if
> > > > > > > > there is no NDEBUG
> > > > > > > defined.
> > > > > > > > So, assert does exactly what you are saying - provide the
> > > > > > > > debug break not allowing the bug to evolve. And no this
> > > > > > > > break in production
> > > > > code.
> > > > > > > >
> > > > > > >
> > > > > > > Since mlx driver is using NDEBUG defined, what you said is
> > > > > > > right indeed. But why not using RTE_ASSERT to be consistent with
> rest.
> > > > > > > There is a specific config option to control assert
> > > > > > > (RTE_ENABLE_ASSERT) and anyone using it will get different
> > > > > > > behavior with
> > > > > mlx5.
> > > > > >
> > > > > > We have the dedicated option to control mlx5 debug:
> > > > > > CONFIG_RTE_ENABLE_ASSERT controls the whole DPDK.
> > > > >
> > > > > No, it controls the whole DPDK except mlx PMDs.
> > > > >
> > > > > > CONFIG_RTE_LIBRTE_MLX5_DEBUG controls NDEBUG for mlx5
> > > > > >
> > > > > > From my practice - I switch the mlx5 debug option (in the
> > > > > > process of the debugging/testing datapath and checking the
> > > > > > resulting performance, by directly defining NDEBUG in mlx5.h
> > > > > > and not reconfiguring/rebuilding the
> > > > > entire DPDK), this fine grained option seems to be useful.
> > > > >
> > > > > I don't like having mlx PMDs behave differently.
> > > > > It make things difficult for newcomers.
> > > > > And with meson, such options are cleaned up.
> > > >
> > > > Do you mean we should eliminate NDEBUG usage and convert it to
> > > > some
> > > explicit "MLX5_NDEBUG"
> > > > (and convert "assert" to "MLX5_ASSERT") ?
> > >
> > > I mean we should use RTE_ASSERT in mlx5, as it is already done in some
> files.
> > >
> > This would make not possible to engage asserts  in mlx5 module only.
> > It is a question of structuring/layering, not "different behavior".
> > As for me - it is very nice to have fine grained debug control option,
> > and I use this feature actively, it just saves my time. Also, it seems
> > these options are implemented in many other PMDs (with its own
> > xxx_ASSERTs).
> 
> I disagree, it is not nice. It makes it more complicate to use.
> Can you imagine every file having its own tools and configs in a project? As a
> maintainer, my role is to make things simpler for everyone in general so we
> can know easily how things work.

Not every file, but every module. It is quite common practice to have local
configuration options for module in the large projects. So, it is native for
PMDs to have its dedicated configuration options. And what we have 
currently follows this approach. 

> 
> About time saving, I also disagree. If you enable assert for the whole project
> during all your development, it is a good practice which does not cost any
> time.
During the day I might switch multiple times between debug (with assert enabled)
and performance check (debug/assert disabled) modes. Now I can switch it easily and quickly,
just commenting [out] NDEBUG in mlx5.h. In the large projects the global configuration changing price
is getting higher. You just propose to pay this price ☹ - I would have to reconfig/recompile
the entire DPDK. The same might concern the other PMDs - many of them have the private
debug option(s).

> 
> About other PMDs, they must be fixed.
I suppose the developers of other PMDs use the module debug options either :)
 
> 
I agree the NDEBUG is "different behavior", we should think how to eliminate it.
But I do not understand why we should drop the partial configuration, the feature that is actively used.
Now code is split into several debug domains (at least for assert), we can control each domain in independent
fashion, and the practice shows it is useful and saves developer's time. DPDK is the large project definitely,
it contains a lot of modules, we can't address all development needs with one simple common configuration
option.

Having the module/private debug options does not eliminate the introducing the global control -
say "RTE_CONFIG_ENABLE_MODULE_DEBUG_OPTIONS" to provide production code generation
"in one click", but just dropping the module debug options is not nice as for me.

With best regards, Slava



^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH v2 0/4] net/mlx5: remove Tx descriptor reserved field usage
  2020-01-09 10:56 ` [dpdk-dev] [PATCH v2 0/4] net/mlx5: remove Tx descriptor reserved field usage Viacheslav Ovsiienko
                     ` (4 preceding siblings ...)
  2020-01-09 14:22   ` [dpdk-dev] [PATCH v2 0/4] net/mlx5: remove Tx descriptor reserved field usage Raslan Darawsheh
@ 2020-01-13  9:35   ` Raslan Darawsheh
  5 siblings, 0 replies; 31+ messages in thread
From: Raslan Darawsheh @ 2020-01-13  9:35 UTC (permalink / raw)
  To: Slava Ovsiienko, dev; +Cc: Matan Azrad, Ori Kam, Ferruh Yigit

Hi,

This series will be removed from next-net-mlx, will apply V3 instead.

Kindest regards,
Raslan Darawsheh

> -----Original Message-----
> From: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> Sent: Thursday, January 9, 2020 12:56 PM
> To: dev@dpdk.org
> Cc: Matan Azrad <matan@mellanox.com>; Raslan Darawsheh
> <rasland@mellanox.com>; Ori Kam <orika@mellanox.com>
> Subject: [PATCH v2 0/4] net/mlx5: remove Tx descriptor reserved field usage
> 
> The current Tx datapath implementation in mlx5 PMD uses the 16-bit
> reserved field within transmit descriptor to store the indices of the elts array
> keeping the mbuf pointers to be freed on transmit completion. On
> completion PMD fetches the descriptor index, then fetches the elts array
> index from reserved field and frees the mbufs.
> 
> The new ConnectX-6DX NIC might use this reserved descriptor field and
> existing implementation might not work in intended way.
> To resolve this issue the dedicated buffer is introduced to store indices to
> instead of descriptor field.
> 
> Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> 
> Viacheslav Ovsiienko (4):
>   net/mlx5: move Tx complete request routine
>   net/mlx5: update Tx error handling routine
>   net/mlx5: add free on completion queue
>   net/mlx5: engage free on completion queue
> 
>  drivers/net/mlx5/mlx5_rxtx.c | 153 ++++++++++++++++++++-----------------
> ------
>  drivers/net/mlx5/mlx5_rxtx.h |  13 ++--  drivers/net/mlx5/mlx5_txq.c  |  19
> +++++-
>  3 files changed, 94 insertions(+), 91 deletions(-)
> 
> --
> v1:
> https://eur03.safelinks.protection.outlook.com/?url=http%3A%2F%2Fpatch
> es.dpdk.org%2Fcover%2F64293%2F&amp;data=02%7C01%7Crasland%40mell
> anox.com%7C56986b5d3d3c46f2725b08d794f297e6%7Ca652971c7d2e4d9ba6
> a4d149256f461b%7C0%7C0%7C637141641963098885&amp;sdata=RP4VgjCQlp
> oJc5J38aajK9Rr8twtJ4d%2FSVP2JxM5C98%3D&amp;reserved=0
> v2: resolve minor compilation per patch issues
> 
> 1.8.3.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH v3 0/4] net/mlx5: remove Tx descriptor reserved field usage
  2020-01-09 17:16 ` [dpdk-dev] [PATCH v3 " Viacheslav Ovsiienko
                     ` (3 preceding siblings ...)
  2020-01-09 17:16   ` [dpdk-dev] [PATCH v3 4/4] net/mlx5: engage " Viacheslav Ovsiienko
@ 2020-01-13  9:36   ` Raslan Darawsheh
  4 siblings, 0 replies; 31+ messages in thread
From: Raslan Darawsheh @ 2020-01-13  9:36 UTC (permalink / raw)
  To: Slava Ovsiienko, dev; +Cc: Matan Azrad, Ori Kam

Hi,

> -----Original Message-----
> From: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> Sent: Thursday, January 9, 2020 7:16 PM
> To: dev@dpdk.org
> Cc: Matan Azrad <matan@mellanox.com>; Raslan Darawsheh
> <rasland@mellanox.com>; Ori Kam <orika@mellanox.com>
> Subject: [PATCH v3 0/4] net/mlx5: remove Tx descriptor reserved field usage
> 
> The current Tx datapath implementation in mlx5 PMD uses the 16-bit
> reserved field within transmit descriptor to store the indices of the elts array
> keeping the mbuf pointers to be freed on transmit completion. On
> completion PMD fetches the descriptor index, then fetches the elts array
> index from reserved field and frees the mbufs.
> 
> The new ConnectX-6DX NIC might use this reserved descriptor field and
> existing implementation might not work in intended way.
> To resolve this issue the dedicated buffer is introduced to store indices to
> instead of descriptor field.
> 
> Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> 
> Viacheslav Ovsiienko (4):
>   net/mlx5: move Tx complete request routine
>   net/mlx5: update Tx error handling routine
>   net/mlx5: add free on completion queue
>   net/mlx5: engage free on completion queue
> 
>  drivers/net/mlx5/mlx5_rxtx.c | 153 ++++++++++++++++++++-----------------
> ------
>  drivers/net/mlx5/mlx5_rxtx.h |  13 ++--  drivers/net/mlx5/mlx5_txq.c  |  19
> +++++-
>  3 files changed, 94 insertions(+), 91 deletions(-)
> 
> --
> v1: -
> https://eur03.safelinks.protection.outlook.com/?url=http%3A%2F%2Fpatch
> es.dpdk.org%2Fcover%2F64293%2F&amp;data=02%7C01%7Crasland%40mell
> anox.com%7C2581e58267ee4d46f6db08d79527c2da%7Ca652971c7d2e4d9ba6
> a4d149256f461b%7C0%7C0%7C637141870319356462&amp;sdata=rSy16E3H9
> OqxKekFtNTPSii3%2FPrKK1xjvEVddF14oMw%3D&amp;reserved=0
> v2: -
> https://eur03.safelinks.protection.outlook.com/?url=http%3A%2F%2Fpatch
> es.dpdk.org%2Fcover%2F64331%2F&amp;data=02%7C01%7Crasland%40mell
> anox.com%7C2581e58267ee4d46f6db08d79527c2da%7Ca652971c7d2e4d9ba6
> a4d149256f461b%7C0%7C0%7C637141870319356462&amp;sdata=N3XndSHXY
> Ae73Ubu%2BNNlr6CBDt6ciq7HKxNnuerHpzo%3D&amp;reserved=0
>     - resolve minor compilation per patch issues
> v3: - change the fcq entry type to uint16_t in non-debug mode
> 
> 1.8.3.1

Series applied to next-net-mlx,

Kindest regards,
Raslan Darawsheh

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH v2 4/4] net/mlx5: engage free on completion queue
  2020-01-09 15:43         ` Ferruh Yigit
  2020-01-09 16:22           ` Slava Ovsiienko
@ 2020-01-17 10:44           ` Slava Ovsiienko
  1 sibling, 0 replies; 31+ messages in thread
From: Slava Ovsiienko @ 2020-01-17 10:44 UTC (permalink / raw)
  To: Ferruh Yigit, dev; +Cc: Matan Azrad, Raslan Darawsheh, Ori Kam, Thomas Monjalon

> -----Original Message-----
> From: Ferruh Yigit <ferruh.yigit@intel.com>
> Sent: Thursday, January 9, 2020 17:44
> To: Slava Ovsiienko <viacheslavo@mellanox.com>; dev@dpdk.org
> Cc: Matan Azrad <matan@mellanox.com>; Raslan Darawsheh
> <rasland@mellanox.com>; Ori Kam <orika@mellanox.com>; Thomas
> Monjalon <thomas@monjalon.net>
> Subject: Re: [dpdk-dev] [PATCH v2 4/4] net/mlx5: engage free on completion
> queue
> 
> On 1/9/2020 3:27 PM, Slava Ovsiienko wrote:
> >> -----Original Message-----
> >> From: Ferruh Yigit <ferruh.yigit@intel.com>
> >> Sent: Thursday, January 9, 2020 17:19
> >> To: Slava Ovsiienko <viacheslavo@mellanox.com>; dev@dpdk.org
> >> Cc: Matan Azrad <matan@mellanox.com>; Raslan Darawsheh
> >> <rasland@mellanox.com>; Ori Kam <orika@mellanox.com>; Thomas
> Monjalon
> >> <thomas@monjalon.net>
> >> Subject: Re: [dpdk-dev] [PATCH v2 4/4] net/mlx5: engage free on
> >> completion queue
> >>
> >> On 1/9/2020 10:56 AM, Viacheslav Ovsiienko wrote:
> >>> The free on completion queue keeps the indices of elts array, all
> >>> mbuf stored below this index should be freed on arrival of normal
> >>> send completion. In debug version it also contains an index of
> >>> completed transmitting descriptor (WQE) to check queues
> synchronization.
> >>>
> >>> Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> >>> Acked-by: Matan Azrad <matan@mellanox.com>
> >>
> >> <...>
> >>
> >>> @@ -2108,17 +2108,18 @@ enum mlx5_txcmp_code {
> >>>  			/*
> >>>  			 * We are going to fetch all entries with
> >>>  			 * MLX5_CQE_SYNDROME_WR_FLUSH_ERR status.
> >>> +			 * The send queue is supposed to be empty.
> >>>  			 */
> >>>  			++ci;
> >>> +			txq->cq_pi = ci;
> >>> +			last_cqe = NULL;
> >>>  			continue;
> >>>  		}
> >>>  		/* Normal transmit completion. */
> >>> +		assert(ci != txq->cq_pi);
> >>> +		assert((txq->fcqs[ci & txq->cqe_m] >> 16) == cqe-
> >>> wqe_counter);
> >>
> >> And same comments on these as previous patches, we spend some effort
> >> to remove the 'rte_panic' from drivers, this is almost same thing.
> >>
> >> I think a driver shouldn't decide to exit whole application, it's
> >> effect should be limited to the driver.
> >>
> >> Assert is useful for debug and during development, but not sure
> >> having them in the production code.
> >
> > IIRC, "assert" is standard C function. Compiled only if there is no NDEBUG
> defined.
> > So, assert does exactly what you are saying - provide the debug break
> > not allowing the bug to evolve. And no this break in production code.
> >
> 
> Since mlx driver is using NDEBUG defined, what you said is right indeed. But
> why not using RTE_ASSERT to be consistent with rest. There is a specific config
> option to control assert (RTE_ENABLE_ASSERT) and anyone using it will get
> different behavior with mlx5.

OK, to summarize (after discussions with Ferruh and Thomas):
1. assert/NDEBUG in mlx5 should be fixed
2 in mlx5 assert should be replaced by MLX5_ASSERT macro, it provides "in-development" assert control if needed
3. mlx5 PMDs local debug configuration option will be removed from global build configuration 
(it is absent in meson build now, and as makefile is subject to drop - this option will not be supported anymore)
4. The patch in subject ("net/mlx5: engage free on completion queue") is not related to bullets 1-3 and
   should be accepted to Upstream, and the is the claimed commitment from my side to provide
  the dedicated patch for 1-3.

With best regards, Slava

^ permalink raw reply	[flat|nested] 31+ messages in thread

end of thread, other threads:[~2020-01-17 10:44 UTC | newest]

Thread overview: 31+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-08 16:15 [dpdk-dev] [PATCH 0/4] net/mlx5: remove Tx descriptor reserved field usage Viacheslav Ovsiienko
2020-01-08 16:15 ` [dpdk-dev] [PATCH 1/4] net/mlx5: move Tx complete request routine Viacheslav Ovsiienko
2020-01-08 16:15 ` [dpdk-dev] [PATCH 2/4] net/mlx5: update Tx error handling routine Viacheslav Ovsiienko
2020-01-08 16:16 ` [dpdk-dev] [PATCH 3/4] net/mlx5: add free on completion queue Viacheslav Ovsiienko
2020-01-08 16:16 ` [dpdk-dev] [PATCH 4/4] net/mlx5: engage " Viacheslav Ovsiienko
2020-01-09 10:56 ` [dpdk-dev] [PATCH v2 0/4] net/mlx5: remove Tx descriptor reserved field usage Viacheslav Ovsiienko
2020-01-09 10:56   ` [dpdk-dev] [PATCH v2 1/4] net/mlx5: move Tx complete request routine Viacheslav Ovsiienko
2020-01-09 10:56   ` [dpdk-dev] [PATCH v2 2/4] net/mlx5: update Tx error handling routine Viacheslav Ovsiienko
2020-01-09 10:56   ` [dpdk-dev] [PATCH v2 3/4] net/mlx5: add free on completion queue Viacheslav Ovsiienko
2020-01-09 15:12     ` Ferruh Yigit
2020-01-09 15:22       ` Slava Ovsiienko
2020-01-09 10:56   ` [dpdk-dev] [PATCH v2 4/4] net/mlx5: engage " Viacheslav Ovsiienko
2020-01-09 15:18     ` Ferruh Yigit
2020-01-09 15:27       ` Slava Ovsiienko
2020-01-09 15:43         ` Ferruh Yigit
2020-01-09 16:22           ` Slava Ovsiienko
2020-01-10  9:06             ` Thomas Monjalon
2020-01-10  9:28               ` Slava Ovsiienko
2020-01-10  9:34                 ` Thomas Monjalon
2020-01-10  9:55                   ` Slava Ovsiienko
2020-01-10 13:11                     ` Thomas Monjalon
2020-01-10 13:42                       ` Slava Ovsiienko
2020-01-17 10:44           ` Slava Ovsiienko
2020-01-09 14:22   ` [dpdk-dev] [PATCH v2 0/4] net/mlx5: remove Tx descriptor reserved field usage Raslan Darawsheh
2020-01-13  9:35   ` Raslan Darawsheh
2020-01-09 17:16 ` [dpdk-dev] [PATCH v3 " Viacheslav Ovsiienko
2020-01-09 17:16   ` [dpdk-dev] [PATCH v3 1/4] net/mlx5: move Tx complete request routine Viacheslav Ovsiienko
2020-01-09 17:16   ` [dpdk-dev] [PATCH v3 2/4] net/mlx5: update Tx error handling routine Viacheslav Ovsiienko
2020-01-09 17:16   ` [dpdk-dev] [PATCH v3 3/4] net/mlx5: add free on completion queue Viacheslav Ovsiienko
2020-01-09 17:16   ` [dpdk-dev] [PATCH v3 4/4] net/mlx5: engage " Viacheslav Ovsiienko
2020-01-13  9:36   ` [dpdk-dev] [PATCH v3 0/4] net/mlx5: remove Tx descriptor reserved field usage Raslan Darawsheh

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).