DPDK patches and discussions
* [PATCH 01/11] common/cnxk: free pending sqe buffers
@ 2022-11-28  9:54 Nithin Dabilpuram
  2022-11-28  9:54 ` [PATCH 02/11] net/cnxk: register callback to get queue errors Nithin Dabilpuram
                   ` (9 more replies)
  0 siblings, 10 replies; 12+ messages in thread
From: Nithin Dabilpuram @ 2022-11-28  9:54 UTC (permalink / raw)
  To: Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao; +Cc: jerinj, dev

From: Satha Rao <skoteshwar@marvell.com>

This patch provides a callback mechanism that is invoked when an SQ
receives an MNQ_ERR interrupt. Even after the MNQ_ERR interrupt the
application may still enqueue packets for transmission, and those
packets remain stuck in the SQ, so free all such pending packets when
SQ finish is called.
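
To illustrate the intended usage, here is a minimal sketch of a driver
registering the new ROC-level callback. The handler name and body are
illustrative only and not part of this patch; roc_nix_q_err_cb_register()
and roc_nix_q_err_cb_unregister() are the APIs added below.

/* Illustrative sketch, not part of the patch */
static void
drv_q_err_handle(struct roc_nix *roc_nix, void *data)
{
	/* Called from the SQ interrupt path on MNQ_ERR; a driver would
	 * typically signal the application that a reset is needed
	 * (see patch 02/11).
	 */
	(void)roc_nix;
	(void)data;
}

static int
drv_setup(struct roc_nix *roc_nix)
{
	int rc;

	rc = roc_nix_q_err_cb_register(roc_nix, drv_q_err_handle);
	if (rc)
		return rc;

	/* ... rest of the setup ... */
	/* On teardown: roc_nix_q_err_cb_unregister(roc_nix); */
	return 0;
}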

Signed-off-by: Satha Rao <skoteshwar@marvell.com>
---

Depends-on: series-25794 ("net/cnxk: rework no-fast-free offload handling")

 drivers/common/cnxk/roc_dev_priv.h  |   4 +
 drivers/common/cnxk/roc_nix.h       |   5 +
 drivers/common/cnxk/roc_nix_irq.c   |  11 ++-
 drivers/common/cnxk/roc_nix_priv.h  |   2 +
 drivers/common/cnxk/roc_nix_queue.c |  32 +++++--
 drivers/common/cnxk/roc_nix_tm.c    | 141 +++++++++++++++++++++++++++-
 drivers/common/cnxk/version.map     |   2 +
 7 files changed, 186 insertions(+), 11 deletions(-)

diff --git a/drivers/common/cnxk/roc_dev_priv.h b/drivers/common/cnxk/roc_dev_priv.h
index 302dc0feb0..e21a7154c0 100644
--- a/drivers/common/cnxk/roc_dev_priv.h
+++ b/drivers/common/cnxk/roc_dev_priv.h
@@ -30,6 +30,9 @@ typedef void (*link_info_t)(void *roc_nix,
 /* PTP info callback */
 typedef int (*ptp_info_t)(void *roc_nix, bool enable);
 
+/* Queue Error get callback */
+typedef void (*q_err_cb_t)(void *roc_nix, void *data);
+
 /* Link status get callback */
 typedef void (*link_status_get_t)(void *roc_nix,
 				  struct cgx_link_user_info *link);
@@ -38,6 +41,7 @@ struct dev_ops {
 	link_info_t link_status_update;
 	ptp_info_t ptp_info_update;
 	link_status_get_t link_status_get;
+	q_err_cb_t q_err_cb;
 };
 
 #define dev_is_vf(dev) ((dev)->hwcap & DEV_HWCAP_F_VF)
diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h
index 6654a2df78..dfc87e8758 100644
--- a/drivers/common/cnxk/roc_nix.h
+++ b/drivers/common/cnxk/roc_nix.h
@@ -405,6 +405,9 @@ typedef void (*link_status_t)(struct roc_nix *roc_nix,
 /* PTP info update callback */
 typedef int (*ptp_info_update_t)(struct roc_nix *roc_nix, bool enable);
 
+/* Queue Error get callback */
+typedef void (*q_err_get_t)(struct roc_nix *roc_nix, void *data);
+
 /* Link status get callback */
 typedef void (*link_info_get_t)(struct roc_nix *roc_nix,
 				struct roc_nix_link_info *link);
@@ -783,6 +786,8 @@ void __roc_api roc_nix_mac_link_cb_unregister(struct roc_nix *roc_nix);
 int __roc_api roc_nix_mac_link_info_get_cb_register(
 	struct roc_nix *roc_nix, link_info_get_t link_info_get);
 void __roc_api roc_nix_mac_link_info_get_cb_unregister(struct roc_nix *roc_nix);
+int __roc_api roc_nix_q_err_cb_register(struct roc_nix *roc_nix, q_err_get_t sq_err_handle);
+void __roc_api roc_nix_q_err_cb_unregister(struct roc_nix *roc_nix);
 
 /* Ops */
 int __roc_api roc_nix_switch_hdr_set(struct roc_nix *roc_nix,
diff --git a/drivers/common/cnxk/roc_nix_irq.c b/drivers/common/cnxk/roc_nix_irq.c
index d72980fb18..661af79193 100644
--- a/drivers/common/cnxk/roc_nix_irq.c
+++ b/drivers/common/cnxk/roc_nix_irq.c
@@ -249,9 +249,9 @@ nix_lf_q_irq(void *param)
 {
 	struct nix_qint *qint = (struct nix_qint *)param;
 	uint8_t irq, qintx = qint->qintx;
+	int q, cq, rq, sq, intr_cb = 0;
 	struct nix *nix = qint->nix;
 	struct dev *dev = &nix->dev;
-	int q, cq, rq, sq;
 	uint64_t intr;
 	uint8_t rc;
 
@@ -301,8 +301,10 @@ nix_lf_q_irq(void *param)
 
 		/* Detect Meta-descriptor enqueue error */
 		rc = nix_lf_sq_debug_reg(nix, NIX_LF_MNQ_ERR_DBG);
-		if (rc)
+		if (rc) {
 			plt_err("SQ=%d NIX_SQINT_MNQ_ERR, errcode %x", sq, rc);
+			intr_cb = 1;
+		}
 
 		/* Detect Send error */
 		rc = nix_lf_sq_debug_reg(nix, NIX_LF_SEND_ERR_DBG);
@@ -321,6 +323,11 @@ nix_lf_q_irq(void *param)
 	/* Dump registers to std out */
 	roc_nix_lf_reg_dump(nix_priv_to_roc_nix(nix), NULL);
 	roc_nix_queues_ctx_dump(nix_priv_to_roc_nix(nix), NULL);
+
+	/* Call reset callback */
+	if (intr_cb)
+		if (dev->ops->q_err_cb)
+			dev->ops->q_err_cb(nix_priv_to_roc_nix(nix), NULL);
 }
 
 int
diff --git a/drivers/common/cnxk/roc_nix_priv.h b/drivers/common/cnxk/roc_nix_priv.h
index 2eba44c248..02290a1b86 100644
--- a/drivers/common/cnxk/roc_nix_priv.h
+++ b/drivers/common/cnxk/roc_nix_priv.h
@@ -406,6 +406,8 @@ int nix_tm_bp_config_set(struct roc_nix *roc_nix, uint16_t sq, uint16_t tc,
 			 bool enable, bool force_flush);
 void nix_rq_vwqe_flush(struct roc_nix_rq *rq, uint16_t vwqe_interval);
 int nix_tm_mark_init(struct nix *nix);
+void nix_tm_sq_free_sqe_buffer(uint64_t *sqe, int head_off, int end_off, int instr_sz);
+int roc_nix_tm_sq_free_pending_sqe(struct nix *nix, int q);
 
 /*
  * TM priv utils.
diff --git a/drivers/common/cnxk/roc_nix_queue.c b/drivers/common/cnxk/roc_nix_queue.c
index 1cb1fd2101..8a84a34bef 100644
--- a/drivers/common/cnxk/roc_nix_queue.c
+++ b/drivers/common/cnxk/roc_nix_queue.c
@@ -1089,9 +1089,8 @@ sq_cn9k_fini(struct nix *nix, struct roc_nix_sq *sq)
 	while (count) {
 		void *next_sqb;
 
-		next_sqb = *(void **)((uintptr_t)sqb_buf +
-				      (uint32_t)((sqes_per_sqb - 1) *
-						 sq->max_sqe_sz));
+		next_sqb = *(void **)((uint64_t *)sqb_buf +
+				      (uint32_t)((sqes_per_sqb - 1) * (0x2 >> sq->max_sqe_sz) * 8));
 		roc_npa_aura_op_free(sq->aura_handle, 1, (uint64_t)sqb_buf);
 		sqb_buf = next_sqb;
 		count--;
@@ -1206,9 +1205,8 @@ sq_fini(struct nix *nix, struct roc_nix_sq *sq)
 	while (count) {
 		void *next_sqb;
 
-		next_sqb = *(void **)((uintptr_t)sqb_buf +
-				      (uint32_t)((sqes_per_sqb - 1) *
-						 sq->max_sqe_sz));
+		next_sqb = *(void **)((uint64_t *)sqb_buf +
+				      (uint32_t)((sqes_per_sqb - 1) * (0x2 >> sq->max_sqe_sz) * 8));
 		roc_npa_aura_op_free(sq->aura_handle, 1, (uint64_t)sqb_buf);
 		sqb_buf = next_sqb;
 		count--;
@@ -1386,3 +1384,25 @@ roc_nix_sq_head_tail_get(struct roc_nix *roc_nix, uint16_t qid, uint32_t *head,
 	/* Update tail index as per used sqb count */
 	*tail += (sqes_per_sqb * (sqb_cnt - 1));
 }
+
+int
+roc_nix_q_err_cb_register(struct roc_nix *roc_nix, q_err_get_t sq_err_handle)
+{
+	struct nix *nix = roc_nix_to_nix_priv(roc_nix);
+	struct dev *dev = &nix->dev;
+
+	if (sq_err_handle == NULL)
+		return NIX_ERR_PARAM;
+
+	dev->ops->q_err_cb = (q_err_cb_t)sq_err_handle;
+	return 0;
+}
+
+void
+roc_nix_q_err_cb_unregister(struct roc_nix *roc_nix)
+{
+	struct nix *nix = roc_nix_to_nix_priv(roc_nix);
+	struct dev *dev = &nix->dev;
+
+	dev->ops->q_err_cb = NULL;
+}
diff --git a/drivers/common/cnxk/roc_nix_tm.c b/drivers/common/cnxk/roc_nix_tm.c
index be8da714cd..255ca83f48 100644
--- a/drivers/common/cnxk/roc_nix_tm.c
+++ b/drivers/common/cnxk/roc_nix_tm.c
@@ -607,6 +607,136 @@ roc_nix_tm_sq_flush_spin(struct roc_nix_sq *sq)
 	return -EFAULT;
 }
 
+void
+nix_tm_sq_free_sqe_buffer(uint64_t *sqe, int head_off, int end_off, int instr_sz)
+{
+	int i, j, inc = (8 * (0x2 >> instr_sz)), segs;
+	struct nix_send_hdr_s *send_hdr;
+	uint64_t *ptr, aura_handle;
+	struct idev_cfg *idev;
+
+	if (!sqe)
+		return;
+
+	idev = idev_get_cfg();
+	if (idev == NULL)
+		return;
+
+	ptr = sqe + (head_off * inc);
+	for (i = head_off; i < end_off; i++) {
+		ptr = sqe + (i * inc);
+		send_hdr = (struct nix_send_hdr_s *)(ptr);
+		aura_handle = roc_npa_aura_handle_gen(send_hdr->w0.aura, idev->npa->base);
+		ptr += 2;
+		if (((*ptr >> 60) & 0xF) == NIX_SUBDC_EXT)
+			ptr += 2;
+		if (((*ptr >> 60) & 0xF) == NIX_SUBDC_AGE_AND_STATS)
+			ptr += 2;
+		if (((*ptr >> 60) & 0xF) == NIX_SUBDC_JUMP) {
+			ptr += 1;
+			ptr = (uint64_t *)*ptr;
+		}
+		if (((*ptr >> 60) & 0xF) == NIX_SUBDC_CRC)
+			ptr += 2;
+		/* We are not parsing immediate send descriptor */
+		if (((*ptr >> 60) & 0xF) == NIX_SUBDC_IMM)
+			continue;
+		while (1) {
+			if (((*ptr >> 60) & 0xF) == NIX_SUBDC_SG) {
+				segs = (*ptr >> 48) & 0x3;
+				ptr += 1;
+				for (j = 0; j < segs; j++) {
+					roc_npa_aura_op_free(aura_handle, 0, *ptr);
+					ptr += 1;
+				}
+				if (segs == 2)
+					ptr += 1;
+			} else if (((*ptr >> 60) & 0xF) == NIX_SUBDC_SG2) {
+				uint64_t aura = (*ptr >> 16) & 0xFFFFF;
+
+				aura = roc_npa_aura_handle_gen(aura, idev->npa->base);
+				ptr += 1;
+				roc_npa_aura_op_free(aura, 0, *ptr);
+				ptr += 1;
+			} else
+				break;
+		}
+	}
+}
+
+int
+roc_nix_tm_sq_free_pending_sqe(struct nix *nix, int q)
+{
+	int head_off, count, rc = 0, tail_off;
+	struct roc_nix_sq *sq = nix->sqs[q];
+	void *sqb_buf, *dat, *tail_sqb;
+	struct dev *dev = &nix->dev;
+	struct ndc_sync_op *ndc_req;
+	uint16_t sqes_per_sqb;
+	struct mbox *mbox;
+
+	mbox = dev->mbox;
+	/* Sync NDC-NIX-TX for LF */
+	ndc_req = mbox_alloc_msg_ndc_sync_op(mbox);
+	if (ndc_req == NULL)
+		return -EFAULT;
+
+	ndc_req->nix_lf_tx_sync = 1;
+	if (mbox_process(mbox))
+		rc |= NIX_ERR_NDC_SYNC;
+
+	if (rc)
+		plt_err("NDC_SYNC failed rc %d", rc);
+
+	rc = nix_q_ctx_get(dev, NIX_AQ_CTYPE_SQ, q, (void *)&dat);
+
+	if (roc_model_is_cn9k()) {
+		volatile struct nix_sq_ctx_s *ctx = (struct nix_sq_ctx_s *)dat;
+
+		/* We will cleanup SQE buffers only when we received MNQ interrupt */
+		if (!ctx->mnq_dis)
+			return -EFAULT;
+
+		count = ctx->sqb_count;
+		sqb_buf = (void *)ctx->head_sqb;
+		tail_sqb = (void *)ctx->tail_sqb;
+		head_off = ctx->head_offset;
+		tail_off = ctx->tail_offset;
+	} else {
+		volatile struct nix_cn10k_sq_ctx_s *ctx = (struct nix_cn10k_sq_ctx_s *)dat;
+
+		/* We will cleanup SQE buffers only when we received MNQ interrupt */
+		if (!ctx->mnq_dis)
+			return -EFAULT;
+
+		count = ctx->sqb_count;
+		/* Free SQB's that are used */
+		sqb_buf = (void *)ctx->head_sqb;
+		tail_sqb = (void *)ctx->tail_sqb;
+		head_off = ctx->head_offset;
+		tail_off = ctx->tail_offset;
+	}
+	sqes_per_sqb = 1 << sq->sqes_per_sqb_log2;
+	/* Free SQB's that are used */
+	while (count) {
+		void *next_sqb;
+
+		if (sqb_buf == tail_sqb)
+			nix_tm_sq_free_sqe_buffer(sqb_buf, head_off, tail_off, sq->max_sqe_sz);
+		else
+			nix_tm_sq_free_sqe_buffer(sqb_buf, head_off, (sqes_per_sqb - 1),
+						  sq->max_sqe_sz);
+		next_sqb = *(void **)((uint64_t *)sqb_buf +
+				      (uint32_t)((sqes_per_sqb - 1) * (0x2 >> sq->max_sqe_sz) * 8));
+		roc_npa_aura_op_free(sq->aura_handle, 1, (uint64_t)sqb_buf);
+		sqb_buf = next_sqb;
+		head_off = 0;
+		count--;
+	}
+
+	return 0;
+}
+
 /* Flush and disable tx queue and its parent SMQ */
 int
 nix_tm_sq_flush_pre(struct roc_nix_sq *sq)
@@ -635,7 +765,7 @@ nix_tm_sq_flush_pre(struct roc_nix_sq *sq)
 
 	/* Find the node for this SQ */
 	node = nix_tm_node_search(nix, qid, tree);
-	if (!node || !(node->flags & NIX_TM_NODE_ENABLED)) {
+	if (!node) {
 		plt_err("Invalid node/state for sq %u", qid);
 		return -EFAULT;
 	}
@@ -691,8 +821,13 @@ nix_tm_sq_flush_pre(struct roc_nix_sq *sq)
 		/* Wait for sq entries to be flushed */
 		rc = roc_nix_tm_sq_flush_spin(sq);
 		if (rc) {
-			plt_err("Failed to drain sq %u, rc=%d\n", sq->qid, rc);
-			return rc;
+			rc = roc_nix_tm_sq_free_pending_sqe(nix, sq->qid);
+			if (rc) {
+				plt_err("Failed to drain sq %u, rc=%d\n", sq->qid, rc);
+				return rc;
+			}
+			/* Freed all pending SQEs for this SQ, so disable this node */
+			sibling->flags &= ~NIX_TM_NODE_ENABLED;
 		}
 	}
 
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index 17f0ec6b48..70503c0470 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -224,6 +224,8 @@ INTERNAL {
 	roc_nix_ptp_rx_ena_dis;
 	roc_nix_ptp_sync_time_adjust;
 	roc_nix_ptp_tx_ena_dis;
+	roc_nix_q_err_cb_register;
+	roc_nix_q_err_cb_unregister;
 	roc_nix_queues_ctx_dump;
 	roc_nix_ras_intr_ena_dis;
 	roc_nix_reassembly_configure;
-- 
2.25.1



* [PATCH 02/11] net/cnxk: register callback to get queue errors
  2022-11-28  9:54 [PATCH 01/11] common/cnxk: free pending sqe buffers Nithin Dabilpuram
@ 2022-11-28  9:54 ` Nithin Dabilpuram
  2022-11-28  9:54 ` [PATCH 03/11] common/cnxk: set default SQ TC value Nithin Dabilpuram
                   ` (8 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Nithin Dabilpuram @ 2022-11-28  9:54 UTC (permalink / raw)
  To: Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao; +Cc: jerinj, dev

From: Satha Rao <skoteshwar@marvell.com>

Register a callback to receive SQ error interrupts and, on such an
error, raise the ethdev event RTE_ETH_EVENT_INTR_RESET to the
application.
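
For reference, a minimal sketch of how an application could consume this
event. The handler name and the recovery action are examples and not part
of this patch; rte_eth_dev_callback_register() is the standard ethdev API.

#include <stdio.h>
#include <rte_ethdev.h>

/* Illustrative application-side handler */
static int
app_reset_event_cb(uint16_t port_id, enum rte_eth_event_type event,
		   void *cb_arg, void *ret_param)
{
	(void)cb_arg;
	(void)ret_param;

	if (event == RTE_ETH_EVENT_INTR_RESET)
		printf("port %u: Tx queue error, reset requested\n", port_id);
	/* The application would typically stop, reconfigure and restart
	 * the port here.
	 */
	return 0;
}

/* Registration, e.g. during application init:
 *   rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_RESET,
 *				   app_reset_event_cb, NULL);
 */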

Signed-off-by: Satha Rao <skoteshwar@marvell.com>
---
 drivers/net/cnxk/cnxk_ethdev.c | 13 +++++++++++++
 drivers/net/cnxk/cnxk_ethdev.h |  1 +
 2 files changed, 14 insertions(+)

diff --git a/drivers/net/cnxk/cnxk_ethdev.c b/drivers/net/cnxk/cnxk_ethdev.c
index bf1585fe67..104aad7b51 100644
--- a/drivers/net/cnxk/cnxk_ethdev.c
+++ b/drivers/net/cnxk/cnxk_ethdev.c
@@ -1750,6 +1750,16 @@ struct eth_dev_ops cnxk_eth_dev_ops = {
 	.cman_config_get = cnxk_nix_cman_config_get,
 };
 
+void
+cnxk_eth_dev_q_err_cb(struct roc_nix *nix, void *data)
+{
+	struct cnxk_eth_dev *dev = (struct cnxk_eth_dev *)nix;
+	struct rte_eth_dev *eth_dev = dev->eth_dev;
+
+	/* Set the flag and execute application callbacks */
+	rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_RESET, data);
+}
+
 static int
 cnxk_eth_dev_init(struct rte_eth_dev *eth_dev)
 {
@@ -1804,6 +1814,9 @@ cnxk_eth_dev_init(struct rte_eth_dev *eth_dev)
 	roc_nix_mac_link_info_get_cb_register(nix,
 					      cnxk_eth_dev_link_status_get_cb);
 
+	/* Register up msg callbacks */
+	roc_nix_q_err_cb_register(nix, cnxk_eth_dev_q_err_cb);
+
 	/* Register callback for inline meta pool create */
 	roc_nix_inl_meta_pool_cb_register(cnxk_nix_inl_meta_pool_cb);
 
diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h
index 651ef45ea8..a86e9dba80 100644
--- a/drivers/net/cnxk/cnxk_ethdev.h
+++ b/drivers/net/cnxk/cnxk_ethdev.h
@@ -599,6 +599,7 @@ void cnxk_eth_dev_link_status_cb(struct roc_nix *nix,
 				 struct roc_nix_link_info *link);
 void cnxk_eth_dev_link_status_get_cb(struct roc_nix *nix,
 				     struct roc_nix_link_info *link);
+void cnxk_eth_dev_q_err_cb(struct roc_nix *nix, void *data);
 int cnxk_nix_link_update(struct rte_eth_dev *eth_dev, int wait_to_complete);
 int cnxk_nix_queue_stats_mapping(struct rte_eth_dev *dev, uint16_t queue_id,
 				 uint8_t stat_idx, uint8_t is_rx);
-- 
2.25.1



* [PATCH 03/11] common/cnxk: set default SQ TC value
  2022-11-28  9:54 [PATCH 01/11] common/cnxk: free pending sqe buffers Nithin Dabilpuram
  2022-11-28  9:54 ` [PATCH 02/11] net/cnxk: register callback to get queue errors Nithin Dabilpuram
@ 2022-11-28  9:54 ` Nithin Dabilpuram
  2022-11-28  9:54 ` [PATCH 04/11] common/cnxk: split NIX TM hierarchy enable API Nithin Dabilpuram
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Nithin Dabilpuram @ 2022-11-28  9:54 UTC (permalink / raw)
  To: Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao; +Cc: jerinj, dev

From: Satha Rao <skoteshwar@marvell.com>

Zero is a valid TC, so initialize the SQ TC value to
ROC_NIX_PFC_CLASS_INVALID instead.

Signed-off-by: Satha Rao <skoteshwar@marvell.com>
---
 drivers/common/cnxk/roc_nix_queue.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/common/cnxk/roc_nix_queue.c b/drivers/common/cnxk/roc_nix_queue.c
index 8a84a34bef..0dd3c8d4df 100644
--- a/drivers/common/cnxk/roc_nix_queue.c
+++ b/drivers/common/cnxk/roc_nix_queue.c
@@ -1235,6 +1235,7 @@ roc_nix_sq_init(struct roc_nix *roc_nix, struct roc_nix_sq *sq)
 		return NIX_ERR_QUEUE_INVALID_RANGE;
 
 	sq->roc_nix = roc_nix;
+	sq->tc = ROC_NIX_PFC_CLASS_INVALID;
 	/*
 	 * Allocate memory for flow control updates from HW.
 	 * Alloc one cache line, so that fits all FC_STYPE modes.
-- 
2.25.1



* [PATCH 04/11] common/cnxk: split NIX TM hierarchy enable API
  2022-11-28  9:54 [PATCH 01/11] common/cnxk: free pending sqe buffers Nithin Dabilpuram
  2022-11-28  9:54 ` [PATCH 02/11] net/cnxk: register callback to get queue errors Nithin Dabilpuram
  2022-11-28  9:54 ` [PATCH 03/11] common/cnxk: set default SQ TC value Nithin Dabilpuram
@ 2022-11-28  9:54 ` Nithin Dabilpuram
  2022-11-28  9:54 ` [PATCH 05/11] event/cnxk: net/cnxk: support transmit completion Nithin Dabilpuram
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Nithin Dabilpuram @ 2022-11-28  9:54 UTC (permalink / raw)
  To: Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao; +Cc: jerinj, dev

From: Satha Rao <skoteshwar@marvell.com>

The roc_nix_tm_hierarchy_enable() API does two things internally:
1) Create all TM nodes, allocate HW resources and connect
   them as requested.
2) Enable transmit by sending XON to the SMQs and starting the SQs.

Some test cases need to perform these two steps independently. To
support this, split the functionality into two APIs.
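
For clarity, a minimal sketch of the two-step usage this split enables.
The helper name is illustrative; it assumes roc_nix points to an
initialized NIX whose ROC_NIX_TM_DEFAULT tree has already been prepared.

/* Illustrative only */
static int
test_tm_enable_in_two_steps(struct roc_nix *roc_nix)
{
	int rc;

	/* Step 1: allocate HW resources and connect the TM nodes,
	 * without starting transmit.
	 */
	rc = roc_nix_tm_hierarchy_enable(roc_nix, ROC_NIX_TM_DEFAULT, false);
	if (rc)
		return rc;

	/* ... test-specific configuration/checks between the steps ... */

	/* Step 2: XON the SMQs and start the SQs */
	return roc_nix_tm_hierarchy_xmit_enable(roc_nix, ROC_NIX_TM_DEFAULT);
}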

Signed-off-by: Satha Rao <skoteshwar@marvell.com>
---
 drivers/common/cnxk/roc_nix.h        |   2 +
 drivers/common/cnxk/roc_nix_tm_ops.c | 116 +++++++++++++++------------
 drivers/common/cnxk/version.map      |   1 +
 3 files changed, 69 insertions(+), 50 deletions(-)

diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h
index dfc87e8758..47ee078c2e 100644
--- a/drivers/common/cnxk/roc_nix.h
+++ b/drivers/common/cnxk/roc_nix.h
@@ -672,6 +672,8 @@ int __roc_api roc_nix_tm_hierarchy_disable(struct roc_nix *roc_nix);
 int __roc_api roc_nix_tm_hierarchy_enable(struct roc_nix *roc_nix,
 					  enum roc_nix_tm_tree tree,
 					  bool xmit_enable);
+int __roc_api roc_nix_tm_hierarchy_xmit_enable(struct roc_nix *roc_nix, enum roc_nix_tm_tree tree);
+
 
 /*
  * TM utilities API.
diff --git a/drivers/common/cnxk/roc_nix_tm_ops.c b/drivers/common/cnxk/roc_nix_tm_ops.c
index 4bf7b1e104..5e8637ebdd 100644
--- a/drivers/common/cnxk/roc_nix_tm_ops.c
+++ b/drivers/common/cnxk/roc_nix_tm_ops.c
@@ -549,6 +549,67 @@ roc_nix_tm_hierarchy_disable(struct roc_nix *roc_nix)
 	return rc;
 }
 
+int
+roc_nix_tm_hierarchy_xmit_enable(struct roc_nix *roc_nix, enum roc_nix_tm_tree tree)
+{
+	struct nix *nix = roc_nix_to_nix_priv(roc_nix);
+	struct nix_tm_node_list *list;
+	struct nix_tm_node *node;
+	struct roc_nix_sq *sq;
+	uint16_t sq_id;
+	int rc;
+
+	if (tree >= ROC_NIX_TM_TREE_MAX)
+		return NIX_ERR_PARAM;
+
+	list = nix_tm_node_list(nix, tree);
+
+	/* Update SQ Sched Data while SQ is idle */
+	TAILQ_FOREACH(node, list, node) {
+		if (!nix_tm_is_leaf(nix, node->lvl))
+			continue;
+
+		rc = nix_tm_sq_sched_conf(nix, node, false);
+		if (rc) {
+			plt_err("SQ %u sched update failed, rc=%d", node->id,
+				rc);
+			return rc;
+		}
+	}
+
+	/* Finally XON all SMQ's */
+	TAILQ_FOREACH(node, list, node) {
+		if (node->hw_lvl != NIX_TXSCH_LVL_SMQ)
+			continue;
+
+		rc = nix_tm_smq_xoff(nix, node, false);
+		if (rc) {
+			plt_err("Failed to enable smq %u, rc=%d", node->hw_id,
+				rc);
+			return rc;
+		}
+	}
+
+	/* Enable xmit as all the topology is ready */
+	TAILQ_FOREACH(node, list, node) {
+		if (!nix_tm_is_leaf(nix, node->lvl))
+			continue;
+
+		sq_id = node->id;
+		sq = nix->sqs[sq_id];
+
+		rc = roc_nix_tm_sq_aura_fc(sq, true);
+		if (rc) {
+			plt_err("TM sw xon failed on SQ %u, rc=%d", node->id,
+				rc);
+			return rc;
+		}
+		node->flags |= NIX_TM_NODE_ENABLED;
+	}
+
+	return 0;
+}
+
 int
 roc_nix_tm_hierarchy_enable(struct roc_nix *roc_nix, enum roc_nix_tm_tree tree,
 			    bool xmit_enable)
@@ -556,9 +617,7 @@ roc_nix_tm_hierarchy_enable(struct roc_nix *roc_nix, enum roc_nix_tm_tree tree,
 	struct nix *nix = roc_nix_to_nix_priv(roc_nix);
 	struct nix_tm_node_list *list;
 	struct nix_tm_node *node;
-	struct roc_nix_sq *sq;
 	uint32_t tree_mask;
-	uint16_t sq_id;
 	int rc;
 
 	if (tree >= ROC_NIX_TM_TREE_MAX)
@@ -613,55 +672,12 @@ roc_nix_tm_hierarchy_enable(struct roc_nix *roc_nix, enum roc_nix_tm_tree tree,
 			node->flags |= NIX_TM_NODE_ENABLED;
 	}
 
-	if (!xmit_enable)
-		goto skip_sq_update;
+	if (xmit_enable)
+		rc = roc_nix_tm_hierarchy_xmit_enable(roc_nix, tree);
 
-	/* Update SQ Sched Data while SQ is idle */
-	TAILQ_FOREACH(node, list, node) {
-		if (!nix_tm_is_leaf(nix, node->lvl))
-			continue;
-
-		rc = nix_tm_sq_sched_conf(nix, node, false);
-		if (rc) {
-			plt_err("SQ %u sched update failed, rc=%d", node->id,
-				rc);
-			return rc;
-		}
-	}
-
-	/* Finally XON all SMQ's */
-	TAILQ_FOREACH(node, list, node) {
-		if (node->hw_lvl != NIX_TXSCH_LVL_SMQ)
-			continue;
-
-		rc = nix_tm_smq_xoff(nix, node, false);
-		if (rc) {
-			plt_err("Failed to enable smq %u, rc=%d", node->hw_id,
-				rc);
-			return rc;
-		}
-	}
-
-	/* Enable xmit as all the topology is ready */
-	TAILQ_FOREACH(node, list, node) {
-		if (!nix_tm_is_leaf(nix, node->lvl))
-			continue;
-
-		sq_id = node->id;
-		sq = nix->sqs[sq_id];
-
-		rc = roc_nix_tm_sq_aura_fc(sq, true);
-		if (rc) {
-			plt_err("TM sw xon failed on SQ %u, rc=%d", node->id,
-				rc);
-			return rc;
-		}
-		node->flags |= NIX_TM_NODE_ENABLED;
-	}
-
-skip_sq_update:
-	nix->tm_flags |= NIX_TM_HIERARCHY_ENA;
-	return 0;
+	if (!rc)
+		nix->tm_flags |= NIX_TM_HIERARCHY_ENA;
+	return rc;
 }
 
 int
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index 70503c0470..63fe9deb72 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -270,6 +270,7 @@ INTERNAL {
 	roc_nix_tm_tree_type_get;
 	roc_nix_tm_hierarchy_disable;
 	roc_nix_tm_hierarchy_enable;
+	roc_nix_tm_hierarchy_xmit_enable;
 	roc_nix_tm_init;
 	roc_nix_tm_is_user_hierarchy_enabled;
 	roc_nix_tm_leaf_cnt;
-- 
2.25.1



* [PATCH 05/11] event/cnxk: net/cnxk: support transmit completion
  2022-11-28  9:54 [PATCH 01/11] common/cnxk: free pending sqe buffers Nithin Dabilpuram
                   ` (2 preceding siblings ...)
  2022-11-28  9:54 ` [PATCH 04/11] common/cnxk: split NIX TM hierarchy enable API Nithin Dabilpuram
@ 2022-11-28  9:54 ` Nithin Dabilpuram
  2022-11-28  9:54 ` [PATCH 06/11] net/cnxk: fix packet type for IPv6 packets post decryption Nithin Dabilpuram
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Nithin Dabilpuram @ 2022-11-28  9:54 UTC (permalink / raw)
  To: Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao,
	Pavan Nikhilesh, Shijith Thotton
  Cc: jerinj, dev, Rakesh Kudurumalla

From: Rakesh Kudurumalla <rkudurumalla@marvell.com>

Add support to invoke the callback handler provided by the user when an
external buffer is attached to an mbuf and transmit completion is
enabled. Also add support to enable transmit completion via a devargs
option.
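
For context, a minimal sketch of the application side that this feature
serves. Names are illustrative; rte_pktmbuf_attach_extbuf() and
rte_pktmbuf_ext_shinfo_init_helper() are standard mbuf APIs, and the
feature itself is enabled per port with the tx_compl_ena devargs
documented below.

#include <stdlib.h>
#include <rte_mbuf.h>

/* Called back once the NIC has finished with the external buffer; the
 * PMD frees the main mbuf and hands the external buffer back here.
 */
static void
app_extbuf_free_cb(void *addr, void *opaque)
{
	(void)opaque;
	free(addr);	/* or recycle the buffer */
}

/* Attach an external buffer to an mbuf before transmit */
static int
app_attach_extbuf(struct rte_mbuf *m, void *buf, rte_iova_t iova,
		  uint16_t len)
{
	struct rte_mbuf_ext_shared_info *shinfo;

	shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &len,
						    app_extbuf_free_cb, NULL);
	if (shinfo == NULL)
		return -1;

	rte_pktmbuf_attach_extbuf(m, buf, iova, len, shinfo);
	return 0;
}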

Signed-off-by: Rakesh Kudurumalla <rkudurumalla@marvell.com>
---
 doc/guides/nics/cnxk.rst               |  14 ++
 drivers/event/cnxk/cn10k_worker.h      |   7 +-
 drivers/event/cnxk/cn9k_worker.h       |   8 +-
 drivers/net/cnxk/cn10k_ethdev.c        |  54 ++++++
 drivers/net/cnxk/cn10k_ethdev.h        |   1 +
 drivers/net/cnxk/cn10k_tx.h            | 215 +++++++++++++++++++----
 drivers/net/cnxk/cn9k_ethdev.c         |  54 ++++++
 drivers/net/cnxk/cn9k_ethdev.h         |   1 +
 drivers/net/cnxk/cn9k_tx.h             | 226 +++++++++++++++++++++----
 drivers/net/cnxk/cnxk_ethdev.c         |  28 ++-
 drivers/net/cnxk/cnxk_ethdev.h         |  17 ++
 drivers/net/cnxk/cnxk_ethdev_devargs.c |   6 +
 12 files changed, 553 insertions(+), 78 deletions(-)

diff --git a/doc/guides/nics/cnxk.rst b/doc/guides/nics/cnxk.rst
index 7da6cb3967..be176b53a2 100644
--- a/doc/guides/nics/cnxk.rst
+++ b/doc/guides/nics/cnxk.rst
@@ -361,6 +361,20 @@ Runtime Config Options
 
       -a 0002:1d:00.0,sdp_channel_mask=0x700/0xf00
 
+- ``Transmit completion handler`` (default ``0``)
+
+   When the transmit completion handler is enabled, the PMD invokes the callback
+   handler provided by the application for every packet that has an external
+   buffer attached to its mbuf: the main mbuf is freed and the external buffer
+   is handed back to the application. Once the external buffer is handed over,
+   it is the application's responsibility to either free or reuse it.
+
+   This handler is enabled using the ``tx_compl_ena`` ``devargs`` parameter.
+
+   For example::
+
+      -a 0002:01:00.1,tx_compl_ena=1
+
    With the above configuration, RTE Flow rules API will set the channel
    and channel mask as 0x700 and 0xF00 in the MCAM entries of the  flow rules
    created on the SDP device. This option needs to be used when more than one
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index 75a2ff244a..332a2e27c2 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -559,6 +559,9 @@ cn10k_sso_tx_one(struct cn10k_sso_hws *ws, struct rte_mbuf *m, uint64_t *cmd,
 	if (cn10k_sso_sq_depth(txq) <= 0)
 		return 0;
 
+	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)
+		handle_tx_completion_pkts(txq, 1, 1);
+
 	cn10k_nix_tx_skeleton(txq, cmd, flags, 0);
 	/* Perform header writes before barrier
 	 * for TSO
@@ -566,7 +569,7 @@ cn10k_sso_tx_one(struct cn10k_sso_hws *ws, struct rte_mbuf *m, uint64_t *cmd,
 	if (flags & NIX_TX_OFFLOAD_TSO_F)
 		cn10k_nix_xmit_prepare_tso(m, flags);
 
-	cn10k_nix_xmit_prepare(m, cmd, flags, txq->lso_tun_fmt, &sec,
+	cn10k_nix_xmit_prepare(txq, m, cmd, flags, txq->lso_tun_fmt, &sec,
 			       txq->mark_flag, txq->mark_fmt);
 
 	laddr = lmt_addr;
@@ -581,7 +584,7 @@ cn10k_sso_tx_one(struct cn10k_sso_hws *ws, struct rte_mbuf *m, uint64_t *cmd,
 	cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
 
 	if (flags & NIX_TX_MULTI_SEG_F)
-		segdw = cn10k_nix_prepare_mseg(m, (uint64_t *)laddr, flags);
+		segdw = cn10k_nix_prepare_mseg(txq, m, (uint64_t *)laddr, flags);
 	else
 		segdw = cn10k_nix_tx_ext_subs(flags) + 2;
 
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index 4c3932da47..54213db3b4 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -781,12 +781,16 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
 	    !(flags & NIX_TX_OFFLOAD_SECURITY_F))
 		rte_io_wmb();
 	txq = cn9k_sso_hws_xtract_meta(m, txq_data);
+
+	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)
+		handle_tx_completion_pkts(txq, 1, 1);
+
 	if (((txq->nb_sqb_bufs_adj -
 	      __atomic_load_n((int16_t *)txq->fc_mem, __ATOMIC_RELAXED))
 	     << txq->sqes_per_sqb_log2) <= 0)
 		return 0;
 	cn9k_nix_tx_skeleton(txq, cmd, flags, 0);
-	cn9k_nix_xmit_prepare(m, cmd, flags, txq->lso_tun_fmt, txq->mark_flag,
+	cn9k_nix_xmit_prepare(txq, m, cmd, flags, txq->lso_tun_fmt, txq->mark_flag,
 			      txq->mark_fmt);
 
 	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
@@ -808,7 +812,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
 	}
 
 	if (flags & NIX_TX_MULTI_SEG_F) {
-		const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags);
+		const uint16_t segdw = cn9k_nix_prepare_mseg(txq, m, cmd, flags);
 		cn9k_nix_xmit_prepare_tstamp(txq, cmd, m->ol_flags, segdw,
 					     flags);
 		if (!CNXK_TT_FROM_EVENT(ev->event)) {
diff --git a/drivers/net/cnxk/cn10k_ethdev.c b/drivers/net/cnxk/cn10k_ethdev.c
index 4658713591..61278bb72c 100644
--- a/drivers/net/cnxk/cn10k_ethdev.c
+++ b/drivers/net/cnxk/cn10k_ethdev.c
@@ -50,6 +50,7 @@ nix_tx_offload_flags(struct rte_eth_dev *eth_dev)
 {
 	struct cnxk_eth_dev *dev = cnxk_eth_pmd_priv(eth_dev);
 	uint64_t conf = dev->tx_offloads;
+	struct roc_nix *nix = &dev->nix;
 	uint16_t flags = 0;
 
 	/* Fastpath is dependent on these enums */
@@ -113,6 +114,9 @@ nix_tx_offload_flags(struct rte_eth_dev *eth_dev)
 	if (dev->tx_mark)
 		flags |= NIX_TX_OFFLOAD_VLAN_QINQ_F;
 
+	if (nix->tx_compl_ena)
+		flags |= NIX_TX_OFFLOAD_MBUF_NOFF_F;
+
 	return flags;
 }
 
@@ -165,6 +169,49 @@ nix_form_default_desc(struct cnxk_eth_dev *dev, struct cn10k_eth_txq *txq,
 	rte_wmb();
 }
 
+static int
+cn10k_nix_tx_compl_setup(struct cnxk_eth_dev *dev,
+		struct cn10k_eth_txq *txq,
+		struct roc_nix_sq *sq, uint16_t nb_desc)
+{
+	struct roc_nix_cq *cq;
+
+	cq = &dev->cqs[sq->cqid];
+	txq->tx_compl.desc_base = (uintptr_t)cq->desc_base;
+	txq->tx_compl.cq_door = cq->door;
+	txq->tx_compl.cq_status = cq->status;
+	txq->tx_compl.wdata = cq->wdata;
+	txq->tx_compl.head = cq->head;
+	txq->tx_compl.qmask = cq->qmask;
+	/* Total array size holding buffers is equal to
+	 * number of entries in cq and sq
+	 * max buffer in array = desc in cq + desc in sq
+	 */
+	txq->tx_compl.nb_desc_mask = (2 * rte_align32pow2(nb_desc)) - 1;
+	txq->tx_compl.ena = true;
+
+	txq->tx_compl.ptr = (struct rte_mbuf **)plt_zmalloc(txq->tx_compl.nb_desc_mask *
+			sizeof(struct rte_mbuf *), 0);
+	if (!txq->tx_compl.ptr)
+		return -1;
+
+	return 0;
+}
+
+static void
+cn10k_nix_tx_queue_release(struct rte_eth_dev *eth_dev, uint16_t qid)
+{
+	struct cnxk_eth_dev *dev = cnxk_eth_pmd_priv(eth_dev);
+	struct roc_nix *nix = &dev->nix;
+	struct cn10k_eth_txq *txq;
+
+	cnxk_nix_tx_queue_release(eth_dev, qid);
+	txq = eth_dev->data->tx_queues[qid];
+
+	if (nix->tx_compl_ena)
+		plt_free(txq->tx_compl.ptr);
+}
+
 static int
 cn10k_nix_tx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 			 uint16_t nb_desc, unsigned int socket,
@@ -191,6 +238,12 @@ cn10k_nix_tx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	/* Update fast path queue */
 	txq = eth_dev->data->tx_queues[qid];
 	txq->fc_mem = sq->fc;
+	if (nix->tx_compl_ena) {
+		rc = cn10k_nix_tx_compl_setup(dev, txq, sq, nb_desc);
+		if (rc)
+			return rc;
+	}
+
 	/* Store lmt base in tx queue for easy access */
 	txq->lmt_base = nix->lmt_base;
 	txq->io_addr = sq->io_addr;
@@ -711,6 +764,7 @@ nix_eth_dev_ops_override(void)
 	cnxk_eth_dev_ops.dev_configure = cn10k_nix_configure;
 	cnxk_eth_dev_ops.tx_queue_setup = cn10k_nix_tx_queue_setup;
 	cnxk_eth_dev_ops.rx_queue_setup = cn10k_nix_rx_queue_setup;
+	cnxk_eth_dev_ops.tx_queue_release = cn10k_nix_tx_queue_release;
 	cnxk_eth_dev_ops.tx_queue_stop = cn10k_nix_tx_queue_stop;
 	cnxk_eth_dev_ops.dev_start = cn10k_nix_dev_start;
 	cnxk_eth_dev_ops.dev_ptypes_set = cn10k_nix_ptypes_set;
diff --git a/drivers/net/cnxk/cn10k_ethdev.h b/drivers/net/cnxk/cn10k_ethdev.h
index 948c8348ad..c843ba9881 100644
--- a/drivers/net/cnxk/cn10k_ethdev.h
+++ b/drivers/net/cnxk/cn10k_ethdev.h
@@ -24,6 +24,7 @@ struct cn10k_eth_txq {
 	uint64_t ts_mem;
 	uint64_t mark_flag : 8;
 	uint64_t mark_fmt : 48;
+	struct cnxk_eth_txq_comp tx_compl;
 } __plt_cache_aligned;
 
 struct cn10k_eth_rxq {
diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index 09c332b2b5..c51de742ad 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -643,6 +643,28 @@ cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
 }
 #endif
 
+static __rte_always_inline uint64_t
+cn10k_nix_prefree_seg(struct rte_mbuf *m, struct cn10k_eth_txq *txq,
+		struct nix_send_hdr_s *send_hdr)
+{
+	uint32_t sqe_id;
+
+	if (RTE_MBUF_HAS_EXTBUF(m)) {
+		if (send_hdr->w0.pnc) {
+			txq->tx_compl.ptr[send_hdr->w1.sqe_id]->next = m;
+		} else {
+			sqe_id = __atomic_fetch_add(&txq->tx_compl.sqe_id, 1, __ATOMIC_RELAXED);
+			send_hdr->w0.pnc = 1;
+			send_hdr->w1.sqe_id = sqe_id &
+				txq->tx_compl.nb_desc_mask;
+			txq->tx_compl.ptr[send_hdr->w1.sqe_id] = m;
+		}
+		return 1;
+	} else {
+		return cnxk_nix_prefree_seg(m);
+	}
+}
+
 static __rte_always_inline void
 cn10k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
 {
@@ -696,7 +718,8 @@ cn10k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
 }
 
 static __rte_always_inline void
-cn10k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
+cn10k_nix_xmit_prepare(struct cn10k_eth_txq *txq,
+		       struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
 		       const uint64_t lso_tun_fmt, bool *sec, uint8_t mark_flag,
 		       uint64_t mark_fmt)
 {
@@ -888,7 +911,7 @@ cn10k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
 			 *		is greater than 1
 			 * DF bit = 0 otherwise
 			 */
-			send_hdr->w0.df = cnxk_nix_prefree_seg(m);
+			send_hdr->w0.df = cn10k_nix_prefree_seg(m, txq, send_hdr);
 		}
 		/* Mark mempool object as "put" since it is freed by NIX */
 		if (!send_hdr->w0.df)
@@ -959,7 +982,8 @@ cn10k_nix_xmit_prepare_tstamp(struct cn10k_eth_txq *txq, uintptr_t lmt_addr,
 }
 
 static __rte_always_inline uint16_t
-cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
+cn10k_nix_prepare_mseg(struct cn10k_eth_txq *txq,
+		       struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
 {
 	uint64_t prefree = 0, aura0, aura, nb_segs, segdw;
 	struct nix_send_hdr_s *send_hdr;
@@ -993,7 +1017,7 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
 
 	/* Set invert df if buffer is not to be freed by H/W */
 	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
-		prefree = cnxk_nix_prefree_seg(m);
+		prefree = cn10k_nix_prefree_seg(m, txq, send_hdr);
 		l_sg.i1 = prefree;
 	}
 
@@ -1035,7 +1059,7 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
 
 		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
 			aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);
-			prefree = cnxk_nix_prefree_seg(m);
+			prefree = cn10k_nix_prefree_seg(m, txq, send_hdr);
 			is_sg2 = aura != aura0 && !prefree;
 		}
 
@@ -1119,6 +1143,83 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
 	return segdw;
 }
 
+static inline uint16_t
+nix_tx_compl_nb_pkts(struct cn10k_eth_txq *txq, const uint64_t wdata,
+		const uint16_t pkts, const uint32_t qmask)
+{
+	uint32_t available = txq->tx_compl.available;
+
+	/* Update the available count if cached value is not enough */
+	if (unlikely(available < pkts)) {
+		uint64_t reg, head, tail;
+
+		/* Use LDADDA version to avoid reorder */
+		reg = roc_atomic64_add_sync(wdata, txq->tx_compl.cq_status);
+		/* CQ_OP_STATUS operation error */
+		if (reg & BIT_ULL(NIX_CQ_OP_STAT_OP_ERR) ||
+				reg & BIT_ULL(NIX_CQ_OP_STAT_CQ_ERR))
+			return 0;
+
+		tail = reg & 0xFFFFF;
+		head = (reg >> 20) & 0xFFFFF;
+		if (tail < head)
+			available = tail - head + qmask + 1;
+		else
+			available = tail - head;
+
+		txq->tx_compl.available = available;
+	}
+	return RTE_MIN(pkts, available);
+}
+
+static inline void
+handle_tx_completion_pkts(struct cn10k_eth_txq *txq, const uint16_t pkts,
+			  uint8_t mt_safe)
+{
+#define CNXK_NIX_CQ_ENTRY_SZ 128
+#define CQE_SZ(x)            ((x) * CNXK_NIX_CQ_ENTRY_SZ)
+
+	uint16_t tx_pkts = 0, nb_pkts;
+	const uintptr_t desc = txq->tx_compl.desc_base;
+	const uint64_t wdata = txq->tx_compl.wdata;
+	const uint32_t qmask = txq->tx_compl.qmask;
+	uint32_t head = txq->tx_compl.head;
+	struct nix_cqe_hdr_s *tx_compl_cq;
+	struct nix_send_comp_s *tx_compl_s0;
+	struct rte_mbuf *m_next, *m;
+
+	if (mt_safe)
+		rte_spinlock_lock(&txq->tx_compl.ext_buf_lock);
+
+	nb_pkts = nix_tx_compl_nb_pkts(txq, wdata, pkts, qmask);
+	while (tx_pkts < nb_pkts) {
+		rte_prefetch_non_temporal((void *)(desc +
+					(CQE_SZ((head + 2) & qmask))));
+		tx_compl_cq = (struct nix_cqe_hdr_s *)
+			(desc + CQE_SZ(head));
+		tx_compl_s0 = (struct nix_send_comp_s *)
+			((uint64_t *)tx_compl_cq + 1);
+		m = txq->tx_compl.ptr[tx_compl_s0->sqe_id];
+		while (m->next != NULL) {
+			m_next = m->next;
+			rte_pktmbuf_free_seg(m);
+			m = m_next;
+		}
+		rte_pktmbuf_free_seg(m);
+
+		head++;
+		head &= qmask;
+		tx_pkts++;
+	}
+	txq->tx_compl.head = head;
+	txq->tx_compl.available -= nb_pkts;
+
+	plt_write64((wdata | nb_pkts), txq->tx_compl.cq_door);
+
+	if (mt_safe)
+		rte_spinlock_unlock(&txq->tx_compl.ext_buf_lock);
+}
+
 static __rte_always_inline uint16_t
 cn10k_nix_xmit_pkts(void *tx_queue, uint64_t *ws, struct rte_mbuf **tx_pkts,
 		    uint16_t pkts, uint64_t *cmd, const uint16_t flags)
@@ -1139,6 +1240,9 @@ cn10k_nix_xmit_pkts(void *tx_queue, uint64_t *ws, struct rte_mbuf **tx_pkts,
 	uint64_t data;
 	bool sec;
 
+	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)
+		handle_tx_completion_pkts(txq, pkts, flags & NIX_TX_VWQE_F);
+
 	if (!(flags & NIX_TX_VWQE_F)) {
 		NIX_XMIT_FC_OR_RETURN(txq, pkts);
 		/* Reduce the cached count */
@@ -1181,7 +1285,7 @@ cn10k_nix_xmit_pkts(void *tx_queue, uint64_t *ws, struct rte_mbuf **tx_pkts,
 		if (flags & NIX_TX_OFFLOAD_TSO_F)
 			cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
 
-		cn10k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,
+		cn10k_nix_xmit_prepare(txq, tx_pkts[i], cmd, flags, lso_tun_fmt,
 				       &sec, mark_flag, mark_fmt);
 
 		laddr = (uintptr_t)LMT_OFF(lbase, lnum, 0);
@@ -1285,6 +1389,9 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, uint64_t *ws,
 	uintptr_t laddr;
 	bool sec;
 
+	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)
+		handle_tx_completion_pkts(txq, pkts, flags & NIX_TX_VWQE_F);
+
 	if (!(flags & NIX_TX_VWQE_F)) {
 		NIX_XMIT_FC_OR_RETURN(txq, pkts);
 		/* Reduce the cached count */
@@ -1331,7 +1438,7 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, uint64_t *ws,
 		if (flags & NIX_TX_OFFLOAD_TSO_F)
 			cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
 
-		cn10k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,
+		cn10k_nix_xmit_prepare(txq, tx_pkts[i], cmd, flags, lso_tun_fmt,
 				       &sec, mark_flag, mark_fmt);
 
 		laddr = (uintptr_t)LMT_OFF(lbase, lnum, 0);
@@ -1345,7 +1452,7 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, uint64_t *ws,
 		/* Move NIX desc to LMT/NIXTX area */
 		cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
 		/* Store sg list directly on lmt line */
-		segdw = cn10k_nix_prepare_mseg(tx_pkts[i], (uint64_t *)laddr,
+		segdw = cn10k_nix_prepare_mseg(txq, tx_pkts[i], (uint64_t *)laddr,
 					       flags);
 		cn10k_nix_xmit_prepare_tstamp(txq, laddr, tx_pkts[i]->ol_flags,
 					      segdw, flags);
@@ -1467,7 +1574,8 @@ cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
 }
 
 static __rte_always_inline uint16_t
-cn10k_nix_prepare_mseg_vec_noff(struct rte_mbuf *m, uint64_t *cmd,
+cn10k_nix_prepare_mseg_vec_noff(struct cn10k_eth_txq *txq,
+				struct rte_mbuf *m, uint64_t *cmd,
 				uint64x2_t *cmd0, uint64x2_t *cmd1,
 				uint64x2_t *cmd2, uint64x2_t *cmd3,
 				const uint32_t flags)
@@ -1482,7 +1590,7 @@ cn10k_nix_prepare_mseg_vec_noff(struct rte_mbuf *m, uint64_t *cmd,
 		vst1q_u64(cmd + 2, *cmd1); /* sg */
 	}
 
-	segdw = cn10k_nix_prepare_mseg(m, cmd, flags);
+	segdw = cn10k_nix_prepare_mseg(txq, m, cmd, flags);
 
 	if (flags & NIX_TX_OFFLOAD_TSTAMP_F)
 		vst1q_u64(cmd + segdw * 2 - 2, *cmd3);
@@ -1581,7 +1689,8 @@ cn10k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
 #define NIX_DESCS_PER_LOOP 4
 
 static __rte_always_inline uint8_t
-cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,
+cn10k_nix_prep_lmt_mseg_vector(struct cn10k_eth_txq *txq,
+			       struct rte_mbuf **mbufs, uint64x2_t *cmd0,
 			       uint64x2_t *cmd1, uint64x2_t *cmd2,
 			       uint64x2_t *cmd3, uint8_t *segdw,
 			       uint64_t *lmt_addr, __uint128_t *data128,
@@ -1599,7 +1708,7 @@ cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,
 				lmt_addr += 16;
 				off = 0;
 			}
-			off += cn10k_nix_prepare_mseg_vec_noff(mbufs[j],
+			off += cn10k_nix_prepare_mseg_vec_noff(txq, mbufs[j],
 					lmt_addr + off * 2, &cmd0[j], &cmd1[j],
 					&cmd2[j], &cmd3[j], flags);
 		}
@@ -1741,14 +1850,15 @@ cn10k_nix_lmt_next(uint8_t dw, uintptr_t laddr, uint8_t *lnum, uint8_t *loff,
 }
 
 static __rte_always_inline void
-cn10k_nix_xmit_store(struct rte_mbuf *mbuf, uint8_t segdw, uintptr_t laddr,
+cn10k_nix_xmit_store(struct cn10k_eth_txq *txq,
+		     struct rte_mbuf *mbuf, uint8_t segdw, uintptr_t laddr,
 		     uint64x2_t cmd0, uint64x2_t cmd1, uint64x2_t cmd2,
 		     uint64x2_t cmd3, const uint16_t flags)
 {
 	uint8_t off;
 
 	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
-		cn10k_nix_prepare_mseg_vec_noff(mbuf, LMT_OFF(laddr, 0, 0),
+		cn10k_nix_prepare_mseg_vec_noff(txq, mbuf, LMT_OFF(laddr, 0, 0),
 						&cmd0, &cmd1, &cmd2, &cmd3,
 						flags);
 		return;
@@ -1816,9 +1926,12 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
 	uint64x2_t sgdesc01_w0, sgdesc23_w0;
 	uint64x2_t sgdesc01_w1, sgdesc23_w1;
 	struct cn10k_eth_txq *txq = tx_queue;
+	uint64x2_t xmask01_w0, xmask23_w0;
+	uint64x2_t xmask01_w1, xmask23_w1;
 	rte_iova_t io_addr = txq->io_addr;
 	uintptr_t laddr = txq->lmt_base;
 	uint8_t c_lnum, c_shft, c_loff;
+	struct nix_send_hdr_s send_hdr;
 	uint64x2_t ltypes01, ltypes23;
 	uint64x2_t xtmp128, ytmp128;
 	uint64x2_t xmask01, xmask23;
@@ -1831,6 +1944,9 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
 		uint64_t data[2];
 	} wd;
 
+	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)
+		handle_tx_completion_pkts(txq, pkts, flags & NIX_TX_VWQE_F);
+
 	if (!(flags & NIX_TX_VWQE_F)) {
 		NIX_XMIT_FC_OR_RETURN(txq, pkts);
 		scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
@@ -2664,8 +2780,10 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
 		    !(flags & NIX_TX_MULTI_SEG_F) &&
 		    !(flags & NIX_TX_OFFLOAD_SECURITY_F)) {
 			/* Set don't free bit if reference count > 1 */
-			xmask01 = vdupq_n_u64(0);
-			xmask23 = xmask01;
+			xmask01_w0 = vdupq_n_u64(0);
+			xmask01_w1 = vdupq_n_u64(0);
+			xmask23_w0 = xmask01_w0;
+			xmask23_w1 = xmask01_w1;
 
 			/* Move mbufs to iova */
 			mbuf0 = (uint64_t *)tx_pkts[0];
@@ -2673,35 +2791,62 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
 			mbuf2 = (uint64_t *)tx_pkts[2];
 			mbuf3 = (uint64_t *)tx_pkts[3];
 
-			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))
-				xmask01 = vsetq_lane_u64(0x80000, xmask01, 0);
-			else
+			send_hdr.w0.u = 0;
+			send_hdr.w1.u = 0;
+
+			if (cn10k_nix_prefree_seg((struct rte_mbuf *)mbuf0, txq, &send_hdr)) {
+				send_hdr.w0.df = 1;
+				xmask01_w0 = vsetq_lane_u64(send_hdr.w0.u, xmask01_w0, 0);
+				xmask01_w1 = vsetq_lane_u64(send_hdr.w1.u, xmask01_w1, 0);
+			} else {
 				RTE_MEMPOOL_CHECK_COOKIES(
 					((struct rte_mbuf *)mbuf0)->pool,
 					(void **)&mbuf0, 1, 0);
+			}
 
-			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))
-				xmask01 = vsetq_lane_u64(0x80000, xmask01, 1);
-			else
+			send_hdr.w0.u = 0;
+			send_hdr.w1.u = 0;
+
+			if (cn10k_nix_prefree_seg((struct rte_mbuf *)mbuf1, txq, &send_hdr)) {
+				send_hdr.w0.df = 1;
+				xmask01_w0 = vsetq_lane_u64(send_hdr.w0.u, xmask01_w0, 1);
+				xmask01_w1 = vsetq_lane_u64(send_hdr.w1.u, xmask01_w1, 1);
+			} else {
 				RTE_MEMPOOL_CHECK_COOKIES(
 					((struct rte_mbuf *)mbuf1)->pool,
 					(void **)&mbuf1, 1, 0);
+			}
 
-			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))
-				xmask23 = vsetq_lane_u64(0x80000, xmask23, 0);
-			else
+			send_hdr.w0.u = 0;
+			send_hdr.w1.u = 0;
+
+			if (cn10k_nix_prefree_seg((struct rte_mbuf *)mbuf2, txq, &send_hdr)) {
+				send_hdr.w0.df = 1;
+				xmask23_w0 = vsetq_lane_u64(send_hdr.w0.u, xmask23_w0, 0);
+				xmask23_w1 = vsetq_lane_u64(send_hdr.w1.u, xmask23_w1, 0);
+			} else {
 				RTE_MEMPOOL_CHECK_COOKIES(
 					((struct rte_mbuf *)mbuf2)->pool,
 					(void **)&mbuf2, 1, 0);
+			}
 
-			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))
-				xmask23 = vsetq_lane_u64(0x80000, xmask23, 1);
-			else
+			send_hdr.w0.u = 0;
+			send_hdr.w1.u = 0;
+
+			if (cn10k_nix_prefree_seg((struct rte_mbuf *)mbuf3, txq, &send_hdr)) {
+				send_hdr.w0.df = 1;
+				xmask23_w0 = vsetq_lane_u64(send_hdr.w0.u, xmask23_w0, 1);
+				xmask23_w1 = vsetq_lane_u64(send_hdr.w1.u, xmask23_w1, 1);
+			} else {
 				RTE_MEMPOOL_CHECK_COOKIES(
 					((struct rte_mbuf *)mbuf3)->pool,
 					(void **)&mbuf3, 1, 0);
-			senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
-			senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
+			}
+
+			senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01_w0);
+			senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23_w0);
+			senddesc01_w1 = vorrq_u64(senddesc01_w1, xmask01_w1);
+			senddesc23_w1 = vorrq_u64(senddesc23_w1, xmask23_w1);
 		} else if (!(flags & NIX_TX_MULTI_SEG_F) &&
 			   !(flags & NIX_TX_OFFLOAD_SECURITY_F)) {
 			/* Move mbufs to iova */
@@ -2773,7 +2918,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
 						   &shift, &wd.data128, &next);
 
 			/* Store mbuf0 to LMTLINE/CPT NIXTX area */
-			cn10k_nix_xmit_store(tx_pkts[0], segdw[0], next,
+			cn10k_nix_xmit_store(txq, tx_pkts[0], segdw[0], next,
 					     cmd0[0], cmd1[0], cmd2[0], cmd3[0],
 					     flags);
 
@@ -2789,7 +2934,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
 						   &shift, &wd.data128, &next);
 
 			/* Store mbuf1 to LMTLINE/CPT NIXTX area */
-			cn10k_nix_xmit_store(tx_pkts[1], segdw[1], next,
+			cn10k_nix_xmit_store(txq, tx_pkts[1], segdw[1], next,
 					     cmd0[1], cmd1[1], cmd2[1], cmd3[1],
 					     flags);
 
@@ -2805,7 +2950,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
 						   &shift, &wd.data128, &next);
 
 			/* Store mbuf2 to LMTLINE/CPT NIXTX area */
-			cn10k_nix_xmit_store(tx_pkts[2], segdw[2], next,
+			cn10k_nix_xmit_store(txq, tx_pkts[2], segdw[2], next,
 					     cmd0[2], cmd1[2], cmd2[2], cmd3[2],
 					     flags);
 
@@ -2821,7 +2966,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
 						   &shift, &wd.data128, &next);
 
 			/* Store mbuf3 to LMTLINE/CPT NIXTX area */
-			cn10k_nix_xmit_store(tx_pkts[3], segdw[3], next,
+			cn10k_nix_xmit_store(txq, tx_pkts[3], segdw[3], next,
 					     cmd0[3], cmd1[3], cmd2[3], cmd3[3],
 					     flags);
 
@@ -2829,7 +2974,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
 			uint8_t j;
 
 			segdw[4] = 8;
-			j = cn10k_nix_prep_lmt_mseg_vector(tx_pkts, cmd0, cmd1,
+			j = cn10k_nix_prep_lmt_mseg_vector(txq, tx_pkts, cmd0, cmd1,
 							  cmd2, cmd3, segdw,
 							  (uint64_t *)
 							  LMT_OFF(laddr, lnum,
diff --git a/drivers/net/cnxk/cn9k_ethdev.c b/drivers/net/cnxk/cn9k_ethdev.c
index 3b702d9696..749214cf23 100644
--- a/drivers/net/cnxk/cn9k_ethdev.c
+++ b/drivers/net/cnxk/cn9k_ethdev.c
@@ -50,6 +50,7 @@ nix_tx_offload_flags(struct rte_eth_dev *eth_dev)
 {
 	struct cnxk_eth_dev *dev = cnxk_eth_pmd_priv(eth_dev);
 	uint64_t conf = dev->tx_offloads;
+	struct roc_nix *nix = &dev->nix;
 	uint16_t flags = 0;
 
 	/* Fastpath is dependent on these enums */
@@ -113,6 +114,9 @@ nix_tx_offload_flags(struct rte_eth_dev *eth_dev)
 	if (dev->tx_mark)
 		flags |= NIX_TX_OFFLOAD_VLAN_QINQ_F;
 
+	if (nix->tx_compl_ena)
+		flags |= NIX_TX_OFFLOAD_MBUF_NOFF_F;
+
 	return flags;
 }
 
@@ -165,12 +169,56 @@ nix_form_default_desc(struct cnxk_eth_dev *dev, struct cn9k_eth_txq *txq,
 	rte_wmb();
 }
 
+static int
+cn9k_nix_tx_compl_setup(struct cnxk_eth_dev *dev,
+		struct cn9k_eth_txq *txq,
+		struct roc_nix_sq *sq, uint16_t nb_desc)
+{
+	struct roc_nix_cq *cq;
+
+	cq = &dev->cqs[sq->cqid];
+	txq->tx_compl.desc_base = (uintptr_t)cq->desc_base;
+	txq->tx_compl.cq_door = cq->door;
+	txq->tx_compl.cq_status = cq->status;
+	txq->tx_compl.wdata = cq->wdata;
+	txq->tx_compl.head = cq->head;
+	txq->tx_compl.qmask = cq->qmask;
+	/* Total array size holding buffers is equal to
+	 * number of entries in cq and sq
+	 * max buffer in array = desc in cq + desc in sq
+	 */
+	txq->tx_compl.nb_desc_mask = (2 * rte_align32pow2(nb_desc)) - 1;
+	txq->tx_compl.ena = true;
+
+	txq->tx_compl.ptr = (struct rte_mbuf **)plt_zmalloc(txq->tx_compl.nb_desc_mask *
+			sizeof(struct rte_mbuf *), 0);
+	if (!txq->tx_compl.ptr)
+		return -1;
+
+	return 0;
+}
+
+static void
+cn9k_nix_tx_queue_release(struct rte_eth_dev *eth_dev, uint16_t qid)
+{
+	struct cnxk_eth_dev *dev = cnxk_eth_pmd_priv(eth_dev);
+	struct roc_nix *nix = &dev->nix;
+	struct cn9k_eth_txq *txq;
+
+	cnxk_nix_tx_queue_release(eth_dev, qid);
+	txq = eth_dev->data->tx_queues[qid];
+
+	if (nix->tx_compl_ena)
+		plt_free(txq->tx_compl.ptr);
+}
+
 static int
 cn9k_nix_tx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 			uint16_t nb_desc, unsigned int socket,
 			const struct rte_eth_txconf *tx_conf)
 {
 	struct cnxk_eth_dev *dev = cnxk_eth_pmd_priv(eth_dev);
+	struct roc_nix *nix = &dev->nix;
 	uint64_t mark_fmt, mark_flag;
 	struct roc_cpt_lf *inl_lf;
 	struct cn9k_eth_txq *txq;
@@ -190,6 +238,11 @@ cn9k_nix_tx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	/* Update fast path queue */
 	txq = eth_dev->data->tx_queues[qid];
 	txq->fc_mem = sq->fc;
+	if (nix->tx_compl_ena) {
+		rc = cn9k_nix_tx_compl_setup(dev, txq, sq, nb_desc);
+		if (rc)
+			return rc;
+	}
 	txq->lmt_addr = sq->lmt_addr;
 	txq->io_addr = sq->io_addr;
 	txq->nb_sqb_bufs_adj = sq->nb_sqb_bufs_adj;
@@ -634,6 +687,7 @@ nix_eth_dev_ops_override(void)
 	/* Update platform specific ops */
 	cnxk_eth_dev_ops.dev_configure = cn9k_nix_configure;
 	cnxk_eth_dev_ops.tx_queue_setup = cn9k_nix_tx_queue_setup;
+	cnxk_eth_dev_ops.tx_queue_release = cn9k_nix_tx_queue_release;
 	cnxk_eth_dev_ops.rx_queue_setup = cn9k_nix_rx_queue_setup;
 	cnxk_eth_dev_ops.tx_queue_stop = cn9k_nix_tx_queue_stop;
 	cnxk_eth_dev_ops.dev_start = cn9k_nix_dev_start;
diff --git a/drivers/net/cnxk/cn9k_ethdev.h b/drivers/net/cnxk/cn9k_ethdev.h
index 472a4b06da..a82dcb3d19 100644
--- a/drivers/net/cnxk/cn9k_ethdev.h
+++ b/drivers/net/cnxk/cn9k_ethdev.h
@@ -24,6 +24,7 @@ struct cn9k_eth_txq {
 	uint16_t cpt_desc;
 	uint64_t mark_flag : 8;
 	uint64_t mark_fmt : 48;
+	struct cnxk_eth_txq_comp tx_compl;
 } __plt_cache_aligned;
 
 struct cn9k_eth_rxq {
diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h
index 404edd6aed..17bbdce3a0 100644
--- a/drivers/net/cnxk/cn9k_tx.h
+++ b/drivers/net/cnxk/cn9k_tx.h
@@ -81,6 +81,28 @@ cn9k_nix_tx_skeleton(struct cn9k_eth_txq *txq, uint64_t *cmd,
 	}
 }
 
+static __rte_always_inline uint64_t
+cn9k_nix_prefree_seg(struct rte_mbuf *m, struct cn9k_eth_txq *txq,
+		struct nix_send_hdr_s *send_hdr)
+{
+	uint32_t sqe_id;
+
+	if (RTE_MBUF_HAS_EXTBUF(m)) {
+		if (send_hdr->w0.pnc) {
+			txq->tx_compl.ptr[send_hdr->w1.sqe_id]->next = m;
+		} else {
+			sqe_id = __atomic_fetch_add(&txq->tx_compl.sqe_id, 1, __ATOMIC_RELAXED);
+			send_hdr->w0.pnc = 1;
+			send_hdr->w1.sqe_id = sqe_id &
+				txq->tx_compl.nb_desc_mask;
+			txq->tx_compl.ptr[send_hdr->w1.sqe_id] = m;
+		}
+		return 1;
+	} else {
+		return cnxk_nix_prefree_seg(m);
+	}
+}
+
 static __rte_always_inline void
 cn9k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
 {
@@ -134,7 +156,8 @@ cn9k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
 }
 
 static __rte_always_inline void
-cn9k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
+cn9k_nix_xmit_prepare(struct cn9k_eth_txq *txq,
+		      struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
 		      const uint64_t lso_tun_fmt, uint8_t mark_flag,
 		      uint64_t mark_fmt)
 {
@@ -325,7 +348,7 @@ cn9k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
 			 *		is greater than 1
 			 * DF bit = 0 otherwise
 			 */
-			send_hdr->w0.df = cnxk_nix_prefree_seg(m);
+			send_hdr->w0.df = cn9k_nix_prefree_seg(m, txq, send_hdr);
 			/* Ensuring mbuf fields which got updated in
 			 * cnxk_nix_prefree_seg are written before LMTST.
 			 */
@@ -401,7 +424,8 @@ cn9k_nix_xmit_submit_lmt_release(const rte_iova_t io_addr)
 }
 
 static __rte_always_inline uint16_t
-cn9k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
+cn9k_nix_prepare_mseg(struct cn9k_eth_txq *txq,
+		      struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
 {
 	struct nix_send_hdr_s *send_hdr;
 	union nix_send_sg_s *sg;
@@ -429,7 +453,7 @@ cn9k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
 
 	/* Set invert df if buffer is not to be freed by H/W */
 	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
-		sg_u |= (cnxk_nix_prefree_seg(m) << 55);
+		sg_u |= (cn9k_nix_prefree_seg(m, txq, send_hdr) << 55);
 		rte_io_wmb();
 	}
 
@@ -450,7 +474,7 @@ cn9k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
 		*slist = rte_mbuf_data_iova(m);
 		/* Set invert df if buffer is not to be freed by H/W */
 		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
-			sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
+			sg_u |= (cn9k_nix_prefree_seg(m, txq, send_hdr) << (i + 55));
 			/* Commit changes to mbuf */
 			rte_io_wmb();
 		}
@@ -520,6 +544,83 @@ cn9k_nix_xmit_mseg_one_release(uint64_t *cmd, void *lmt_addr,
 	} while (lmt_status == 0);
 }
 
+static inline uint16_t
+nix_tx_compl_nb_pkts(struct cn9k_eth_txq *txq, const uint64_t wdata,
+		const uint16_t pkts, const uint32_t qmask)
+{
+	uint32_t available = txq->tx_compl.available;
+
+	/* Update the available count if cached value is not enough */
+	if (unlikely(available < pkts)) {
+		uint64_t reg, head, tail;
+
+		/* Use LDADDA version to avoid reorder */
+		reg = roc_atomic64_add_sync(wdata, txq->tx_compl.cq_status);
+		/* CQ_OP_STATUS operation error */
+		if (reg & BIT_ULL(NIX_CQ_OP_STAT_OP_ERR) ||
+				reg & BIT_ULL(NIX_CQ_OP_STAT_CQ_ERR))
+			return 0;
+
+		tail = reg & 0xFFFFF;
+		head = (reg >> 20) & 0xFFFFF;
+		if (tail < head)
+			available = tail - head + qmask + 1;
+		else
+			available = tail - head;
+
+		txq->tx_compl.available = available;
+	}
+	return RTE_MIN(pkts, available);
+}
+
+static inline void
+handle_tx_completion_pkts(struct cn9k_eth_txq *txq, const uint16_t pkts,
+			  uint8_t mt_safe)
+{
+#define CNXK_NIX_CQ_ENTRY_SZ 128
+#define CQE_SZ(x)            ((x) * CNXK_NIX_CQ_ENTRY_SZ)
+
+	uint16_t tx_pkts = 0, nb_pkts;
+	const uintptr_t desc = txq->tx_compl.desc_base;
+	const uint64_t wdata = txq->tx_compl.wdata;
+	const uint32_t qmask = txq->tx_compl.qmask;
+	uint32_t head = txq->tx_compl.head;
+	struct nix_cqe_hdr_s *tx_compl_cq;
+	struct nix_send_comp_s *tx_compl_s0;
+	struct rte_mbuf *m_next, *m;
+
+	if (mt_safe)
+		rte_spinlock_lock(&txq->tx_compl.ext_buf_lock);
+
+	nb_pkts = nix_tx_compl_nb_pkts(txq, wdata, pkts, qmask);
+	while (tx_pkts < nb_pkts) {
+		rte_prefetch_non_temporal((void *)(desc +
+					(CQE_SZ((head + 2) & qmask))));
+		tx_compl_cq = (struct nix_cqe_hdr_s *)
+			(desc + CQE_SZ(head));
+		tx_compl_s0 = (struct nix_send_comp_s *)
+			((uint64_t *)tx_compl_cq + 1);
+		m = txq->tx_compl.ptr[tx_compl_s0->sqe_id];
+		while (m->next != NULL) {
+			m_next = m->next;
+			rte_pktmbuf_free_seg(m);
+			m = m_next;
+		}
+		rte_pktmbuf_free_seg(m);
+
+		head++;
+		head &= qmask;
+		tx_pkts++;
+	}
+	txq->tx_compl.head = head;
+	txq->tx_compl.available -= nb_pkts;
+
+	plt_write64((wdata | nb_pkts), txq->tx_compl.cq_door);
+
+	if (mt_safe)
+		rte_spinlock_unlock(&txq->tx_compl.ext_buf_lock);
+}
+
 static __rte_always_inline uint16_t
 cn9k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
 		   uint64_t *cmd, const uint16_t flags)
@@ -531,6 +632,9 @@ cn9k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
 	uint8_t mark_flag = 0;
 	uint16_t i;
 
+	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)
+		handle_tx_completion_pkts(txq, pkts, 0);
+
 	NIX_XMIT_FC_OR_RETURN(txq, pkts);
 
 	cn9k_nix_tx_skeleton(txq, cmd, flags, 1);
@@ -555,7 +659,7 @@ cn9k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
 		rte_io_wmb();
 
 	for (i = 0; i < pkts; i++) {
-		cn9k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,
+		cn9k_nix_xmit_prepare(txq, tx_pkts[i], cmd, flags, lso_tun_fmt,
 				      mark_flag, mark_fmt);
 		cn9k_nix_xmit_prepare_tstamp(txq, cmd, tx_pkts[i]->ol_flags, 4,
 					     flags);
@@ -580,6 +684,9 @@ cn9k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
 	uint16_t segdw;
 	uint64_t i;
 
+	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)
+		handle_tx_completion_pkts(txq, pkts, 0);
+
 	NIX_XMIT_FC_OR_RETURN(txq, pkts);
 
 	cn9k_nix_tx_skeleton(txq, cmd, flags, 1);
@@ -604,9 +711,9 @@ cn9k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
 		rte_io_wmb();
 
 	for (i = 0; i < pkts; i++) {
-		cn9k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,
+		cn9k_nix_xmit_prepare(txq, tx_pkts[i], cmd, flags, lso_tun_fmt,
 				      mark_flag, mark_fmt);
-		segdw = cn9k_nix_prepare_mseg(tx_pkts[i], cmd, flags);
+		segdw = cn9k_nix_prepare_mseg(txq, tx_pkts[i], cmd, flags);
 		cn9k_nix_xmit_prepare_tstamp(txq, cmd, tx_pkts[i]->ol_flags,
 					     segdw, flags);
 		cn9k_nix_xmit_mseg_one(cmd, lmt_addr, io_addr, segdw);
@@ -658,8 +765,9 @@ cn9k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
 }
 
 static __rte_always_inline uint8_t
-cn9k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
-			       union nix_send_hdr_w0_u *sh,
+cn9k_nix_prepare_mseg_vec_list(struct cn9k_eth_txq *txq,
+			       struct rte_mbuf *m, uint64_t *cmd,
+			       struct nix_send_hdr_s *send_hdr,
 			       union nix_send_sg_s *sg, const uint32_t flags)
 {
 	struct rte_mbuf *m_next;
@@ -668,7 +776,7 @@ cn9k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
 	uint64_t segdw;
 	int i = 1;
 
-	sh->total = m->pkt_len;
+	send_hdr->w0.total = m->pkt_len;
 	/* Clear sg->u header before use */
 	sg->u &= 0xFC00000000000000;
 	sg_u = sg->u;
@@ -681,7 +789,7 @@ cn9k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
 
 	/* Set invert df if buffer is not to be freed by H/W */
 	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
-		sg_u |= (cnxk_nix_prefree_seg(m) << 55);
+		sg_u |= (cn9k_nix_prefree_seg(m, txq, send_hdr) << 55);
 		/* Mark mempool object as "put" since it is freed by NIX */
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
 	if (!(sg_u & (1ULL << 55)))
@@ -697,7 +805,7 @@ cn9k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
 		*slist = rte_mbuf_data_iova(m);
 		/* Set invert df if buffer is not to be freed by H/W */
 		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
-			sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
+			sg_u |= (cn9k_nix_prefree_seg(m, txq, send_hdr) << (i + 55));
 			/* Mark mempool object as "put" since it is freed by NIX
 			 */
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
@@ -731,24 +839,29 @@ cn9k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
 	/* Default dwords */
 	segdw += 1 + !!(flags & NIX_TX_NEED_EXT_HDR) +
 		 !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
-	sh->sizem1 = segdw - 1;
+	send_hdr->w0.sizem1 = segdw - 1;
 
 	return segdw;
 }
 
 static __rte_always_inline uint8_t
-cn9k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
+cn9k_nix_prepare_mseg_vec(struct cn9k_eth_txq *txq,
+			  struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
 			  uint64x2_t *cmd1, const uint32_t flags)
 {
-	union nix_send_hdr_w0_u sh;
+	struct nix_send_hdr_s send_hdr;
 	union nix_send_sg_s sg;
 	uint8_t ret;
 
 	if (m->nb_segs == 1) {
 		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+			send_hdr.w0.u = vgetq_lane_u64(cmd0[0], 0);
+			send_hdr.w1.u = vgetq_lane_u64(cmd0[0], 1);
 			sg.u = vgetq_lane_u64(cmd1[0], 0);
-			sg.u |= (cnxk_nix_prefree_seg(m) << 55);
+			sg.u |= (cn9k_nix_prefree_seg(m, txq, &send_hdr) << 55);
 			cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
+			cmd0[0] = vsetq_lane_u64(send_hdr.w0.u, cmd0[0], 0);
+			cmd0[0] = vsetq_lane_u64(send_hdr.w1.u, cmd0[0], 1);
 		}
 
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
@@ -761,12 +874,14 @@ cn9k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
 		       !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
 	}
 
-	sh.u = vgetq_lane_u64(cmd0[0], 0);
+	send_hdr.w0.u = vgetq_lane_u64(cmd0[0], 0);
+	send_hdr.w1.u = vgetq_lane_u64(cmd0[0], 1);
 	sg.u = vgetq_lane_u64(cmd1[0], 0);
 
-	ret = cn9k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);
+	ret = cn9k_nix_prepare_mseg_vec_list(txq, m, cmd, &send_hdr, &sg, flags);
 
-	cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);
+	cmd0[0] = vsetq_lane_u64(send_hdr.w0.u, cmd0[0], 0);
+	cmd0[0] = vsetq_lane_u64(send_hdr.w1.u, cmd0[0], 1);
 	cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
 	return ret;
 }
@@ -908,13 +1023,19 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
 	uint64x2_t sgdesc01_w1, sgdesc23_w1;
 	struct cn9k_eth_txq *txq = tx_queue;
 	uint64_t *lmt_addr = txq->lmt_addr;
+	uint64x2_t xmask01_w0, xmask23_w0;
+	uint64x2_t xmask01_w1, xmask23_w1;
 	rte_iova_t io_addr = txq->io_addr;
+	struct nix_send_hdr_s send_hdr;
 	uint64x2_t ltypes01, ltypes23;
 	uint64x2_t xtmp128, ytmp128;
 	uint64x2_t xmask01, xmask23;
 	uint64_t lmt_status, i;
 	uint16_t pkts_left;
 
+	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)
+		handle_tx_completion_pkts(txq, pkts, 0);
+
 	NIX_XMIT_FC_OR_RETURN(txq, pkts);
 
 	pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1);
@@ -1672,8 +1793,10 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
 		if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
 		    !(flags & NIX_TX_MULTI_SEG_F)) {
 			/* Set don't free bit if reference count > 1 */
-			xmask01 = vdupq_n_u64(0);
-			xmask23 = xmask01;
+			xmask01_w0 = vdupq_n_u64(0);
+			xmask01_w1 = vdupq_n_u64(0);
+			xmask23_w0 = xmask01_w0;
+			xmask23_w1 = xmask01_w1;
 
 			/* Move mbufs to iova */
 			mbuf0 = (uint64_t *)tx_pkts[0];
@@ -1681,35 +1804,63 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
 			mbuf2 = (uint64_t *)tx_pkts[2];
 			mbuf3 = (uint64_t *)tx_pkts[3];
 
-			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))
-				xmask01 = vsetq_lane_u64(0x80000, xmask01, 0);
-			else
+			send_hdr.w0.u = 0;
+			send_hdr.w1.u = 0;
+
+			if (cn9k_nix_prefree_seg((struct rte_mbuf *)mbuf0, txq, &send_hdr)) {
+				send_hdr.w0.df = 1;
+				xmask01_w0 = vsetq_lane_u64(send_hdr.w0.u, xmask01_w0, 0);
+				xmask01_w1 = vsetq_lane_u64(send_hdr.w1.u, xmask01_w1, 0);
+			} else {
 				RTE_MEMPOOL_CHECK_COOKIES(
 					((struct rte_mbuf *)mbuf0)->pool,
 					(void **)&mbuf0, 1, 0);
+			}
 
-			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))
-				xmask01 = vsetq_lane_u64(0x80000, xmask01, 1);
-			else
+			send_hdr.w0.u = 0;
+			send_hdr.w1.u = 0;
+
+			if (cn9k_nix_prefree_seg((struct rte_mbuf *)mbuf1, txq, &send_hdr)) {
+				send_hdr.w0.df = 1;
+				xmask01_w0 = vsetq_lane_u64(send_hdr.w0.u, xmask01_w0, 1);
+				xmask01_w1 = vsetq_lane_u64(send_hdr.w1.u, xmask01_w1, 1);
+			} else {
 				RTE_MEMPOOL_CHECK_COOKIES(
 					((struct rte_mbuf *)mbuf1)->pool,
 					(void **)&mbuf1, 1, 0);
+			}
 
-			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))
-				xmask23 = vsetq_lane_u64(0x80000, xmask23, 0);
-			else
+			send_hdr.w0.u = 0;
+			send_hdr.w1.u = 0;
+
+			if (cn9k_nix_prefree_seg((struct rte_mbuf *)mbuf2, txq, &send_hdr)) {
+				send_hdr.w0.df = 1;
+				xmask23_w0 = vsetq_lane_u64(send_hdr.w0.u, xmask23_w0, 0);
+				xmask23_w1 = vsetq_lane_u64(send_hdr.w1.u, xmask23_w1, 0);
+			} else {
 				RTE_MEMPOOL_CHECK_COOKIES(
 					((struct rte_mbuf *)mbuf2)->pool,
 					(void **)&mbuf2, 1, 0);
+			}
 
-			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))
-				xmask23 = vsetq_lane_u64(0x80000, xmask23, 1);
-			else
+			send_hdr.w0.u = 0;
+			send_hdr.w1.u = 0;
+
+			if (cn9k_nix_prefree_seg((struct rte_mbuf *)mbuf3, txq, &send_hdr)) {
+				send_hdr.w0.df = 1;
+				xmask23_w0 = vsetq_lane_u64(send_hdr.w0.u, xmask23_w0, 1);
+				xmask23_w1 = vsetq_lane_u64(send_hdr.w1.u, xmask23_w1, 1);
+			} else {
 				RTE_MEMPOOL_CHECK_COOKIES(
 					((struct rte_mbuf *)mbuf3)->pool,
 					(void **)&mbuf3, 1, 0);
-			senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
-			senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
+			}
+
+			senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01_w0);
+			senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23_w0);
+			senddesc01_w1 = vorrq_u64(senddesc01_w1, xmask01_w1);
+			senddesc23_w1 = vorrq_u64(senddesc23_w1, xmask23_w1);
+
 			/* Ensuring mbuf fields which got updated in
 			 * cnxk_nix_prefree_seg are written before LMTST.
 			 */
@@ -1769,7 +1920,8 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 			/* Build mseg list for each packet individually. */
 			for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
-				segdw[j] = cn9k_nix_prepare_mseg_vec(tx_pkts[j],
+				segdw[j] = cn9k_nix_prepare_mseg_vec(txq,
+							tx_pkts[j],
 							seg_list[j], &cmd0[j],
 							&cmd1[j], flags);
 			segdw[4] = 8;
diff --git a/drivers/net/cnxk/cnxk_ethdev.c b/drivers/net/cnxk/cnxk_ethdev.c
index 104aad7b51..1be2e9e776 100644
--- a/drivers/net/cnxk/cnxk_ethdev.c
+++ b/drivers/net/cnxk/cnxk_ethdev.c
@@ -455,7 +455,9 @@ cnxk_nix_tx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 {
 	struct cnxk_eth_dev *dev = cnxk_eth_pmd_priv(eth_dev);
 	const struct eth_dev_ops *dev_ops = eth_dev->dev_ops;
+	struct roc_nix *nix = &dev->nix;
 	struct cnxk_eth_txq_sp *txq_sp;
+	struct roc_nix_cq *cq;
 	struct roc_nix_sq *sq;
 	size_t txq_sz;
 	int rc;
@@ -480,6 +482,19 @@ cnxk_nix_tx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	sq->max_sqe_sz = nix_sq_max_sqe_sz(dev);
 	sq->tc = ROC_NIX_PFC_CLASS_INVALID;
 
+	if (nix->tx_compl_ena) {
+		sq->cqid = sq->qid + dev->nb_rxq;
+		sq->cq_ena = 1;
+		cq = &dev->cqs[sq->cqid];
+		cq->qid = sq->cqid;
+		cq->nb_desc = nb_desc;
+		rc = roc_nix_cq_init(&dev->nix, cq);
+		if (rc) {
+			plt_err("Failed to init cq=%d, rc=%d", cq->qid, rc);
+			return rc;
+		}
+	}
+
 	rc = roc_nix_sq_init(&dev->nix, sq);
 	if (rc) {
 		plt_err("Failed to init sq=%d, rc=%d", qid, rc);
@@ -513,7 +528,7 @@ cnxk_nix_tx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	return 0;
 }
 
-static void
+void
 cnxk_nix_tx_queue_release(struct rte_eth_dev *eth_dev, uint16_t qid)
 {
 	void *txq = eth_dev->data->tx_queues[qid];
@@ -1234,7 +1249,7 @@ cnxk_nix_configure(struct rte_eth_dev *eth_dev)
 	if (roc_nix_is_lbk(nix))
 		nix->enable_loop = eth_dev->data->dev_conf.lpbk_mode;
 
-	nix->tx_compl_ena = 0;
+	nix->tx_compl_ena = dev->tx_compl_ena;
 
 	/* Alloc a nix lf */
 	rc = roc_nix_lf_alloc(nix, nb_rxq, nb_txq, rx_cfg);
@@ -1277,6 +1292,15 @@ cnxk_nix_configure(struct rte_eth_dev *eth_dev)
 			goto free_nix_lf;
 		}
 		dev->sqs = qs;
+
+		if (nix->tx_compl_ena) {
+			qs = plt_zmalloc(sizeof(struct roc_nix_cq) * nb_txq, 0);
+			if (!qs) {
+				plt_err("Failed to alloc cqs");
+				goto free_nix_lf;
+			}
+			dev->cqs = qs;
+		}
 	}
 
 	/* Re-enable NIX LF error interrupts */
diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h
index a86e9dba80..4ba40e52b3 100644
--- a/drivers/net/cnxk/cnxk_ethdev.h
+++ b/drivers/net/cnxk/cnxk_ethdev.h
@@ -152,6 +152,21 @@
 
 #define CNXK_TX_MARK_FMT_MASK (0xFFFFFFFFFFFFull)
 
+struct cnxk_eth_txq_comp {
+	uintptr_t desc_base;
+	uintptr_t cq_door;
+	int64_t *cq_status;
+	uint64_t wdata;
+	uint32_t head;
+	uint32_t qmask;
+	uint32_t nb_desc_mask;
+	uint32_t available;
+	uint32_t sqe_id;
+	bool ena;
+	struct rte_mbuf **ptr;
+	rte_spinlock_t ext_buf_lock;
+};
+
 struct cnxk_fc_cfg {
 	enum rte_eth_fc_mode mode;
 	uint8_t rx_pause;
@@ -366,6 +381,7 @@ struct cnxk_eth_dev {
 	uint16_t flags;
 	uint8_t ptype_disable;
 	bool scalar_ena;
+	bool tx_compl_ena;
 	bool tx_mark;
 	bool ptp_en;
 	bool rx_mark_update; /* Enable/Disable mark update to mbuf */
@@ -544,6 +560,7 @@ int cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 			    const struct rte_eth_rxconf *rx_conf,
 			    struct rte_mempool *mp);
 int cnxk_nix_tx_queue_start(struct rte_eth_dev *eth_dev, uint16_t qid);
+void cnxk_nix_tx_queue_release(struct rte_eth_dev *eth_dev, uint16_t qid);
 int cnxk_nix_tx_queue_stop(struct rte_eth_dev *eth_dev, uint16_t qid);
 int cnxk_nix_dev_start(struct rte_eth_dev *eth_dev);
 int cnxk_nix_timesync_enable(struct rte_eth_dev *eth_dev);
diff --git a/drivers/net/cnxk/cnxk_ethdev_devargs.c b/drivers/net/cnxk/cnxk_ethdev_devargs.c
index d28509dbda..dbf5bd847d 100644
--- a/drivers/net/cnxk/cnxk_ethdev_devargs.c
+++ b/drivers/net/cnxk/cnxk_ethdev_devargs.c
@@ -231,6 +231,7 @@ parse_sdp_channel_mask(const char *key, const char *value, void *extra_args)
 
 #define CNXK_RSS_RETA_SIZE	"reta_size"
 #define CNXK_SCL_ENABLE		"scalar_enable"
+#define CNXK_TX_COMPL_ENA       "tx_compl_ena"
 #define CNXK_MAX_SQB_COUNT	"max_sqb_count"
 #define CNXK_FLOW_PREALLOC_SIZE "flow_prealloc_size"
 #define CNXK_FLOW_MAX_PRIORITY	"flow_max_priority"
@@ -266,6 +267,7 @@ cnxk_ethdev_parse_devargs(struct rte_devargs *devargs, struct cnxk_eth_dev *dev)
 	struct sdp_channel sdp_chan;
 	uint16_t rss_tag_as_xor = 0;
 	uint16_t scalar_enable = 0;
+	uint16_t tx_compl_ena = 0;
 	uint16_t custom_sa_act = 0;
 	struct rte_kvargs *kvlist;
 	uint16_t no_inl_dev = 0;
@@ -285,6 +287,8 @@ cnxk_ethdev_parse_devargs(struct rte_devargs *devargs, struct cnxk_eth_dev *dev)
 			   &reta_sz);
 	rte_kvargs_process(kvlist, CNXK_SCL_ENABLE, &parse_flag,
 			   &scalar_enable);
+	rte_kvargs_process(kvlist, CNXK_TX_COMPL_ENA, &parse_flag,
+			   &tx_compl_ena);
 	rte_kvargs_process(kvlist, CNXK_MAX_SQB_COUNT, &parse_sqb_count,
 			   &sqb_count);
 	rte_kvargs_process(kvlist, CNXK_FLOW_PREALLOC_SIZE,
@@ -319,6 +323,7 @@ cnxk_ethdev_parse_devargs(struct rte_devargs *devargs, struct cnxk_eth_dev *dev)
 
 null_devargs:
 	dev->scalar_ena = !!scalar_enable;
+	dev->tx_compl_ena = !!tx_compl_ena;
 	dev->inb.no_inl_dev = !!no_inl_dev;
 	dev->inb.min_spi = ipsec_in_min_spi;
 	dev->inb.max_spi = ipsec_in_max_spi;
@@ -349,6 +354,7 @@ cnxk_ethdev_parse_devargs(struct rte_devargs *devargs, struct cnxk_eth_dev *dev)
 RTE_PMD_REGISTER_PARAM_STRING(net_cnxk,
 			      CNXK_RSS_RETA_SIZE "=<64|128|256>"
 			      CNXK_SCL_ENABLE "=1"
+			      CNXK_TX_COMPL_ENA "=1"
 			      CNXK_MAX_SQB_COUNT "=<8-512>"
 			      CNXK_FLOW_PREALLOC_SIZE "=<1-32>"
 			      CNXK_FLOW_MAX_PRIORITY "=<1-32>"
-- 
2.25.1
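
For context on the Tx completion path added above: completed mbufs are freed back via rte_pktmbuf_free_seg(), so mbufs carrying external buffers end up invoking the user's extbuf free callback. A minimal sketch of that application side, assuming the tx_compl_ena devarg introduced in this patch is set (the PCI address and the callback body are illustrative, not part of the series):

#include <rte_malloc.h>
#include <rte_mbuf.h>

/* Invoked once the mbuf that carried this external buffer is freed
 * on transmit completion (driver started with e.g.
 * -a 0002:02:00.0,tx_compl_ena=1).
 */
static void
extbuf_free_cb(void *addr, void *opaque)
{
	(void)opaque;
	rte_free(addr); /* illustrative: reclaim the external buffer */
}

static void
attach_external_buf(struct rte_mbuf *m, void *buf, rte_iova_t iova,
		    uint16_t buf_len)
{
	struct rte_mbuf_ext_shared_info *shinfo;

	/* Places the shared info at the tail of 'buf' and registers
	 * the free callback; buf_len is adjusted accordingly.
	 */
	shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len,
						    extbuf_free_cb, NULL);
	rte_pktmbuf_attach_extbuf(m, buf, iova, buf_len, shinfo);
}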


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 06/11] net/cnxk: fix packet type for IPv6 packets post decryption
  2022-11-28  9:54 [PATCH 01/11] common/cnxk: free pending sqe buffers Nithin Dabilpuram
                   ` (3 preceding siblings ...)
  2022-11-28  9:54 ` [PATCH 05/11] event/cnxk: net/cnxk: support transmit completion Nithin Dabilpuram
@ 2022-11-28  9:54 ` Nithin Dabilpuram
  2022-11-28  9:54 ` [PATCH 07/11] net/cnxk: add late backpressure support for cn10kb Nithin Dabilpuram
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Nithin Dabilpuram @ 2022-11-28  9:54 UTC (permalink / raw)
  To: Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao; +Cc: jerinj, dev

Update ptype properly for IPv6 packets post inline decryption.

Fixes: 4440eb88ddfc ("net/cnxk: use full context IPsec structures")

Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com>
---
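For reference, a minimal standalone sketch of the ptype fix-up that the hunks below move into nix_rx_sec_mbuf_update() (the helper name and the raw-pointer argument are illustrative, not part of the patch):

#include <stdint.h>
#include <rte_mbuf_ptype.h>

/* Clear the previously parsed L3/tunnel bits and set the L3 ptype
 * from the inner IP header's version nibble after inline decryption.
 */
static inline uint32_t
inner_l3_ptype(uint32_t packet_type, const uint8_t *inner_hdr)
{
	packet_type &= ~(RTE_PTYPE_L3_MASK | RTE_PTYPE_TUNNEL_MASK);
	if (((inner_hdr[0] & 0xf0) >> 4) == 4)
		packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
	else
		packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
	return packet_type;
}
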
 drivers/net/cnxk/cn9k_rx.h | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/drivers/net/cnxk/cn9k_rx.h b/drivers/net/cnxk/cn9k_rx.h
index 1a9f920b41..0e23609df5 100644
--- a/drivers/net/cnxk/cn9k_rx.h
+++ b/drivers/net/cnxk/cn9k_rx.h
@@ -260,8 +260,8 @@ nix_rx_sec_mbuf_err_update(const union nix_rx_parse_u *rx, uint16_t res,
 }
 
 static __rte_always_inline uint64_t
-nix_rx_sec_mbuf_update(const struct nix_cqe_hdr_s *cq, struct rte_mbuf *m,
-		       uintptr_t sa_base, uint64_t *rearm_val, uint16_t *len)
+nix_rx_sec_mbuf_update(const struct nix_cqe_hdr_s *cq, struct rte_mbuf *m, uintptr_t sa_base,
+		       uint64_t *rearm_val, uint16_t *len, uint32_t packet_type)
 {
 	uintptr_t res_sg0 = ((uintptr_t)cq + ROC_ONF_IPSEC_INB_RES_OFF - 8);
 	const union nix_rx_parse_u *rx =
@@ -315,15 +315,18 @@ nix_rx_sec_mbuf_update(const struct nix_cqe_hdr_s *cq, struct rte_mbuf *m,
 	ip = (struct rte_ipv4_hdr *)(data + ROC_ONF_IPSEC_INB_SPI_SEQ_SZ +
 				     ROC_ONF_IPSEC_INB_MAX_L2_SZ);
 
+	packet_type = (packet_type & ~(RTE_PTYPE_L3_MASK | RTE_PTYPE_TUNNEL_MASK));
 	if (((ip->version_ihl & 0xf0) >> RTE_IPV4_IHL_MULTIPLIER) ==
 	    IPVERSION) {
 		*len = rte_be_to_cpu_16(ip->total_length) + lcptr;
+		packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
 	} else {
 		PLT_ASSERT(((ip->version_ihl & 0xf0) >>
 			    RTE_IPV4_IHL_MULTIPLIER) == 6);
 		ip6 = (struct rte_ipv6_hdr *)ip;
 		*len = rte_be_to_cpu_16(ip6->payload_len) +
 		       sizeof(struct rte_ipv6_hdr) + lcptr;
+		packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
 	}
 
 	/* Update data offset */
@@ -332,6 +335,7 @@ nix_rx_sec_mbuf_update(const struct nix_cqe_hdr_s *cq, struct rte_mbuf *m,
 	*rearm_val = *rearm_val & ~(BIT_ULL(16) - 1);
 	*rearm_val |= data_off;
 
+	m->packet_type = packet_type;
 	return RTE_MBUF_F_RX_SEC_OFFLOAD;
 }
 
@@ -363,14 +367,7 @@ cn9k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag,
 		/* Get SA Base from lookup mem */
 		sa_base = cnxk_nix_sa_base_get(port, lookup_mem);
 
-		ol_flags |= nix_rx_sec_mbuf_update(cq, mbuf, sa_base, &val,
-						   &len);
-
-		/* Only Tunnel inner IPv4 is supported */
-		packet_type = (packet_type &
-			       ~(RTE_PTYPE_L3_MASK | RTE_PTYPE_TUNNEL_MASK));
-		packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
-		mbuf->packet_type = packet_type;
+		ol_flags |= nix_rx_sec_mbuf_update(cq, mbuf, sa_base, &val, &len, packet_type);
 		goto skip_parse;
 	}
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 07/11] net/cnxk: add late backpressure support for cn10kb
  2022-11-28  9:54 [PATCH 01/11] common/cnxk: free pending sqe buffers Nithin Dabilpuram
                   ` (4 preceding siblings ...)
  2022-11-28  9:54 ` [PATCH 06/11] net/cnxk: fix packet type for IPv6 packets post decryption Nithin Dabilpuram
@ 2022-11-28  9:54 ` Nithin Dabilpuram
  2022-11-28  9:54 ` [PATCH 08/11] common/cnxk: use lcore LMT line for CPT context write Nithin Dabilpuram
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Nithin Dabilpuram @ 2022-11-28  9:54 UTC (permalink / raw)
  To: Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao; +Cc: jerinj, dev

Add late backpressure support for cn10kb and set it up
to backpressure CPT.

Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com>
---
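For reference, a small standalone sketch of how the late-backpressure ID (programmed from nix->bpid[0] in the queue-setup hunk) is spread across the new 3-bit lbpid_low/med/high fields of nix_cq_ctx_s; the struct and helper here are illustrative only:

#include <stdint.h>

/* Split a 9-bit late-backpressure ID into the three CQ context
 * fields; the debug dump reassembles it as (high << 6 | med << 3 | low).
 */
struct lbpid_parts {
	uint8_t low;  /* lbpid[2:0] */
	uint8_t med;  /* lbpid[5:3] */
	uint8_t high; /* lbpid[8:6] */
};

static inline struct lbpid_parts
lbpid_split(uint16_t lbpid)
{
	struct lbpid_parts p;

	p.low = lbpid & 0x7;
	p.med = (lbpid >> 3) & 0x7;
	p.high = (lbpid >> 6) & 0x7;
	return p;
}
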
 drivers/common/cnxk/hw/nix.h        | 18 ++++++++++++------
 drivers/common/cnxk/roc_model.h     |  6 ++++++
 drivers/common/cnxk/roc_nix_debug.c | 10 ++++++++--
 drivers/common/cnxk/roc_nix_irq.c   |  3 +++
 drivers/common/cnxk/roc_nix_priv.h  |  3 +++
 drivers/common/cnxk/roc_nix_queue.c | 14 +++++++++++++-
 drivers/net/cnxk/cnxk_ethdev.c      |  5 +++--
 7 files changed, 48 insertions(+), 11 deletions(-)

diff --git a/drivers/common/cnxk/hw/nix.h b/drivers/common/cnxk/hw/nix.h
index 425c335bf3..0d8f2a5e9b 100644
--- a/drivers/common/cnxk/hw/nix.h
+++ b/drivers/common/cnxk/hw/nix.h
@@ -861,6 +861,7 @@
 #define NIX_CQERRINT_DOOR_ERR  (0x0ull)
 #define NIX_CQERRINT_WR_FULL   (0x1ull)
 #define NIX_CQERRINT_CQE_FAULT (0x2ull)
+#define NIX_CQERRINT_CPT_DROP  (0x3ull) /* [CN10KB, .) */
 
 #define NIX_LINK_SDP (0xdull) /* [CN10K, .) */
 #define NIX_LINK_CPT (0xeull) /* [CN10K, .) */
@@ -1009,11 +1010,12 @@ struct nix_cqe_hdr_s {
 /* NIX completion queue context structure */
 struct nix_cq_ctx_s {
 	uint64_t base : 64; /* W0 */
-	uint64_t rsvd_67_64 : 4;
+	uint64_t lbp_ena : 1;
+	uint64_t lbpid_low : 3;
 	uint64_t bp_ena : 1;
-	uint64_t rsvd_71_69 : 3;
+	uint64_t lbpid_med : 3;
 	uint64_t bpid : 9;
-	uint64_t rsvd_83_81 : 3;
+	uint64_t lbpid_high : 3;
 	uint64_t qint_idx : 7;
 	uint64_t cq_err : 1;
 	uint64_t cint_idx : 7;
@@ -1027,10 +1029,14 @@ struct nix_cq_ctx_s {
 	uint64_t drop : 8;
 	uint64_t drop_ena : 1;
 	uint64_t ena : 1;
-	uint64_t rsvd_211_210 : 2;
-	uint64_t substream : 20;
+	uint64_t cpt_drop_err_en : 1;
+	uint64_t rsvd_211 : 1;
+	uint64_t substream : 12;
+	uint64_t stash_thresh : 4;
+	uint64_t lbp_frac : 4;
 	uint64_t caching : 1;
-	uint64_t rsvd_235_233 : 3;
+	uint64_t stashing : 1;
+	uint64_t rsvd_235_234 : 2;
 	uint64_t qsize : 4;
 	uint64_t cq_err_int : 8;
 	uint64_t cq_err_int_ena : 8;
diff --git a/drivers/common/cnxk/roc_model.h b/drivers/common/cnxk/roc_model.h
index 1985dd771d..848609eb35 100644
--- a/drivers/common/cnxk/roc_model.h
+++ b/drivers/common/cnxk/roc_model.h
@@ -236,6 +236,12 @@ roc_model_is_cnf10kb_a0(void)
 	return roc_model->flag & ROC_MODEL_CNF105xxN_A0;
 }
 
+static inline uint64_t
+roc_model_is_cn10kb(void)
+{
+	return roc_model->flag & ROC_MODEL_CN103xx;
+}
+
 static inline bool
 roc_env_is_hw(void)
 {
diff --git a/drivers/common/cnxk/roc_nix_debug.c b/drivers/common/cnxk/roc_nix_debug.c
index 6f82350b53..e491060765 100644
--- a/drivers/common/cnxk/roc_nix_debug.c
+++ b/drivers/common/cnxk/roc_nix_debug.c
@@ -665,8 +665,14 @@ nix_lf_cq_dump(__io struct nix_cq_ctx_s *ctx, FILE *file)
 		 ctx->cq_err_int_ena, ctx->cq_err_int);
 	nix_dump(file, "W3: qsize \t\t\t%d\nW3: caching \t\t\t%d", ctx->qsize,
 		 ctx->caching);
-	nix_dump(file, "W3: substream \t\t\t0x%03x\nW3: ena \t\t\t%d", ctx->substream,
-		 ctx->ena);
+	nix_dump(file, "W3: substream \t\t\t0x%03x\nW3: ena \t\t\t%d\nW3: lbp_ena \t\t\t%d",
+		 ctx->substream, ctx->ena, ctx->lbp_ena);
+	nix_dump(file,
+		 "W3: lbpid_high \t\t\t0x%03x\nW3: lbpid_med \t\t\t0x%03x\n"
+		 "W3: lbpid_low \t\t\t0x%03x\n(W3: lbpid) \t\t\t0x%03x",
+		 ctx->lbpid_high, ctx->lbpid_med, ctx->lbpid_low,
+		 ctx->lbpid_high << 6 | ctx->lbpid_med << 3 | ctx->lbpid_low);
+	nix_dump(file, "W3: lbp_frac \t\t\t%d\n", ctx->lbp_frac);
 	nix_dump(file, "W3: drop_ena \t\t\t%d\nW3: drop \t\t\t%d", ctx->drop_ena,
 		 ctx->drop);
 	nix_dump(file, "W3: bp \t\t\t\t%d\n", ctx->bp);
diff --git a/drivers/common/cnxk/roc_nix_irq.c b/drivers/common/cnxk/roc_nix_irq.c
index 661af79193..2e4bccb713 100644
--- a/drivers/common/cnxk/roc_nix_irq.c
+++ b/drivers/common/cnxk/roc_nix_irq.c
@@ -287,6 +287,9 @@ nix_lf_q_irq(void *param)
 
 		if (irq & BIT_ULL(NIX_CQERRINT_CQE_FAULT))
 			plt_err("CQ=%d NIX_CQERRINT_CQE_FAULT", cq);
+
+		if (irq & BIT_ULL(NIX_CQERRINT_CPT_DROP))
+			plt_err("CQ=%d NIX_CQERRINT_CPT_DROP", cq);
 	}
 
 	/* Handle SQ interrupts */
diff --git a/drivers/common/cnxk/roc_nix_priv.h b/drivers/common/cnxk/roc_nix_priv.h
index 02290a1b86..0a9461c856 100644
--- a/drivers/common/cnxk/roc_nix_priv.h
+++ b/drivers/common/cnxk/roc_nix_priv.h
@@ -16,6 +16,9 @@
 
 /* Apply BP/DROP when CQ is 95% full */
 #define NIX_CQ_THRESH_LEVEL	(5 * 256 / 100)
+#define NIX_CQ_SEC_THRESH_LEVEL (25 * 256 / 100)
+/* Apply LBP at 75% of actual BP */
+#define NIX_CQ_LPB_THRESH_FRAC	(75 * 16 / 100)
 #define NIX_CQ_FULL_ERRATA_SKID (1024ull * 256)
 #define NIX_RQ_AURA_THRESH(x)	(((x)*95) / 100)
 
diff --git a/drivers/common/cnxk/roc_nix_queue.c b/drivers/common/cnxk/roc_nix_queue.c
index 0dd3c8d4df..5fad8e4543 100644
--- a/drivers/common/cnxk/roc_nix_queue.c
+++ b/drivers/common/cnxk/roc_nix_queue.c
@@ -743,6 +743,8 @@ roc_nix_cq_init(struct roc_nix *roc_nix, struct roc_nix_cq *cq)
 	struct nix *nix = roc_nix_to_nix_priv(roc_nix);
 	struct mbox *mbox = (&nix->dev)->mbox;
 	volatile struct nix_cq_ctx_s *cq_ctx;
+	uint16_t drop_thresh = NIX_CQ_THRESH_LEVEL;
+	uint16_t cpt_lbpid = nix->bpid[0];
 	enum nix_q_size qsize;
 	size_t desc_sz;
 	int rc;
@@ -797,6 +799,16 @@ roc_nix_cq_init(struct roc_nix *roc_nix, struct roc_nix_cq *cq)
 	cq_ctx->avg_level = 0xff;
 	cq_ctx->cq_err_int_ena = BIT(NIX_CQERRINT_CQE_FAULT);
 	cq_ctx->cq_err_int_ena |= BIT(NIX_CQERRINT_DOOR_ERR);
+	if (roc_model_is_cn10kb() && roc_nix_inl_inb_is_enabled(roc_nix)) {
+		cq_ctx->cq_err_int_ena |= BIT(NIX_CQERRINT_CPT_DROP);
+		cq_ctx->cpt_drop_err_en = 1;
+		cq_ctx->lbp_ena = 1;
+		cq_ctx->lbpid_low = cpt_lbpid & 0x7;
+		cq_ctx->lbpid_med = (cpt_lbpid >> 3) & 0x7;
+		cq_ctx->lbpid_high = (cpt_lbpid >> 6) & 0x7;
+		cq_ctx->lbp_frac = NIX_CQ_LPB_THRESH_FRAC;
+		drop_thresh = NIX_CQ_SEC_THRESH_LEVEL;
+	}
 
 	/* Many to one reduction */
 	cq_ctx->qint_idx = cq->qid % nix->qints;
@@ -812,7 +824,7 @@ roc_nix_cq_init(struct roc_nix *roc_nix, struct roc_nix_cq *cq)
 		cq_ctx->drop_ena = 1;
 		cq->drop_thresh = min_rx_drop;
 	} else {
-		cq->drop_thresh = NIX_CQ_THRESH_LEVEL;
+		cq->drop_thresh = drop_thresh;
 		/* Drop processing or red drop cannot be enabled due to
 		 * due to packets coming for second pass from CPT.
 		 */
diff --git a/drivers/net/cnxk/cnxk_ethdev.c b/drivers/net/cnxk/cnxk_ethdev.c
index 1be2e9e776..d711eb6b27 100644
--- a/drivers/net/cnxk/cnxk_ethdev.c
+++ b/drivers/net/cnxk/cnxk_ethdev.c
@@ -49,8 +49,9 @@ nix_inl_cq_sz_clamp_up(struct roc_nix *nix, struct rte_mempool *mp,
 	struct roc_nix_rq *inl_rq;
 	uint64_t limit;
 
+	/* For CN10KB and above, LBP needs minimum CQ size */
 	if (!roc_errata_cpt_hang_on_x2p_bp())
-		return nb_desc;
+		return RTE_MAX(nb_desc, (uint32_t)4096);
 
 	/* CQ should be able to hold all buffers in first pass RQ's aura
 	 * this RQ's aura.
@@ -695,7 +696,7 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	first_skip += RTE_PKTMBUF_HEADROOM;
 	first_skip += rte_pktmbuf_priv_size(lpb_pool);
 	rq->first_skip = first_skip;
-	rq->later_skip = sizeof(struct rte_mbuf) + rte_pktmbuf_priv_size(mp);
+	rq->later_skip = sizeof(struct rte_mbuf) + rte_pktmbuf_priv_size(lpb_pool);
 	rq->lpb_size = lpb_pool->elt_size;
 	if (roc_errata_nix_no_meta_aura())
 		rq->lpb_drop_ena = !(dev->rx_offloads & RTE_ETH_RX_OFFLOAD_SECURITY);
-- 
2.25.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 08/11] common/cnxk: use lcore LMT line for CPT context write
  2022-11-28  9:54 [PATCH 01/11] common/cnxk: free pending sqe buffers Nithin Dabilpuram
                   ` (5 preceding siblings ...)
  2022-11-28  9:54 ` [PATCH 07/11] net/cnxk: add late backpressure support for cn10kb Nithin Dabilpuram
@ 2022-11-28  9:54 ` Nithin Dabilpuram
  2022-11-28  9:54 ` [PATCH 09/11] common/cnxk: convert aura handle to aura Nithin Dabilpuram
                   ` (2 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Nithin Dabilpuram @ 2022-11-28  9:54 UTC (permalink / raw)
  To: Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao; +Cc: jerinj, dev

Use lcore LMT line for CPT context write as no one else
is using it.

Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com>
---
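For reference, a standalone sketch of the LMT line addressing that both macros below rely on; the 128 B line size comes from the comments in the diff, while the 7-bit shift is an assumption about ROC_LMT_LINE_SIZE_LOG2:

#include <stdint.h>

#define EXAMPLE_LMT_LINE_SIZE_LOG2 7 /* each LMT line is 128 B */

/* Address of LMT line 'lmt_id' within the per-core LMT region
 * (32 lines per core for the generic macro, 16 for the CPT one).
 */
static inline uintptr_t
example_lmt_line_addr(uintptr_t lmt_base, uint64_t lmt_id)
{
	return lmt_base + (uintptr_t)(lmt_id << EXAMPLE_LMT_LINE_SIZE_LOG2);
}

The patch itself only switches roc_cpt_ctx_write() from the CPT-reserved lines to the calling lcore's own line, which is otherwise idle during context writes.
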
 drivers/common/cnxk/roc_cpt.c | 3 ++-
 drivers/common/cnxk/roc_io.h  | 5 +++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/common/cnxk/roc_cpt.c b/drivers/common/cnxk/roc_cpt.c
index 9a90d4d853..e336edb77e 100644
--- a/drivers/common/cnxk/roc_cpt.c
+++ b/drivers/common/cnxk/roc_cpt.c
@@ -1020,7 +1020,8 @@ roc_cpt_ctx_write(struct roc_cpt_lf *lf, void *sa_dptr, void *sa_cptr,
 	uint64_t *dptr;
 	int i;
 
-	ROC_LMT_CPT_BASE_ID_GET(lmt_base, lmt_id);
+	/* Use this lcore's LMT line as no one else is using it */
+	ROC_LMT_BASE_ID_GET(lmt_base, lmt_id);
 	inst = (struct cpt_inst_s *)lmt_base;
 
 	memset(inst, 0, sizeof(struct cpt_inst_s));
diff --git a/drivers/common/cnxk/roc_io.h b/drivers/common/cnxk/roc_io.h
index 13f98ed549..1e5c1f8c04 100644
--- a/drivers/common/cnxk/roc_io.h
+++ b/drivers/common/cnxk/roc_io.h
@@ -7,6 +7,7 @@
 
 #include "roc_platform.h" /* for __plt_always_inline macro */
 
+#ifndef ROC_LMT_BASE_ID_GET
 #define ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id)                                  \
 	do {                                                                   \
 		/* 32 Lines per core */                                        \
@@ -14,7 +15,10 @@
 		/* Each line is of 128B */                                     \
 		(lmt_addr) += ((uint64_t)lmt_id << ROC_LMT_LINE_SIZE_LOG2);    \
 	} while (0)
+#endif
 
+/* Define it if not defined in roc_platform.h */
+#ifndef ROC_LMT_CPT_BASE_ID_GET
 #define ROC_LMT_CPT_BASE_ID_GET(lmt_addr, lmt_id)                              \
 	do {                                                                   \
 		/* 16 Lines per core */                                        \
@@ -23,6 +27,7 @@
 		/* Each line is of 128B */                                     \
 		(lmt_addr) += ((uint64_t)lmt_id << ROC_LMT_LINE_SIZE_LOG2);    \
 	} while (0)
+#endif
 
 #define roc_load_pair(val0, val1, addr)                                        \
 	({                                                                     \
-- 
2.25.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 09/11] common/cnxk: convert aura handle to aura
  2022-11-28  9:54 [PATCH 01/11] common/cnxk: free pending sqe buffers Nithin Dabilpuram
                   ` (6 preceding siblings ...)
  2022-11-28  9:54 ` [PATCH 08/11] common/cnxk: use lcore LMT line for CPT context write Nithin Dabilpuram
@ 2022-11-28  9:54 ` Nithin Dabilpuram
  2022-11-28  9:54 ` [PATCH 10/11] net/cnxk: mark HW errors as bad checksum Nithin Dabilpuram
  2022-11-28  9:54 ` [PATCH 11/11] common/cnxk: disable drop re in A1 chip revision Nithin Dabilpuram
  9 siblings, 0 replies; 12+ messages in thread
From: Nithin Dabilpuram @ 2022-11-28  9:54 UTC (permalink / raw)
  To: Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao
  Cc: jerinj, dev, Kommula Shiva Shankar

From: Kommula Shiva Shankar <kshankar@marvell.com>

This patch converts the aura handle to the aura ID
while initialising the inline dev SSO XAQ aura.

Signed-off-by: Kommula Shiva Shankar <kshankar@marvell.com>
---
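For context, an NPA aura handle bundles more than the bare aura index that sso_hwgrp_alloc_xaq() programs; the sketch below only illustrates that distinction, and the bit layout is an assumption rather than the real roc_npa encoding:

#include <stdint.h>

/* Hypothetical handle layout: aura index in the low bits, region
 * base address in the rest (mask width assumed for illustration).
 */
#define EXAMPLE_AURA_ID_MASK ((1ULL << 20) - 1)

static inline uint64_t
example_aura_handle_to_aura(uint64_t aura_handle)
{
	return aura_handle & EXAMPLE_AURA_ID_MASK;
}
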
 drivers/common/cnxk/roc_nix_inl_dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/common/cnxk/roc_nix_inl_dev.c b/drivers/common/cnxk/roc_nix_inl_dev.c
index c3d94dd0da..4ab4209dba 100644
--- a/drivers/common/cnxk/roc_nix_inl_dev.c
+++ b/drivers/common/cnxk/roc_nix_inl_dev.c
@@ -265,7 +265,7 @@ nix_inl_sso_setup(struct nix_inl_dev *inl_dev)
 	}
 
 	/* Setup xaq for hwgrps */
-	rc = sso_hwgrp_alloc_xaq(dev, inl_dev->xaq.aura_handle, 1);
+	rc = sso_hwgrp_alloc_xaq(dev, roc_npa_aura_handle_to_aura(inl_dev->xaq.aura_handle), 1);
 	if (rc) {
 		plt_err("Failed to setup hwgrp xaq aura, rc=%d", rc);
 		goto destroy_pool;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 10/11] net/cnxk: mark HW errors as bad checksum
  2022-11-28  9:54 [PATCH 01/11] common/cnxk: free pending sqe buffers Nithin Dabilpuram
                   ` (7 preceding siblings ...)
  2022-11-28  9:54 ` [PATCH 09/11] common/cnxk: convert aura handle to aura Nithin Dabilpuram
@ 2022-11-28  9:54 ` Nithin Dabilpuram
  2022-11-28  9:54 ` [PATCH 11/11] common/cnxk: disable drop re in A1 chip revision Nithin Dabilpuram
  9 siblings, 0 replies; 12+ messages in thread
From: Nithin Dabilpuram @ 2022-11-28  9:54 UTC (permalink / raw)
  To: Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao
  Cc: jerinj, dev, Pavan Nikhilesh

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Mark NIX packet parsing errors due to NPC/NPA errors as bad
packets by setting L3/L4 checksum as BAD in olflags.
Application can decide the fate of these packets.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
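Since the commit message leaves the fate of such packets to the application, here is a minimal sketch of an rx-side filter using the standard mbuf offload flags (the drop policy itself is only an example):

#include <rte_mbuf.h>

/* Free packets whose IP or L4 checksum the PMD marked bad, which
 * after this patch also covers NPC/NPA parse errors; keep the rest.
 */
static uint16_t
filter_bad_csum(struct rte_mbuf **pkts, uint16_t nb_pkts)
{
	uint16_t i, kept = 0;

	for (i = 0; i < nb_pkts; i++) {
		uint64_t fl = pkts[i]->ol_flags;

		if ((fl & RTE_MBUF_F_RX_IP_CKSUM_MASK) == RTE_MBUF_F_RX_IP_CKSUM_BAD ||
		    (fl & RTE_MBUF_F_RX_L4_CKSUM_MASK) == RTE_MBUF_F_RX_L4_CKSUM_BAD)
			rte_pktmbuf_free(pkts[i]);
		else
			pkts[kept++] = pkts[i];
	}
	return kept;
}
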
 drivers/net/cnxk/cnxk_lookup.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/cnxk/cnxk_lookup.c b/drivers/net/cnxk/cnxk_lookup.c
index f36fb8f27a..e1f69b4f34 100644
--- a/drivers/net/cnxk/cnxk_lookup.c
+++ b/drivers/net/cnxk/cnxk_lookup.c
@@ -286,8 +286,8 @@ nix_create_rx_ol_flags_array(void *mem)
 				   errcode == NIX_RX_PERRCODE_OL3_LEN) {
 				val |= RTE_MBUF_F_RX_IP_CKSUM_BAD;
 			} else {
-				val |= RTE_MBUF_F_RX_IP_CKSUM_GOOD;
-				val |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
+				val |= RTE_MBUF_F_RX_IP_CKSUM_BAD;
+				val |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
 			}
 			break;
 		}
-- 
2.25.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 11/11] common/cnxk: disable drop re in A1 chip revision
  2022-11-28  9:54 [PATCH 01/11] common/cnxk: free pending sqe buffers Nithin Dabilpuram
                   ` (8 preceding siblings ...)
  2022-11-28  9:54 ` [PATCH 10/11] net/cnxk: mark HW errors as bad checksum Nithin Dabilpuram
@ 2022-11-28  9:54 ` Nithin Dabilpuram
  2023-01-06 13:22   ` Jerin Jacob
  9 siblings, 1 reply; 12+ messages in thread
From: Nithin Dabilpuram @ 2022-11-28  9:54 UTC (permalink / raw)
  To: Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao
  Cc: jerinj, dev, Pavan Nikhilesh

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Disable drop RE (receive error) in CN10KA A1 as enabling it
would leak event vector buffers.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
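For context, callers are expected to gate the drop-on-receive-error configuration on this errata helper; a hedged sketch of that pattern follows (only roc_errata_nix_has_no_drop_re() is real, the config bit and function are hypothetical):

#include <stdbool.h>
#include <stdint.h>

#define EXAMPLE_RQ_DROP_RE (1ULL << 0) /* hypothetical config bit */

static inline uint64_t
example_rq_cfg(uint64_t rq_cfg, bool has_no_drop_re_errata)
{
	/* Skip drop-on-receive-error on affected revisions (now
	 * including CN10KA A1) to avoid leaking event vector buffers.
	 */
	if (!has_no_drop_re_errata)
		rq_cfg |= EXAMPLE_RQ_DROP_RE;
	return rq_cfg;
}
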
 drivers/common/cnxk/roc_errata.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/common/cnxk/roc_errata.h b/drivers/common/cnxk/roc_errata.h
index a39796e894..36e6db467a 100644
--- a/drivers/common/cnxk/roc_errata.h
+++ b/drivers/common/cnxk/roc_errata.h
@@ -9,7 +9,7 @@ static inline bool
 roc_errata_nix_has_no_drop_re(void)
 {
 	return (roc_model_is_cn10ka_a0() || roc_model_is_cnf10ka_a0() ||
-		roc_model_is_cnf10kb_a0());
+		roc_model_is_cnf10kb_a0() || roc_model_is_cn10ka_a1());
 }
 
 /* Errata NIX-34873 */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 11/11] common/cnxk: disable drop re in A1 chip revision
  2022-11-28  9:54 ` [PATCH 11/11] common/cnxk: disable drop re in A1 chip revision Nithin Dabilpuram
@ 2023-01-06 13:22   ` Jerin Jacob
  0 siblings, 0 replies; 12+ messages in thread
From: Jerin Jacob @ 2023-01-06 13:22 UTC (permalink / raw)
  To: Nithin Dabilpuram
  Cc: Kiran Kumar K, Sunil Kumar Kori, Satha Rao, jerinj, dev, Pavan Nikhilesh

On Mon, Nov 28, 2022 at 3:25 PM Nithin Dabilpuram
<ndabilpuram@marvell.com> wrote:
>
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> Disable drop re in CN10KA A1 as enabling it would leak
> event vector buffers.
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>

Updated the git commit messages [2], made a minor cleanup [1] in 1/11 as
follows, and applied to dpdk-next-net-mrvl/for-next-net. Thanks

[1]

diff --git a/drivers/common/cnxk/roc_nix_irq.c
b/drivers/common/cnxk/roc_nix_irq.c
index 2e4bccb713..2b731302cd 100644
--- a/drivers/common/cnxk/roc_nix_irq.c
+++ b/drivers/common/cnxk/roc_nix_irq.c
@@ -328,9 +328,8 @@ nix_lf_q_irq(void *param)
        roc_nix_queues_ctx_dump(nix_priv_to_roc_nix(nix), NULL);

        /* Call reset callback */
-       if (intr_cb)
-               if (dev->ops->q_err_cb)
-                       dev->ops->q_err_cb(nix_priv_to_roc_nix(nix), NULL);
+       if (intr_cb && dev->ops->q_err_cb)
+               dev->ops->q_err_cb(nix_priv_to_roc_nix(nix), NULL);

[2]
commit 6141ebf5fbf3c2ccc4b78ab9491ccb43e570565b (HEAD -> for-next-net,
origin/for-next-net)
Author: Pavan Nikhilesh <pbhagavatula@marvell.com>
Date:   Mon Nov 28 15:24:42 2022 +0530

    common/cnxk: disable drop receive error in A1 revision

    Disable drop receive error in CN10KA A1 as enabling it would leak
    event vector buffers.

    Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>

commit b7feb7d248072637d7143c6686ffc1ee7e04a3a7
Author: Pavan Nikhilesh <pbhagavatula@marvell.com>
Date:   Mon Nov 28 15:24:41 2022 +0530

    net/cnxk: mark HW errors as bad checksum

    Mark NIX packet parsing errors due to NPC/NPA errors as bad
    packets by setting L3/L4 checksum as BAD in olflags.
    Application can decide the fate of these packets.

    Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>

commit 3dab27b88d5b367bf1d83f5ac6efc89bb9f0cc6d
Author: Kommula Shiva Shankar <kshankar@marvell.com>
Date:   Mon Nov 28 15:24:40 2022 +0530

    common/cnxk: fix aura ID handling

    Pass the aura ID instead of the aura handle while initialising
    the inline dev SSO XAQ aura.

    Fixes: 6f30ac80ca6b ("common/cnxk: use XAQ create API for inline device")
    Cc: stable@dpdk.org

    Signed-off-by: Kommula Shiva Shankar <kshankar@marvell.com>

commit ff38e5da017d04760762fcb30227c91f338abb93
Author: Nithin Dabilpuram <ndabilpuram@marvell.com>
Date:   Mon Nov 28 15:24:39 2022 +0530

    common/cnxk: use lcore LMT line for CPT context write

    Use lcore LMT line for CPT context write as no one else
    is using it.

    Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com>

commit 9ac550f477b93c413d7dd57bf280d1d83a8436aa
Author: Nithin Dabilpuram <ndabilpuram@marvell.com>
Date:   Mon Nov 28 15:24:38 2022 +0530

    net/cnxk: support late backpressure for cn10kb

    Add late backpressure support for cn10kb and set it up
    to backpressure CPT.

    Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com>

commit a5d5985247aa3bda9790df6961d57ceeb62c9e54
Author: Nithin Dabilpuram <ndabilpuram@marvell.com>
Date:   Mon Nov 28 15:24:37 2022 +0530

    net/cnxk: fix packet type for IPv6 packets post decryption

    Update ptype properly for IPv6 packets post inline decryption.

    Fixes: 4440eb88ddfc ("net/cnxk: use full context IPsec structures")
    Cc: stable@dpdk.org

    Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com>

commit 323b51f6adb985711d38069d1390e0bfa2560a3b
Author: Rakesh Kudurumalla <rkudurumalla@marvell.com>
Date:   Mon Nov 28 15:24:36 2022 +0530

    event/cnxk: net/cnxk: support transmit completion

    Added support to call the callback handler provided by the user
    when an external buffer is attached to the mbuf and transmit
    completion is enabled. Also added support to enable transmit
    completion as a device argument.

    Signed-off-by: Rakesh Kudurumalla <rkudurumalla@marvell.com>

commit 8950250f5241d12f7b32f424dab74bdf67350183
Author: Satha Rao <skoteshwar@marvell.com>
Date:   Mon Nov 28 15:24:35 2022 +0530

    common/cnxk: split NIX TM hierarchy enable API

    The roc_nix_tm_hierarchy_enable() API does two things internally:
    1) Create all TM nodes, allocate HW resources and connect
       them as requested.
    2) Enable transmit by XON of the SMQ and start the SQs.

    There are test cases where both steps are called independently.
    In order to support this, the patch splits the functionality
    into two APIs.

    Signed-off-by: Satha Rao <skoteshwar@marvell.com>

commit 05faa3527fdb9ca1875b5dab61c2916d9f0e3239
Author: Satha Rao <skoteshwar@marvell.com>
Date:   Mon Nov 28 15:24:34 2022 +0530

    common/cnxk: set default SQ TC value

    Zero is valid TC, initialize SQ TC value to ROC_NIX_PFC_CLASS_INVALID.

    Signed-off-by: Satha Rao <skoteshwar@marvell.com>

commit e5b2caa96a0358d83efb0e495074f69f0c1de034
Author: Satha Rao <skoteshwar@marvell.com>
Date:   Mon Nov 28 15:24:33 2022 +0530

    net/cnxk: register callback to get queue errors

    Register a callback mechanism to get SQ error interrupts and
    call the ethdev event RTE_ETH_EVENT_INTR_RESET.

    Signed-off-by: Satha Rao <skoteshwar@marvell.com>

commit 5f0d6a1ab3b4ff73cdbce8158cabe6d9a42cdcf8
Author: Satha Rao <skoteshwar@marvell.com>
Date:   Mon Nov 28 15:24:32 2022 +0530

    common/cnxk: free pending SQE buffers

    This patch provides a callback mechanism when SQ receives MNQ_ERR.
    Even when SQ got MNQ_ERR interrupt application still enqueue
    packets for sending they will be struck at SQ, so we are freeing
    all these pending packets when we called SQ finish.

    Signed-off-by: Satha Rao <skoteshwar@marvell.com>
> ---
>  drivers/common/cnxk/roc_errata.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/common/cnxk/roc_errata.h b/drivers/common/cnxk/roc_errata.h
> index a39796e894..36e6db467a 100644
> --- a/drivers/common/cnxk/roc_errata.h
> +++ b/drivers/common/cnxk/roc_errata.h
> @@ -9,7 +9,7 @@ static inline bool
>  roc_errata_nix_has_no_drop_re(void)
>  {
>         return (roc_model_is_cn10ka_a0() || roc_model_is_cnf10ka_a0() ||
> -               roc_model_is_cnf10kb_a0());
> +               roc_model_is_cnf10kb_a0() || roc_model_is_cn10ka_a1());
>  }
>
>  /* Errata NIX-34873 */
> --
> 2.25.1
>

^ permalink raw reply	[flat|nested] 12+ messages in thread

