DPDK patches and discussions
From: Aakash Sasidharan <asasidharan@marvell.com>
To: Nithin Dabilpuram <ndabilpuram@marvell.com>,
	Kiran Kumar K <kirankumark@marvell.com>,
	Sunil Kumar Kori <skori@marvell.com>,
	Satha Rao <skoteshwar@marvell.com>,
	Harman Kalra <hkalra@marvell.com>,
	Ankur Dwivedi <adwivedi@marvell.com>,
	Anoob Joseph <anoobj@marvell.com>,
	Tejasree Kondoj <ktejasree@marvell.com>,
	Pavan Nikhilesh <pbhagavatula@marvell.com>,
	"Shijith Thotton" <sthotton@marvell.com>
Cc: <gakhil@marvell.com>, <jerinj@marvell.com>,
	<vvelumuri@marvell.com>, <asasidharan@marvell.com>,
	<dev@dpdk.org>
Subject: [PATCH v3 12/12] crypto/cnxk: enable dual submission to CPT
Date: Wed, 26 Jun 2024 16:25:34 +0530
Message-ID: <20240626105534.1386528-13-asasidharan@marvell.com>
In-Reply-To: <20240626105534.1386528-1-asasidharan@marvell.com>

From: Anoob Joseph <anoobj@marvell.com>

Enable dual submission to CPT by submitting two instructions in a single
LMTLINE. The LMTLINE is initialized for two-instruction dispatch and the
flow-control threshold is adjusted accordingly.

Signed-off-by: Anoob Joseph <anoobj@marvell.com>
Signed-off-by: Aakash Sasidharan <asasidharan@marvell.com>
---
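A minimal caller-side sketch of the resulting submission flow (illustrative
only; dual_submit_sketch() is a hypothetical name, and the snippet condenses
cnxk_crypto_cn10k_submit() from the diff below). It assumes the cnxk driver
headers are available, that the queue pair's LMTLINE was initialized with
is_dual = true, and it omits flow-control and pending-queue bookkeeping for
brevity:

static inline void
dual_submit_sketch(struct cnxk_cpt_qp *qp, struct cpt_inst_s *inst, uint16_t nb_inst)
{
	uint64_t lmt_base = qp->lmtline.lmt_base;
	uint64_t io_addr = qp->lmtline.io_addr; /* size bits pre-set for two-inst lines */
	uint16_t lmt_id;
	int i;

	ROC_LMT_BASE_ID_GET(lmt_base, lmt_id);

	/* Two cpt_inst_s per 128B LMTLINE: copy instructions back to back */
	i = RTE_MIN(nb_inst, CN10K_CPT_PKTS_PER_LOOP);
	memcpy(PLT_PTR_CAST(lmt_base), inst, i * sizeof(struct cpt_inst_s));

	/* The helper issues the STEORL(s), pairing two instructions per line
	 * and falling back to a single-instruction submit for an odd tail;
	 * it also performs the rte_io_wmb() barrier.
	 */
	cn10k_cpt_lmtst_dual_submit(&io_addr, lmt_id, &i);
}
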
 drivers/common/cnxk/roc_cpt.c             |  17 +-
 drivers/common/cnxk/roc_cpt.h             |   8 +-
 drivers/crypto/cnxk/cn10k_cryptodev_ops.c | 184 +++++-----------------
 drivers/crypto/cnxk/cn10k_cryptodev_ops.h |  60 ++++++-
 drivers/crypto/cnxk/cnxk_cryptodev_ops.c  |  47 ++----
 drivers/crypto/cnxk/cnxk_cryptodev_ops.h  |   2 +
 drivers/event/cnxk/cnxk_eventdev_adptr.c  |   4 +-
 7 files changed, 124 insertions(+), 198 deletions(-)

diff --git a/drivers/common/cnxk/roc_cpt.c b/drivers/common/cnxk/roc_cpt.c
index 9f283ceb2e..aba2a49d19 100644
--- a/drivers/common/cnxk/roc_cpt.c
+++ b/drivers/common/cnxk/roc_cpt.c
@@ -1135,8 +1135,8 @@ roc_cpt_iq_enable(struct roc_cpt_lf *lf)
 }
 
 int
-roc_cpt_lmtline_init(struct roc_cpt *roc_cpt, struct roc_cpt_lmtline *lmtline,
-		     int lf_id)
+roc_cpt_lmtline_init(struct roc_cpt *roc_cpt, struct roc_cpt_lmtline *lmtline, int lf_id,
+		     bool is_dual)
 {
 	struct roc_cpt_lf *lf;
 
@@ -1145,12 +1145,19 @@ roc_cpt_lmtline_init(struct roc_cpt *roc_cpt, struct roc_cpt_lmtline *lmtline,
 		return -ENOTSUP;
 
 	lmtline->io_addr = lf->io_addr;
-	if (roc_model_is_cn10k())
-		lmtline->io_addr |= ROC_CN10K_CPT_INST_DW_M1 << 4;
+	lmtline->fc_thresh = lf->nb_desc - CPT_LF_FC_MIN_THRESHOLD;
+
+	if (roc_model_is_cn10k()) {
+		if (is_dual) {
+			lmtline->io_addr |= ROC_CN10K_TWO_CPT_INST_DW_M1 << 4;
+			lmtline->fc_thresh = lf->nb_desc -  2 * CPT_LF_FC_MIN_THRESHOLD;
+		} else {
+			lmtline->io_addr |= ROC_CN10K_CPT_INST_DW_M1 << 4;
+		}
+	}
 
 	lmtline->fc_addr = lf->fc_addr;
 	lmtline->lmt_base = lf->lmt_base;
-	lmtline->fc_thresh = lf->nb_desc - CPT_LF_FC_MIN_THRESHOLD;
 
 	return 0;
 }
diff --git a/drivers/common/cnxk/roc_cpt.h b/drivers/common/cnxk/roc_cpt.h
index 8ef9062ae0..e2e919f80f 100644
--- a/drivers/common/cnxk/roc_cpt.h
+++ b/drivers/common/cnxk/roc_cpt.h
@@ -200,12 +200,12 @@ int __roc_api roc_cpt_afs_print(struct roc_cpt *roc_cpt);
 int __roc_api roc_cpt_lfs_print(struct roc_cpt *roc_cpt);
 void __roc_api roc_cpt_iq_disable(struct roc_cpt_lf *lf);
 void __roc_api roc_cpt_iq_enable(struct roc_cpt_lf *lf);
-int __roc_api roc_cpt_lmtline_init(struct roc_cpt *roc_cpt,
-				   struct roc_cpt_lmtline *lmtline, int lf_id);
+int __roc_api roc_cpt_lmtline_init(struct roc_cpt *roc_cpt, struct roc_cpt_lmtline *lmtline,
+				   int lf_id, bool is_dual);
 
 void __roc_api roc_cpt_parse_hdr_dump(FILE *file, const struct cpt_parse_hdr_s *cpth);
-int __roc_api roc_cpt_ctx_write(struct roc_cpt_lf *lf, void *sa_dptr,
-				void *sa_cptr, uint16_t sa_len);
+int __roc_api roc_cpt_ctx_write(struct roc_cpt_lf *lf, void *sa_dptr, void *sa_cptr,
+				uint16_t sa_len);
 
 void __roc_api roc_cpt_int_misc_cb_register(roc_cpt_int_misc_cb_t cb, void *args);
 int __roc_api roc_cpt_int_misc_cb_unregister(roc_cpt_int_misc_cb_t cb, void *args);
diff --git a/drivers/crypto/cnxk/cn10k_cryptodev_ops.c b/drivers/crypto/cnxk/cn10k_cryptodev_ops.c
index ed964d4d01..780785d656 100644
--- a/drivers/crypto/cnxk/cn10k_cryptodev_ops.c
+++ b/drivers/crypto/cnxk/cn10k_cryptodev_ops.c
@@ -12,11 +12,6 @@
 #include <ethdev_driver.h>
 
 #include "roc_cpt.h"
-#if defined(__aarch64__)
-#include "roc_io.h"
-#else
-#include "roc_io_generic.h"
-#endif
 #include "roc_idev.h"
 #include "roc_sso.h"
 #include "roc_sso_dp.h"
@@ -40,8 +35,8 @@
 
 /* Holds information required to send crypto operations in one burst */
 struct ops_burst {
-	struct rte_crypto_op *op[CN10K_PKTS_PER_LOOP];
-	uint64_t w2[CN10K_PKTS_PER_LOOP];
+	struct rte_crypto_op *op[CN10K_CPT_PKTS_PER_LOOP];
+	uint64_t w2[CN10K_CPT_PKTS_PER_LOOP];
 	struct cn10k_sso_hws *ws;
 	struct cnxk_cpt_qp *qp;
 	uint16_t nb_ops;
@@ -55,56 +50,6 @@ struct vec_request {
 	uint64_t w2;
 };
 
-#if defined(RTE_ARCH_ARM64)
-static __rte_always_inline void __rte_hot
-cn10k_cpt_lmtst_dual_submit(uint64_t *io_addr, const uint16_t lmt_id, int *i)
-{
-	uint64_t lmt_arg;
-
-	/* Check if the total number of instructions is odd or even. */
-	const int flag_odd = *i & 0x1;
-
-	/* Reduce i by 1 when odd number of instructions.*/
-	*i -= flag_odd;
-
-	if (*i > 2 * CN10K_PKTS_PER_STEORL) {
-		lmt_arg = ROC_CN10K_DUAL_CPT_LMT_ARG | (CN10K_PKTS_PER_STEORL - 1) << 12 |
-			  (uint64_t)lmt_id;
-		roc_lmt_submit_steorl(lmt_arg, *io_addr);
-		lmt_arg = ROC_CN10K_DUAL_CPT_LMT_ARG | (*i / 2 - CN10K_PKTS_PER_STEORL - 1) << 12 |
-			  (uint64_t)(lmt_id + CN10K_PKTS_PER_STEORL);
-		roc_lmt_submit_steorl(lmt_arg, *io_addr);
-		if (flag_odd) {
-			*io_addr = (*io_addr & ~(uint64_t)(0x7 << 4)) |
-				   (ROC_CN10K_CPT_INST_DW_M1 << 4);
-			lmt_arg = (uint64_t)(lmt_id + *i / 2);
-			roc_lmt_submit_steorl(lmt_arg, *io_addr);
-			*io_addr = (*io_addr & ~(uint64_t)(0x7 << 4)) |
-				   (ROC_CN10K_TWO_CPT_INST_DW_M1 << 4);
-			*i += 1;
-		}
-	} else {
-		if (*i != 0) {
-			lmt_arg =
-				ROC_CN10K_DUAL_CPT_LMT_ARG | (*i / 2 - 1) << 12 | (uint64_t)lmt_id;
-			roc_lmt_submit_steorl(lmt_arg, *io_addr);
-		}
-
-		if (flag_odd) {
-			*io_addr = (*io_addr & ~(uint64_t)(0x7 << 4)) |
-				   (ROC_CN10K_CPT_INST_DW_M1 << 4);
-			lmt_arg = (uint64_t)(lmt_id + *i / 2);
-			roc_lmt_submit_steorl(lmt_arg, *io_addr);
-			*io_addr = (*io_addr & ~(uint64_t)(0x7 << 4)) |
-				   (ROC_CN10K_TWO_CPT_INST_DW_M1 << 4);
-			*i += 1;
-		}
-	}
-
-	rte_io_wmb();
-}
-#endif
-
 static inline struct cnxk_se_sess *
 cn10k_cpt_sym_temp_sess_create(struct cnxk_cpt_qp *qp, struct rte_crypto_op *op)
 {
@@ -387,8 +332,8 @@ static uint16_t
 cn10k_cpt_enqueue_burst(void *qptr, struct rte_crypto_op **ops, uint16_t nb_ops,
 			const bool is_sg_ver2)
 {
-	uint64_t lmt_base, lmt_arg, io_addr;
 	struct cpt_inflight_req *infl_req;
+	uint64_t head, lmt_base, io_addr;
 	uint16_t nb_allowed, count = 0;
 	struct cnxk_cpt_qp *qp = qptr;
 	struct pending_queue *pend_q;
@@ -396,7 +341,6 @@ cn10k_cpt_enqueue_burst(void *qptr, struct rte_crypto_op **ops, uint16_t nb_ops,
 	union cpt_fc_write_s fc;
 	uint64_t *fc_addr;
 	uint16_t lmt_id;
-	uint64_t head;
 	int ret, i;
 
 	pend_q = &qp->pend_q;
@@ -426,11 +370,11 @@ cn10k_cpt_enqueue_burst(void *qptr, struct rte_crypto_op **ops, uint16_t nb_ops,
 		goto pend_q_commit;
 	}
 
-	for (i = 0; i < RTE_MIN(CN10K_PKTS_PER_LOOP, nb_ops); i++) {
+	for (i = 0; i < RTE_MIN(CN10K_CPT_PKTS_PER_LOOP, nb_ops); i++) {
 		infl_req = &pend_q->req_queue[head];
 		infl_req->op_flags = 0;
 
-		ret = cn10k_cpt_fill_inst(qp, ops + i, &inst[2 * i], infl_req, is_sg_ver2);
+		ret = cn10k_cpt_fill_inst(qp, ops + i, &inst[i], infl_req, is_sg_ver2);
 		if (unlikely(ret != 1)) {
 			plt_dp_err("Could not process op: %p", ops + i);
 			if (i == 0)
@@ -441,24 +385,12 @@ cn10k_cpt_enqueue_burst(void *qptr, struct rte_crypto_op **ops, uint16_t nb_ops,
 		pending_queue_advance(&head, pq_mask);
 	}
 
-	if (i > CN10K_PKTS_PER_STEORL) {
-		lmt_arg = ROC_CN10K_CPT_LMT_ARG | (CN10K_PKTS_PER_STEORL - 1) << 12 |
-			  (uint64_t)lmt_id;
-		roc_lmt_submit_steorl(lmt_arg, io_addr);
-		lmt_arg = ROC_CN10K_CPT_LMT_ARG | (i - CN10K_PKTS_PER_STEORL - 1) << 12 |
-			  (uint64_t)(lmt_id + CN10K_PKTS_PER_STEORL);
-		roc_lmt_submit_steorl(lmt_arg, io_addr);
-	} else {
-		lmt_arg = ROC_CN10K_CPT_LMT_ARG | (i - 1) << 12 | (uint64_t)lmt_id;
-		roc_lmt_submit_steorl(lmt_arg, io_addr);
-	}
-
-	rte_io_wmb();
+	cn10k_cpt_lmtst_dual_submit(&io_addr, lmt_id, &i);
 
-	if (nb_ops - i > 0 && i == CN10K_PKTS_PER_LOOP) {
-		nb_ops -= i;
-		ops += i;
-		count += i;
+	if (nb_ops - i > 0 && i == CN10K_CPT_PKTS_PER_LOOP) {
+		nb_ops -= CN10K_CPT_PKTS_PER_LOOP;
+		ops += CN10K_CPT_PKTS_PER_LOOP;
+		count += CN10K_CPT_PKTS_PER_LOOP;
 		goto again;
 	}
 
@@ -633,7 +565,7 @@ cn10k_cpt_vec_pkt_submission_timeout_handle(void)
 static inline void
 cn10k_cpt_vec_submit(struct vec_request vec_tbl[], uint16_t vec_tbl_len, struct cnxk_cpt_qp *qp)
 {
-	uint64_t lmt_base, lmt_arg, lmt_id, io_addr;
+	uint64_t lmt_base, lmt_id, io_addr;
 	union cpt_fc_write_s fc;
 	struct cpt_inst_s *inst;
 	uint16_t burst_size;
@@ -661,7 +593,7 @@ cn10k_cpt_vec_submit(struct vec_request vec_tbl[], uint16_t vec_tbl_len, struct
 again:
 	burst_size = RTE_MIN(CN10K_PKTS_PER_STEORL, vec_tbl_len);
 	for (i = 0; i < burst_size; i++)
-		cn10k_cpt_vec_inst_fill(&vec_tbl[i], &inst[i * 2], qp, vec_tbl[0].w7);
+		cn10k_cpt_vec_inst_fill(&vec_tbl[i], &inst[i], qp, vec_tbl[0].w7);
 
 	do {
 		fc.u64[0] = __atomic_load_n(fc_addr, __ATOMIC_RELAXED);
@@ -671,10 +603,7 @@ cn10k_cpt_vec_submit(struct vec_request vec_tbl[], uint16_t vec_tbl_len, struct
 			cn10k_cpt_vec_pkt_submission_timeout_handle();
 	} while (true);
 
-	lmt_arg = ROC_CN10K_CPT_LMT_ARG | (i - 1) << 12 | lmt_id;
-	roc_lmt_submit_steorl(lmt_arg, io_addr);
-
-	rte_io_wmb();
+	cn10k_cpt_lmtst_dual_submit(&io_addr, lmt_id, &i);
 
 	vec_tbl_len -= i;
 
@@ -688,12 +617,12 @@ static inline int
 ca_lmtst_vec_submit(struct ops_burst *burst, struct vec_request vec_tbl[], uint16_t *vec_tbl_len,
 		    const bool is_sg_ver2)
 {
-	struct cpt_inflight_req *infl_reqs[CN10K_PKTS_PER_LOOP];
-	uint64_t lmt_base, lmt_arg, io_addr;
+	struct cpt_inflight_req *infl_reqs[CN10K_CPT_PKTS_PER_LOOP];
 	uint16_t lmt_id, len = *vec_tbl_len;
 	struct cpt_inst_s *inst, *inst_base;
 	struct cpt_inflight_req *infl_req;
 	struct rte_event_vector *vec;
+	uint64_t lmt_base, io_addr;
 	union cpt_fc_write_s fc;
 	struct cnxk_cpt_qp *qp;
 	uint64_t *fc_addr;
@@ -730,7 +659,7 @@ ca_lmtst_vec_submit(struct ops_burst *burst, struct vec_request vec_tbl[], uint1
 	}
 
 	for (i = 0; i < burst->nb_ops; i++) {
-		inst = &inst_base[2 * i];
+		inst = &inst_base[i];
 		infl_req = infl_reqs[i];
 		infl_req->op_flags = 0;
 
@@ -790,24 +719,12 @@ next_op:;
 	if (CNXK_TT_FROM_TAG(burst->ws->gw_rdata) == SSO_TT_ORDERED)
 		roc_sso_hws_head_wait(burst->ws->base);
 
-	if (i > CN10K_PKTS_PER_STEORL) {
-		lmt_arg = ROC_CN10K_CPT_LMT_ARG | (CN10K_PKTS_PER_STEORL - 1) << 12 |
-			  (uint64_t)lmt_id;
-		roc_lmt_submit_steorl(lmt_arg, io_addr);
-		lmt_arg = ROC_CN10K_CPT_LMT_ARG | (i - CN10K_PKTS_PER_STEORL - 1) << 12 |
-			  (uint64_t)(lmt_id + CN10K_PKTS_PER_STEORL);
-		roc_lmt_submit_steorl(lmt_arg, io_addr);
-	} else {
-		lmt_arg = ROC_CN10K_CPT_LMT_ARG | (i - 1) << 12 | (uint64_t)lmt_id;
-		roc_lmt_submit_steorl(lmt_arg, io_addr);
-	}
+	cn10k_cpt_lmtst_dual_submit(&io_addr, lmt_id, &i);
 
 	/* Store w7 of last successfully filled instruction */
 	inst = &inst_base[2 * (i - 1)];
 	vec_tbl[0].w7 = inst->w7;
 
-	rte_io_wmb();
-
 put:
 	if (i != burst->nb_ops)
 		rte_mempool_put_bulk(qp->ca.req_mp, (void *)&infl_reqs[i], burst->nb_ops - i);
@@ -820,10 +737,10 @@ next_op:;
 static inline uint16_t
 ca_lmtst_burst_submit(struct ops_burst *burst, const bool is_sg_ver2)
 {
-	struct cpt_inflight_req *infl_reqs[CN10K_PKTS_PER_LOOP];
-	uint64_t lmt_base, lmt_arg, io_addr;
+	struct cpt_inflight_req *infl_reqs[CN10K_CPT_PKTS_PER_LOOP];
 	struct cpt_inst_s *inst, *inst_base;
 	struct cpt_inflight_req *infl_req;
+	uint64_t lmt_base, io_addr;
 	union cpt_fc_write_s fc;
 	struct cnxk_cpt_qp *qp;
 	uint64_t *fc_addr;
@@ -854,7 +771,7 @@ ca_lmtst_burst_submit(struct ops_burst *burst, const bool is_sg_ver2)
 	}
 
 	for (i = 0; i < burst->nb_ops; i++) {
-		inst = &inst_base[2 * i];
+		inst = &inst_base[i];
 		infl_req = infl_reqs[i];
 		infl_req->op_flags = 0;
 
@@ -891,19 +808,7 @@ ca_lmtst_burst_submit(struct ops_burst *burst, const bool is_sg_ver2)
 	if (CNXK_TT_FROM_TAG(burst->ws->gw_rdata) == SSO_TT_ORDERED)
 		roc_sso_hws_head_wait(burst->ws->base);
 
-	if (i > CN10K_PKTS_PER_STEORL) {
-		lmt_arg = ROC_CN10K_CPT_LMT_ARG | (CN10K_PKTS_PER_STEORL - 1) << 12 |
-			  (uint64_t)lmt_id;
-		roc_lmt_submit_steorl(lmt_arg, io_addr);
-		lmt_arg = ROC_CN10K_CPT_LMT_ARG | (i - CN10K_PKTS_PER_STEORL - 1) << 12 |
-			  (uint64_t)(lmt_id + CN10K_PKTS_PER_STEORL);
-		roc_lmt_submit_steorl(lmt_arg, io_addr);
-	} else {
-		lmt_arg = ROC_CN10K_CPT_LMT_ARG | (i - 1) << 12 | (uint64_t)lmt_id;
-		roc_lmt_submit_steorl(lmt_arg, io_addr);
-	}
-
-	rte_io_wmb();
+	cn10k_cpt_lmtst_dual_submit(&io_addr, lmt_id, &i);
 
 put:
 	if (unlikely(i != burst->nb_ops))
@@ -965,7 +870,7 @@ cn10k_cpt_crypto_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_ev
 		burst.op[burst.nb_ops] = op;
 
 		/* Max nb_ops per burst check */
-		if (++burst.nb_ops == CN10K_PKTS_PER_LOOP) {
+		if (++burst.nb_ops == CN10K_CPT_PKTS_PER_LOOP) {
 			if (is_vector)
 				submitted = ca_lmtst_vec_submit(&burst, vec_tbl, &vec_tbl_len,
 								is_sg_ver2);
@@ -1467,8 +1372,6 @@ cn10k_cryptodev_sec_inb_rx_inject(void *dev, struct rte_mbuf **pkts,
 
 	vf = cdev->data->dev_private;
 
-	const int nb_pkts_per_loop = 2 * CN10K_PKTS_PER_LOOP;
-
 	lmt_base = vf->rx_inj_lmtline.lmt_base;
 	io_addr = vf->rx_inj_lmtline.io_addr;
 	fc_addr = vf->rx_inj_lmtline.fc_addr;
@@ -1488,7 +1391,7 @@ cn10k_cryptodev_sec_inb_rx_inject(void *dev, struct rte_mbuf **pkts,
 	if (unlikely(fc.s.qsize > fc_thresh))
 		goto exit;
 
-	for (; i < RTE_MIN(nb_pkts_per_loop, nb_pkts); i++) {
+	for (; i < RTE_MIN(CN10K_CPT_PKTS_PER_LOOP, nb_pkts); i++) {
 
 		m = pkts[i];
 		sec_sess = (struct cn10k_sec_session *)sess[i];
@@ -1549,11 +1452,11 @@ cn10k_cryptodev_sec_inb_rx_inject(void *dev, struct rte_mbuf **pkts,
 
 	cn10k_cpt_lmtst_dual_submit(&io_addr, lmt_id, &i);
 
-	if (nb_pkts - i > 0 && i == nb_pkts_per_loop) {
-		nb_pkts -= nb_pkts_per_loop;
-		pkts += nb_pkts_per_loop;
-		count += nb_pkts_per_loop;
-		sess += nb_pkts_per_loop;
+	if (nb_pkts - i > 0 && i == CN10K_CPT_PKTS_PER_LOOP) {
+		nb_pkts -= CN10K_CPT_PKTS_PER_LOOP;
+		pkts += CN10K_CPT_PKTS_PER_LOOP;
+		count += CN10K_CPT_PKTS_PER_LOOP;
+		sess += CN10K_CPT_PKTS_PER_LOOP;
 		goto again;
 	}
 
@@ -1652,8 +1555,8 @@ cn10k_cpt_raw_enqueue_burst(void *qpair, uint8_t *drv_ctx, struct rte_crypto_sym
 			    const bool is_sgv2)
 {
 	uint16_t lmt_id, nb_allowed, nb_ops = vec->num;
-	uint64_t lmt_base, lmt_arg, io_addr, head;
 	struct cpt_inflight_req *infl_req;
+	uint64_t lmt_base, io_addr, head;
 	struct cnxk_cpt_qp *qp = qpair;
 	struct cnxk_sym_dp_ctx *dp_ctx;
 	struct pending_queue *pend_q;
@@ -1690,7 +1593,7 @@ cn10k_cpt_raw_enqueue_burst(void *qpair, uint8_t *drv_ctx, struct rte_crypto_sym
 		goto pend_q_commit;
 	}
 
-	for (i = 0; i < RTE_MIN(CN10K_PKTS_PER_LOOP, nb_ops); i++) {
+	for (i = 0; i < RTE_MIN(CN10K_CPT_PKTS_PER_LOOP, nb_ops); i++) {
 		struct cnxk_iov iov;
 
 		index = count + i;
@@ -1698,7 +1601,7 @@ cn10k_cpt_raw_enqueue_burst(void *qpair, uint8_t *drv_ctx, struct rte_crypto_sym
 		infl_req->op_flags = 0;
 
 		cnxk_raw_burst_to_iov(vec, &ofs, index, &iov);
-		ret = cn10k_cpt_raw_fill_inst(&iov, qp, dp_ctx, &inst[2 * i], infl_req,
+		ret = cn10k_cpt_raw_fill_inst(&iov, qp, dp_ctx, &inst[i], infl_req,
 					      user_data[index], is_sgv2);
 		if (unlikely(ret != 1)) {
 			plt_dp_err("Could not process vec: %d", index);
@@ -1712,21 +1615,9 @@ cn10k_cpt_raw_enqueue_burst(void *qpair, uint8_t *drv_ctx, struct rte_crypto_sym
 		pending_queue_advance(&head, pq_mask);
 	}
 
-	if (i > CN10K_PKTS_PER_STEORL) {
-		lmt_arg = ROC_CN10K_CPT_LMT_ARG | (CN10K_PKTS_PER_STEORL - 1) << 12 |
-			  (uint64_t)lmt_id;
-		roc_lmt_submit_steorl(lmt_arg, io_addr);
-		lmt_arg = ROC_CN10K_CPT_LMT_ARG | (i - CN10K_PKTS_PER_STEORL - 1) << 12 |
-			  (uint64_t)(lmt_id + CN10K_PKTS_PER_STEORL);
-		roc_lmt_submit_steorl(lmt_arg, io_addr);
-	} else {
-		lmt_arg = ROC_CN10K_CPT_LMT_ARG | (i - 1) << 12 | (uint64_t)lmt_id;
-		roc_lmt_submit_steorl(lmt_arg, io_addr);
-	}
-
-	rte_io_wmb();
+	cn10k_cpt_lmtst_dual_submit(&io_addr, lmt_id, &i);
 
-	if (nb_ops - i > 0 && i == CN10K_PKTS_PER_LOOP) {
+	if (nb_ops - i > 0 && i == CN10K_CPT_PKTS_PER_LOOP) {
 		nb_ops -= i;
 		count += i;
 		goto again;
@@ -1767,8 +1658,8 @@ cn10k_cpt_raw_enqueue(void *qpair, uint8_t *drv_ctx, struct rte_crypto_vec *data
 		      struct rte_crypto_va_iova_ptr *aad_or_auth_iv, void *user_data,
 		      const bool is_sgv2)
 {
-	uint64_t lmt_base, lmt_arg, io_addr, head;
 	struct cpt_inflight_req *infl_req;
+	uint64_t lmt_base, io_addr, head;
 	struct cnxk_cpt_qp *qp = qpair;
 	struct cnxk_sym_dp_ctx *dp_ctx;
 	uint16_t lmt_id, nb_allowed;
@@ -1776,7 +1667,7 @@ cn10k_cpt_raw_enqueue(void *qpair, uint8_t *drv_ctx, struct rte_crypto_vec *data
 	union cpt_fc_write_s fc;
 	struct cnxk_iov iov;
 	uint64_t *fc_addr;
-	int ret;
+	int ret, i = 1;
 
 	struct pending_queue *pend_q = &qp->pend_q;
 	const uint64_t pq_mask = pend_q->pq_mask;
@@ -1813,10 +1704,7 @@ cn10k_cpt_raw_enqueue(void *qpair, uint8_t *drv_ctx, struct rte_crypto_vec *data
 
 	pending_queue_advance(&head, pq_mask);
 
-	lmt_arg = ROC_CN10K_CPT_LMT_ARG | (uint64_t)lmt_id;
-	roc_lmt_submit_steorl(lmt_arg, io_addr);
-
-	rte_io_wmb();
+	cn10k_cpt_lmtst_dual_submit(&io_addr, lmt_id, &i);
 
 	pend_q->head = head;
 	pend_q->time_out = rte_get_timer_cycles() + DEFAULT_COMMAND_TIMEOUT * rte_get_timer_hz();
diff --git a/drivers/crypto/cnxk/cn10k_cryptodev_ops.h b/drivers/crypto/cnxk/cn10k_cryptodev_ops.h
index 406c4abc7f..be76c49a65 100644
--- a/drivers/crypto/cnxk/cn10k_cryptodev_ops.h
+++ b/drivers/crypto/cnxk/cn10k_cryptodev_ops.h
@@ -5,15 +5,21 @@
 #ifndef _CN10K_CRYPTODEV_OPS_H_
 #define _CN10K_CRYPTODEV_OPS_H_
 
-#include <rte_compat.h>
 #include <cryptodev_pmd.h>
+#include <rte_compat.h>
 #include <rte_cryptodev.h>
 #include <rte_eventdev.h>
 
+#if defined(__aarch64__)
+#include "roc_io.h"
+#else
+#include "roc_io_generic.h"
+#endif
+
 #include "cnxk_cryptodev.h"
 
-#define CN10K_PKTS_PER_LOOP   32
-#define CN10K_PKTS_PER_STEORL 16
+#define CN10K_PKTS_PER_STEORL	  32
+#define CN10K_LMTLINES_PER_STEORL 16
 
 extern struct rte_cryptodev_ops cn10k_cpt_ops;
 
@@ -34,4 +40,52 @@ __rte_internal
 uint16_t __rte_hot cn10k_cpt_sg_ver2_crypto_adapter_enqueue(void *ws, struct rte_event ev[],
 		uint16_t nb_events);
 
+static __rte_always_inline void __rte_hot
+cn10k_cpt_lmtst_dual_submit(uint64_t *io_addr, const uint16_t lmt_id, int *i)
+{
+	uint64_t lmt_arg;
+
+	/* Check if the total number of instructions is odd or even. */
+	const int flag_odd = *i & 0x1;
+
+	/* Reduce i by 1 when the number of instructions is odd. */
+	*i -= flag_odd;
+
+	if (*i > CN10K_PKTS_PER_STEORL) {
+		lmt_arg = ROC_CN10K_DUAL_CPT_LMT_ARG | (CN10K_LMTLINES_PER_STEORL - 1) << 12 |
+			  (uint64_t)lmt_id;
+		roc_lmt_submit_steorl(lmt_arg, *io_addr);
+		lmt_arg = ROC_CN10K_DUAL_CPT_LMT_ARG |
+			  (*i / 2 - CN10K_LMTLINES_PER_STEORL - 1) << 12 |
+			  (uint64_t)(lmt_id + CN10K_LMTLINES_PER_STEORL);
+		roc_lmt_submit_steorl(lmt_arg, *io_addr);
+		if (flag_odd) {
+			*io_addr = (*io_addr & ~(uint64_t)(0x7 << 4)) |
+				   (ROC_CN10K_CPT_INST_DW_M1 << 4);
+			lmt_arg = (uint64_t)(lmt_id + *i / 2);
+			roc_lmt_submit_steorl(lmt_arg, *io_addr);
+			*io_addr = (*io_addr & ~(uint64_t)(0x7 << 4)) |
+				   (ROC_CN10K_TWO_CPT_INST_DW_M1 << 4);
+			*i += 1;
+		}
+	} else {
+		if (*i != 0) {
+			lmt_arg =
+				ROC_CN10K_DUAL_CPT_LMT_ARG | (*i / 2 - 1) << 12 | (uint64_t)lmt_id;
+			roc_lmt_submit_steorl(lmt_arg, *io_addr);
+		}
+
+		if (flag_odd) {
+			*io_addr = (*io_addr & ~(uint64_t)(0x7 << 4)) |
+				   (ROC_CN10K_CPT_INST_DW_M1 << 4);
+			lmt_arg = (uint64_t)(lmt_id + *i / 2);
+			roc_lmt_submit_steorl(lmt_arg, *io_addr);
+			*io_addr = (*io_addr & ~(uint64_t)(0x7 << 4)) |
+				   (ROC_CN10K_TWO_CPT_INST_DW_M1 << 4);
+			*i += 1;
+		}
+	}
+
+	rte_io_wmb();
+}
 #endif /* _CN10K_CRYPTODEV_OPS_H_ */
diff --git a/drivers/crypto/cnxk/cnxk_cryptodev_ops.c b/drivers/crypto/cnxk/cnxk_cryptodev_ops.c
index 6acaa4413b..cfcfa79fdf 100644
--- a/drivers/crypto/cnxk/cnxk_cryptodev_ops.c
+++ b/drivers/crypto/cnxk/cnxk_cryptodev_ops.c
@@ -431,7 +431,6 @@ cnxk_cpt_queue_pair_setup(struct rte_cryptodev *dev, uint16_t qp_id,
 	struct rte_pci_device *pci_dev;
 	struct cnxk_cpt_qp *qp;
 	uint32_t nb_desc;
-	uint64_t io_addr;
 	int ret;
 
 	if (dev->data->queue_pairs[qp_id] != NULL)
@@ -467,7 +466,7 @@ cnxk_cpt_queue_pair_setup(struct rte_cryptodev *dev, uint16_t qp_id,
 
 	roc_cpt->lf[qp_id] = &qp->lf;
 
-	ret = roc_cpt_lmtline_init(roc_cpt, &qp->lmtline, qp_id);
+	ret = roc_cpt_lmtline_init(roc_cpt, &qp->lmtline, qp_id, true);
 	if (ret < 0) {
 		roc_cpt->lf[qp_id] = NULL;
 		plt_err("Could not init lmtline for queue pair %d", qp_id);
@@ -478,7 +477,7 @@ cnxk_cpt_queue_pair_setup(struct rte_cryptodev *dev, uint16_t qp_id,
 	dev->data->queue_pairs[qp_id] = qp;
 
 	if (qp_id == vf->rx_inject_qp) {
-		ret = roc_cpt_lmtline_init(roc_cpt, &vf->rx_inj_lmtline, vf->rx_inject_qp);
+		ret = roc_cpt_lmtline_init(roc_cpt, &vf->rx_inj_lmtline, vf->rx_inject_qp, true);
 		if (ret) {
 			plt_err("Could not init lmtline Rx inject");
 			goto exit;
@@ -486,14 +485,6 @@ cnxk_cpt_queue_pair_setup(struct rte_cryptodev *dev, uint16_t qp_id,
 
 		vf->rx_inj_sso_pf_func = roc_idev_nix_inl_dev_pffunc_get();
 
-		/* Update IO addr to enable dual submission */
-		io_addr = vf->rx_inj_lmtline.io_addr;
-		io_addr = (io_addr & ~(uint64_t)(0x7 << 4)) | ROC_CN10K_TWO_CPT_INST_DW_M1 << 4;
-		vf->rx_inj_lmtline.io_addr = io_addr;
-
-		/* Update FC threshold to reflect dual submission */
-		vf->rx_inj_lmtline.fc_thresh -= 32;
-
 		/* Block the queue for other submissions */
 		qp->pend_q.pq_mask = 0;
 	}
@@ -969,44 +960,28 @@ rte_pmd_cnxk_crypto_qptr_get(uint8_t dev_id, uint16_t qp_id)
 static inline void
 cnxk_crypto_cn10k_submit(void *qptr, void *inst, uint16_t nb_inst)
 {
-	uint64_t lmt_base, lmt_arg, io_addr;
 	struct cnxk_cpt_qp *qp = qptr;
-	uint16_t i, j, lmt_id;
+	uint64_t lmt_base, io_addr;
+	uint16_t lmt_id;
 	void *lmt_dst;
+	int i;
 
 	lmt_base = qp->lmtline.lmt_base;
 	io_addr = qp->lmtline.io_addr;
 
 	ROC_LMT_BASE_ID_GET(lmt_base, lmt_id);
 
-again:
-	i = RTE_MIN(nb_inst, CN10K_PKTS_PER_LOOP);
 	lmt_dst = PLT_PTR_CAST(lmt_base);
+again:
+	i = RTE_MIN(nb_inst, CN10K_CPT_PKTS_PER_LOOP);
 
-	for (j = 0; j < i; j++) {
-		rte_memcpy(lmt_dst, inst, sizeof(struct cpt_inst_s));
-		inst = RTE_PTR_ADD(inst, sizeof(struct cpt_inst_s));
-		lmt_dst = RTE_PTR_ADD(lmt_dst, 2 * sizeof(struct cpt_inst_s));
-	}
-
-	rte_io_wmb();
-
-	if (i > CN10K_PKTS_PER_STEORL) {
-		lmt_arg = ROC_CN10K_CPT_LMT_ARG | (CN10K_PKTS_PER_STEORL - 1) << 12 |
-			  (uint64_t)lmt_id;
-		roc_lmt_submit_steorl(lmt_arg, io_addr);
-		lmt_arg = ROC_CN10K_CPT_LMT_ARG | (i - CN10K_PKTS_PER_STEORL - 1) << 12 |
-			  (uint64_t)(lmt_id + CN10K_PKTS_PER_STEORL);
-		roc_lmt_submit_steorl(lmt_arg, io_addr);
-	} else {
-		lmt_arg = ROC_CN10K_CPT_LMT_ARG | (i - 1) << 12 | (uint64_t)lmt_id;
-		roc_lmt_submit_steorl(lmt_arg, io_addr);
-	}
+	memcpy(lmt_dst, inst, i * sizeof(struct cpt_inst_s));
 
-	rte_io_wmb();
+	cn10k_cpt_lmtst_dual_submit(&io_addr, lmt_id, &i);
 
 	if (nb_inst - i > 0) {
-		nb_inst -= i;
+		nb_inst -= CN10K_CPT_PKTS_PER_LOOP;
+		inst = RTE_PTR_ADD(inst, CN10K_CPT_PKTS_PER_LOOP * sizeof(struct cpt_inst_s));
 		goto again;
 	}
 }
diff --git a/drivers/crypto/cnxk/cnxk_cryptodev_ops.h b/drivers/crypto/cnxk/cnxk_cryptodev_ops.h
index 9de7e432e4..caf6ac35e5 100644
--- a/drivers/crypto/cnxk/cnxk_cryptodev_ops.h
+++ b/drivers/crypto/cnxk/cnxk_cryptodev_ops.h
@@ -25,6 +25,8 @@
 
 #define MOD_INC(i, l) ((i) == (l - 1) ? (i) = 0 : (i)++)
 
+#define CN10K_CPT_PKTS_PER_LOOP	  64
+
 /* Macros to form words in CPT instruction */
 #define CNXK_CPT_INST_W2(tag, tt, grp, rvu_pf_func)                            \
 	((tag) | ((uint64_t)(tt) << 32) | ((uint64_t)(grp) << 34) |            \
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index 98db11ad61..2c049e7041 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -632,7 +632,7 @@ crypto_adapter_qp_setup(const struct rte_cryptodev *cdev, struct cnxk_cpt_qp *qp
 	 * simultaneous enqueue from all available cores.
 	 */
 	if (roc_model_is_cn10k())
-		nb_desc_min = rte_lcore_count() * 32;
+		nb_desc_min = rte_lcore_count() * CN10K_CPT_PKTS_PER_LOOP;
 	else
 		nb_desc_min = rte_lcore_count() * 2;
 
@@ -707,7 +707,7 @@ crypto_adapter_qp_free(struct cnxk_cpt_qp *qp)
 	rte_mempool_free(qp->ca.req_mp);
 	qp->ca.enabled = false;
 
-	ret = roc_cpt_lmtline_init(qp->lf.roc_cpt, &qp->lmtline, qp->lf.lf_id);
+	ret = roc_cpt_lmtline_init(qp->lf.roc_cpt, &qp->lmtline, qp->lf.lf_id, true);
 	if (ret < 0) {
 		plt_err("Could not reset lmtline for queue pair %d", qp->lf.lf_id);
 		return ret;
-- 
2.25.1


Thread overview: 41+ messages
2024-06-20 14:58 [PATCH 00/12] fixes and improvements to CNXK crypto PMD Aakash Sasidharan
2024-06-20 14:58 ` [PATCH 01/12] common/cnxk: add comments to denote skipped entries Aakash Sasidharan
2024-06-20 14:58 ` [PATCH 02/12] crypto/cnxk: update version map file with PMD APIs Aakash Sasidharan
2024-06-20 14:58 ` [PATCH 03/12] common/cnxk: make inline dev PF func get as idev API Aakash Sasidharan
2024-06-20 14:58 ` [PATCH 04/12] crypto/cnxk: add flow control in Rx inject path Aakash Sasidharan
2024-06-20 14:58 ` [PATCH 05/12] crypto/cnxk: use SSO PF func of inline device in inst Aakash Sasidharan
2024-06-20 14:58 ` [PATCH 06/12] crypto/cnxk: use NEON for Rx inject inst preparation Aakash Sasidharan
2024-06-20 14:58 ` [PATCH 07/12] crypto/cnxk: remove init of CPT result field in packet Aakash Sasidharan
2024-06-20 14:58 ` [PATCH 08/12] crypto/cnxk: add dual submission in Rx inject Aakash Sasidharan
2024-06-20 14:58 ` [PATCH 09/12] crypto/cnxk: update sess pointer for next iteration Aakash Sasidharan
2024-06-20 14:58 ` [PATCH 10/12] crypto/cnxk: fix aes-gcm zero len input cases Aakash Sasidharan
2024-06-20 14:58 ` [PATCH 11/12] crypto/cnxk: make pack IV variable as const Aakash Sasidharan
2024-06-20 14:58 ` [PATCH 12/12] crypto/cnxk: enable dual submission to CPT Aakash Sasidharan
2024-06-24  6:23 ` [PATCH v2 00/12] fixes and improvements to CNXK crypto PMD Aakash Sasidharan
2024-06-24  6:23   ` [PATCH v2 01/12] common/cnxk: add comments to denote skipped entries Aakash Sasidharan
2024-06-24  6:23   ` [PATCH v2 02/12] crypto/cnxk: update version map file with PMD APIs Aakash Sasidharan
2024-06-24  6:23   ` [PATCH v2 03/12] common/cnxk: make inline dev PF func get as idev API Aakash Sasidharan
2024-06-24  6:23   ` [PATCH v2 04/12] crypto/cnxk: add flow control in Rx inject path Aakash Sasidharan
2024-06-24  6:23   ` [PATCH v2 05/12] crypto/cnxk: use SSO PF func of inline device in inst Aakash Sasidharan
2024-06-24  6:23   ` [PATCH v2 06/12] crypto/cnxk: use NEON for Rx inject inst preparation Aakash Sasidharan
2024-06-24  6:23   ` [PATCH v2 07/12] crypto/cnxk: remove init of CPT result field in packet Aakash Sasidharan
2024-06-24  6:23   ` [PATCH v2 08/12] crypto/cnxk: add dual submission in Rx inject Aakash Sasidharan
2024-06-26  6:41     ` Akhil Goyal
2024-06-24  6:23   ` [PATCH v2 09/12] crypto/cnxk: update sess pointer for next iteration Aakash Sasidharan
2024-06-24  6:23   ` [PATCH v2 10/12] crypto/cnxk: fix aes-gcm zero len input cases Aakash Sasidharan
2024-06-24  6:24   ` [PATCH v2 11/12] crypto/cnxk: make pack IV variable as const Aakash Sasidharan
2024-06-24  6:24   ` [PATCH v2 12/12] crypto/cnxk: enable dual submission to CPT Aakash Sasidharan
2024-06-26 10:55   ` [PATCH v3 00/12] Fixes and improvements to CNXK crypto PMD Aakash Sasidharan
2024-06-26 10:55     ` [PATCH v3 01/12] common/cnxk: add comments to denote skipped entries Aakash Sasidharan
2024-06-26 10:55     ` [PATCH v3 02/12] crypto/cnxk: update version map file with PMD APIs Aakash Sasidharan
2024-06-26 10:55     ` [PATCH v3 03/12] common/cnxk: make inline dev PF func get as idev API Aakash Sasidharan
2024-06-26 10:55     ` [PATCH v3 04/12] crypto/cnxk: add flow control in Rx inject path Aakash Sasidharan
2024-06-26 10:55     ` [PATCH v3 05/12] crypto/cnxk: use SSO PF func of inline device in inst Aakash Sasidharan
2024-06-26 10:55     ` [PATCH v3 06/12] crypto/cnxk: use NEON for Rx inject inst preparation Aakash Sasidharan
2024-06-26 10:55     ` [PATCH v3 07/12] crypto/cnxk: remove init of CPT result field in packet Aakash Sasidharan
2024-06-26 10:55     ` [PATCH v3 08/12] crypto/cnxk: add dual submission in Rx inject Aakash Sasidharan
2024-06-26 10:55     ` [PATCH v3 09/12] crypto/cnxk: update sess pointer for next iteration Aakash Sasidharan
2024-06-26 10:55     ` [PATCH v3 10/12] crypto/cnxk: fix aes-gcm zero len input cases Aakash Sasidharan
2024-06-26 10:55     ` [PATCH v3 11/12] crypto/cnxk: make pack IV variable as const Aakash Sasidharan
2024-06-26 10:55     ` Aakash Sasidharan [this message]
2024-06-27  5:11     ` [PATCH v3 00/12] Fixes and improvements to CNXK crypto PMD Akhil Goyal
