patches for DPDK stable branches
 help / color / mirror / Atom feed
* [dpdk-stable] [PATCH v1 6/6] crypto/ccp: scheduling multiple CCP within single burst
@ 2019-10-15  7:02 asomalap
  2019-10-15 11:12 ` Akhil Goyal
  0 siblings, 1 reply; 3+ messages in thread
From: asomalap @ 2019-10-15  7:02 UTC (permalink / raw)
  To: dev; +Cc: stable

From: Amaranath Somalapuram <asomalap@amd.com>

The ccp driver was scheduling only one CCP in a single burst, so the
effective throughput was limited to the performance of a single CCP.
Scheduling multiple CCPs within one burst increases the CCP performance.
This change divides the enqueued packets equally among the multiple CCPs.

Cc: stable@dpdk.org

Signed-off-by: Amaranath Somalapuram <asomalap@amd.com>
---
 drivers/crypto/ccp/ccp_crypto.c      | 22 +++++++----
 drivers/crypto/ccp/ccp_crypto.h      |  7 +++-
 drivers/crypto/ccp/ccp_pmd_private.h |  2 +
 drivers/crypto/ccp/rte_ccp_pmd.c     | 57 +++++++++++++++++++---------
 4 files changed, 62 insertions(+), 26 deletions(-)

diff --git a/drivers/crypto/ccp/ccp_crypto.c b/drivers/crypto/ccp/ccp_crypto.c
index 8862a1a84..23694bac6 100644
--- a/drivers/crypto/ccp/ccp_crypto.c
+++ b/drivers/crypto/ccp/ccp_crypto.c
@@ -2803,7 +2803,9 @@ process_ops_to_enqueue(struct ccp_qp *qp,
 		       struct rte_crypto_op **op,
 		       struct ccp_queue *cmd_q,
 		       uint16_t nb_ops,
-		       int slots_req)
+		       uint16_t total_nb_ops,
+		       int slots_req,
+		       uint16_t b_idx)
 {
 	int i, result = 0;
 	struct ccp_batch_info *b_info;
@@ -2824,6 +2826,7 @@ process_ops_to_enqueue(struct ccp_qp *qp,
 
 	/* populate batch info necessary for dequeue */
 	b_info->op_idx = 0;
+	b_info->b_idx = 0;
 	b_info->lsb_buf_idx = 0;
 	b_info->desccnt = 0;
 	b_info->cmd_q = cmd_q;
@@ -2839,7 +2842,7 @@ process_ops_to_enqueue(struct ccp_qp *qp,
 
 	b_info->head_offset = (uint32_t)(cmd_q->qbase_phys_addr + cmd_q->qidx *
 					 Q_DESC_SIZE);
-	for (i = 0; i < nb_ops; i++) {
+	for (i = b_idx; i < (nb_ops+b_idx); i++) {
 		session = (struct ccp_session *)get_sym_session_private_data(
 						 op[i]->sym->session,
 						 ccp_cryptodev_driver_id);
@@ -2891,6 +2894,8 @@ process_ops_to_enqueue(struct ccp_qp *qp,
 	}
 
 	b_info->opcnt = i;
+	b_info->b_idx = b_idx;
+	b_info->total_nb_ops = total_nb_ops;
 	b_info->tail_offset = (uint32_t)(cmd_q->qbase_phys_addr + cmd_q->qidx *
 					 Q_DESC_SIZE);
 
@@ -2905,7 +2910,7 @@ process_ops_to_enqueue(struct ccp_qp *qp,
 	rte_ring_enqueue(qp->processed_pkts, (void *)b_info);
 
 	EVP_MD_CTX_destroy(auth_ctx);
-	return i;
+	return i-b_idx;
 }
 
 static inline void ccp_auth_dq_prepare(struct rte_crypto_op *op)
@@ -2990,8 +2995,8 @@ ccp_prepare_ops(struct ccp_qp *qp,
 	}
 	min_ops = RTE_MIN(nb_ops, b_info->opcnt);
 
-	for (i = 0; i < min_ops; i++) {
-		op_d[i] = b_info->op[b_info->op_idx++];
+	for (i =  b_info->b_idx; i < min_ops; i++) {
+		op_d[i] = b_info->op[b_info->b_idx + b_info->op_idx++];
 		session = (struct ccp_session *)get_sym_session_private_data(
 						 op_d[i]->sym->session,
 						ccp_cryptodev_driver_id);
@@ -3032,7 +3037,8 @@ ccp_prepare_ops(struct ccp_qp *qp,
 int
 process_ops_to_dequeue(struct ccp_qp *qp,
 		       struct rte_crypto_op **op,
-		       uint16_t nb_ops)
+		       uint16_t nb_ops,
+		       uint16_t *total_nb_ops)
 {
 	struct ccp_batch_info *b_info;
 	uint32_t cur_head_offset;
@@ -3047,6 +3053,7 @@ process_ops_to_dequeue(struct ccp_qp *qp,
 
 	if (b_info->auth_ctr == b_info->opcnt)
 		goto success;
+	*total_nb_ops = b_info->total_nb_ops;
 	cur_head_offset = CCP_READ_REG(b_info->cmd_q->reg_base,
 				       CMD_Q_HEAD_LO_BASE);
 
@@ -3056,7 +3063,7 @@ process_ops_to_dequeue(struct ccp_qp *qp,
 			qp->b_info = b_info;
 			return 0;
 		}
-	} else {
+	} else if (b_info->tail_offset != b_info->head_offset) {
 		if ((cur_head_offset >= b_info->head_offset) ||
 		    (cur_head_offset < b_info->tail_offset)) {
 			qp->b_info = b_info;
@@ -3066,6 +3073,7 @@ process_ops_to_dequeue(struct ccp_qp *qp,
 
 
 success:
+	*total_nb_ops = b_info->total_nb_ops;
 	nb_ops = ccp_prepare_ops(qp, op, b_info, nb_ops);
 	rte_atomic64_add(&b_info->cmd_q->free_slots, b_info->desccnt);
 	b_info->desccnt = 0;
diff --git a/drivers/crypto/ccp/ccp_crypto.h b/drivers/crypto/ccp/ccp_crypto.h
index 882b398ac..8e6d03efc 100644
--- a/drivers/crypto/ccp/ccp_crypto.h
+++ b/drivers/crypto/ccp/ccp_crypto.h
@@ -353,7 +353,9 @@ int process_ops_to_enqueue(struct ccp_qp *qp,
 			   struct rte_crypto_op **op,
 			   struct ccp_queue *cmd_q,
 			   uint16_t nb_ops,
-			   int slots_req);
+			   uint16_t total_nb_ops,
+			   int slots_req,
+			   uint16_t b_idx);
 
 /**
  * process crypto ops to be dequeued
@@ -365,7 +367,8 @@ int process_ops_to_enqueue(struct ccp_qp *qp,
  */
 int process_ops_to_dequeue(struct ccp_qp *qp,
 			   struct rte_crypto_op **op,
-			   uint16_t nb_ops);
+			   uint16_t nb_ops,
+			   uint16_t *total_nb_ops);
 
 
 /**
diff --git a/drivers/crypto/ccp/ccp_pmd_private.h b/drivers/crypto/ccp/ccp_pmd_private.h
index 781050c31..1c4118ee3 100644
--- a/drivers/crypto/ccp/ccp_pmd_private.h
+++ b/drivers/crypto/ccp/ccp_pmd_private.h
@@ -50,8 +50,10 @@ struct ccp_batch_info {
 	struct rte_crypto_op *op[CCP_MAX_BURST];
 	/**< optable populated at enque time from app*/
 	int op_idx;
+	uint16_t b_idx;
 	struct ccp_queue *cmd_q;
 	uint16_t opcnt;
+	uint16_t total_nb_ops;
 	/**< no. of crypto ops in batch*/
 	int desccnt;
 	/**< no. of ccp queue descriptors*/
diff --git a/drivers/crypto/ccp/rte_ccp_pmd.c b/drivers/crypto/ccp/rte_ccp_pmd.c
index a182c6a52..4807b580e 100644
--- a/drivers/crypto/ccp/rte_ccp_pmd.c
+++ b/drivers/crypto/ccp/rte_ccp_pmd.c
@@ -23,6 +23,7 @@
 static unsigned int ccp_pmd_init_done;
 uint8_t ccp_cryptodev_driver_id;
 extern void *sha_ctx;
+uint8_t cryptodev_cnt;
 
 struct ccp_pmd_init_params {
 	struct rte_cryptodev_pmd_init_params def_p;
@@ -202,30 +203,45 @@ ccp_pmd_enqueue_burst(void *queue_pair, struct rte_crypto_op **ops,
 	struct ccp_queue *cmd_q;
 	struct rte_cryptodev *dev = qp->dev;
 	uint16_t i, enq_cnt = 0, slots_req = 0;
+	uint16_t tmp_ops = nb_ops, b_idx, cur_ops = 0;
 
 	if (nb_ops == 0)
 		return 0;
 
 	if (unlikely(rte_ring_full(qp->processed_pkts) != 0))
 		return 0;
+	if (tmp_ops >= cryptodev_cnt)
+		cur_ops = nb_ops / cryptodev_cnt + (nb_ops)%cryptodev_cnt;
+	else
+		cur_ops = tmp_ops;
+	while (tmp_ops)	{
+		b_idx = nb_ops - tmp_ops;
+		slots_req = 0;
+		if (cur_ops <= tmp_ops) {
+			tmp_ops -= cur_ops;
+		} else {
+			cur_ops = tmp_ops;
+			tmp_ops = 0;
+		}
+		for (i = 0; i < cur_ops; i++) {
+			sess = get_ccp_session(qp, ops[i + b_idx]);
+			if (unlikely(sess == NULL) && (i == 0)) {
+				qp->qp_stats.enqueue_err_count++;
+				return 0;
+			} else if (sess == NULL) {
+				cur_ops = i;
+				break;
+			}
+			slots_req += ccp_compute_slot_count(sess);
+		}
 
-	for (i = 0; i < nb_ops; i++) {
-		sess = get_ccp_session(qp, ops[i]);
-		if (unlikely(sess == NULL) && (i == 0)) {
-			qp->qp_stats.enqueue_err_count++;
+		cmd_q = ccp_allot_queue(dev, slots_req);
+		if (unlikely(cmd_q == NULL))
 			return 0;
-		} else if (sess == NULL) {
-			nb_ops = i;
-			break;
-		}
-		slots_req += ccp_compute_slot_count(sess);
+		enq_cnt += process_ops_to_enqueue(qp, ops, cmd_q, cur_ops,
+				nb_ops, slots_req, b_idx);
+		i++;
 	}
-
-	cmd_q = ccp_allot_queue(dev, slots_req);
-	if (unlikely(cmd_q == NULL))
-		return 0;
-
-	enq_cnt = process_ops_to_enqueue(qp, ops, cmd_q, nb_ops, slots_req);
 	qp->qp_stats.enqueued_count += enq_cnt;
 	return enq_cnt;
 }
@@ -235,9 +251,16 @@ ccp_pmd_dequeue_burst(void *queue_pair, struct rte_crypto_op **ops,
 		uint16_t nb_ops)
 {
 	struct ccp_qp *qp = queue_pair;
-	uint16_t nb_dequeued = 0, i;
+	uint16_t nb_dequeued = 0, i, total_nb_ops;
+
+	nb_dequeued = process_ops_to_dequeue(qp, ops, nb_ops, &total_nb_ops);
 
-	nb_dequeued = process_ops_to_dequeue(qp, ops, nb_ops);
+	if (total_nb_ops) {
+		while (nb_dequeued != total_nb_ops) {
+			nb_dequeued = process_ops_to_dequeue(qp,
+					ops, nb_ops, &total_nb_ops);
+		}
+	}
 
 	/* Free session if a session-less crypto op */
 	for (i = 0; i < nb_dequeued; i++)
-- 
2.17.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [dpdk-stable] [PATCH v1 6/6] crypto/ccp: scheduling multiple CCP within single burst
  2019-10-15  7:02 [dpdk-stable] [PATCH v1 6/6] crypto/ccp: scheduling multiple CCP within single burst asomalap
@ 2019-10-15 11:12 ` Akhil Goyal
  2019-10-15 11:15   ` Akhil Goyal
  0 siblings, 1 reply; 3+ messages in thread
From: Akhil Goyal @ 2019-10-15 11:12 UTC (permalink / raw)
  To: asomalap, dev; +Cc: stable

Title: crypto/ccp: schedule multiple devs within single burst
or
crypto/ccp: improve performance

> 
> From: Amaranath Somalapuram <asomalap@amd.com>
> 
> ccp driver was scheduling only one CCP in a single burst.
> Effective throughput was limited to 1 CCP performance.
> Scheduling multiple ccp within one burst will increase the ccp performance.
> this changes will divide the enqueue packets equally among the multiple CCP
> Cc: stable@dpdk.org

This patch also doesn't look like a fix, but rather a performance improvement.
Does it really need to be backported?
If yes, then a "Fixes:" line is required and the title would be "crypto/ccp: fix performance".

> 
> Signed-off-by: Amaranath Somalapuram <asomalap@amd.com>
> ---


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [dpdk-stable] [PATCH v1 6/6] crypto/ccp: scheduling multiple CCP within single burst
  2019-10-15 11:12 ` Akhil Goyal
@ 2019-10-15 11:15   ` Akhil Goyal
  0 siblings, 0 replies; 3+ messages in thread
From: Akhil Goyal @ 2019-10-15 11:15 UTC (permalink / raw)
  To: asomalap, dev, Ravi Kumar; +Cc: stable

Also please include CCP maintainer in cc list for v2.
@Ravi Kumar: could you please review this series.

> -----Original Message-----
> From: Akhil Goyal
> Sent: Tuesday, October 15, 2019 4:43 PM
> To: asomalap@amd.com; dev@dpdk.org
> Cc: stable@dpdk.org
> Subject: RE: [PATCH v1 6/6] crypto/ccp: scheduling multiple CCP within single
> burst
> 
> Title: crypto/ccp: schedule multiple devs within single burst
> Or
> Crypto/ccp: improve performance.
> 
> >
> > From: Amaranath Somalapuram <asomalap@amd.com>
> >
> > ccp driver was scheduling only one CCP in a single burst.
> > Effective throughput was limited to 1 CCP performance.
> > Scheduling multiple ccp within one burst will increase the ccp performance.
> > this changes will divide the enqueue packets equally among the multiple CCP
> > Cc: stable@dpdk.org
> 
> This patch also doesn't look like a fix, rather a performance improvement.
> Not sure if this need to be backported?
> If yes, then a fixes line is required and title would be "crypto/ccp: fix
> performance"
> 
> >
> > Signed-off-by: Amaranath Somalapuram <asomalap@amd.com>
> > ---


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2019-10-15 11:15 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-10-15  7:02 [dpdk-stable] [PATCH v1 6/6] crypto/ccp: scheduling multiple CCP within single burst asomalap
2019-10-15 11:12 ` Akhil Goyal
2019-10-15 11:15   ` Akhil Goyal

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).