Currently, EVP auth contexts (e.g. EVP_MD_CTX, EVP_MAC_CTX) are
allocated, populated by copying from the openssl_session, and then
freed for every auth operation (i.e. per packet). This is very
inefficient, and avoidable.
Make each openssl_session hold an array of structures, containing
pointers to per-queue-pair cipher and auth context copies. These are
populated on first use by allocating a new context and copying from the
main context. These copies can then be used in a thread-safe manner by
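different worker lcores simultaneously. Consequently, the auth context
allocation and copy only have to happen once - the first time a given qp
uses an openssl_session. This brings about a large performance boost.
The exception is OpenSSL 3.0.x builds affected by the EVP_MAC re-init
bug, where HMAC/CMAC contexts are still duplicated for every buffer.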
Throughput performance uplift measurements for HMAC-SHA1 generate on
Ampere Altra Max platform:
1 worker lcore
| buffer sz (B) | prev (Gbps) | optimised (Gbps) | uplift |
|-----------------+---------------+--------------------+----------|
| 64 | 0.63 | 1.42 | 123.5% |
| 256 | 2.24 | 4.40 | 96.4% |
| 1024 | 6.15 | 9.26 | 50.6% |
| 2048 | 8.68 | 11.38 | 31.1% |
| 4096 | 10.92 | 12.84 | 17.6% |
8 worker lcores
| buffer sz (B) | prev (Gbps) | optimised (Gbps) | uplift |
|-----------------+---------------+--------------------+----------|
| 64 | 0.93 | 11.35 | 1122.5% |
| 256 | 3.70 | 35.30 | 853.7% |
| 1024 | 15.22 | 74.27 | 387.8% |
| 2048 | 30.20 | 91.08 | 201.6% |
| 4096 | 56.92 | 102.76 | 80.5% |
Signed-off-by: Jack Bond-Preston <jack.bond-preston@foss.arm.com>
Reviewed-by: Wathsala Vithanage <wathsala.vithanage@arm.com>
---
drivers/crypto/openssl/compat.h | 26 +++
drivers/crypto/openssl/openssl_pmd_private.h | 25 ++-
drivers/crypto/openssl/rte_openssl_pmd.c | 176 +++++++++++++++----
drivers/crypto/openssl/rte_openssl_pmd_ops.c | 7 +-
4 files changed, 193 insertions(+), 41 deletions(-)
diff --git a/drivers/crypto/openssl/compat.h b/drivers/crypto/openssl/compat.h
index 9f9167c4f1..4c5ddfbf3a 100644
--- a/drivers/crypto/openssl/compat.h
+++ b/drivers/crypto/openssl/compat.h
@@ -5,6 +5,32 @@
#ifndef __RTA_COMPAT_H__
#define __RTA_COMPAT_H__
+#if OPENSSL_VERSION_NUMBER >= 0x30000000L /* 3.0.0 included: MACs are EVP_MAC_CTX */
+static __rte_always_inline void
+free_hmac_ctx(EVP_MAC_CTX *ctx)
+{
+ EVP_MAC_CTX_free(ctx);
+}
+
+static __rte_always_inline void
+free_cmac_ctx(EVP_MAC_CTX *ctx)
+{
+ EVP_MAC_CTX_free(ctx);
+}
+#else /* OpenSSL < 3.0: legacy HMAC_CTX / CMAC_CTX objects */
+static __rte_always_inline void
+free_hmac_ctx(HMAC_CTX *ctx)
+{
+ HMAC_CTX_free(ctx);
+}
+
+static __rte_always_inline void
+free_cmac_ctx(CMAC_CTX *ctx)
+{
+ CMAC_CTX_free(ctx);
+}
+#endif /* OPENSSL_VERSION_NUMBER >= 0x30000000L */
+
#if (OPENSSL_VERSION_NUMBER < 0x10100000L)
static __rte_always_inline int
diff --git a/drivers/crypto/openssl/openssl_pmd_private.h b/drivers/crypto/openssl/openssl_pmd_private.h
index bad7dcf2f5..a50e4d4918 100644
--- a/drivers/crypto/openssl/openssl_pmd_private.h
+++ b/drivers/crypto/openssl/openssl_pmd_private.h
@@ -80,6 +80,20 @@ struct __rte_cache_aligned openssl_qp {
*/
};
+struct evp_ctx_pair { /* one queue pair's own copies of a session's contexts */
+ EVP_CIPHER_CTX *cipher; /**< copy of the session's cipher context */
+ union { /* auth context copy; the session's auth mode selects the member */
+ EVP_MD_CTX *auth;
+#if OPENSSL_VERSION_NUMBER >= 0x30000000L
+ EVP_MAC_CTX *hmac;
+ EVP_MAC_CTX *cmac;
+#else
+ HMAC_CTX *hmac;
+ CMAC_CTX *cmac;
+#endif
+ };
+};
+
/** OPENSSL crypto private session structure */
struct __rte_cache_aligned openssl_session {
enum openssl_chain_order chain_order;
@@ -168,11 +182,12 @@ struct __rte_cache_aligned openssl_session {
uint16_t ctx_copies_len;
/* < number of entries in ctx_copies */
- EVP_CIPHER_CTX *qp_ctx[];
- /**< Flexible array member of per-queue-pair pointers to copies of EVP
- * context structure. Cipher contexts are not safe to use from multiple
- * cores simultaneously, so maintaining these copies allows avoiding
- * per-buffer copying into a temporary context.
+ struct evp_ctx_pair qp_ctx[];
+ /**< Flexible array member of per-queue-pair structures, each containing
+ * pointers to copies of the cipher and auth EVP contexts. Cipher
+ * contexts are not safe to use from multiple cores simultaneously, so
+ * maintaining these copies allows avoiding per-buffer copying into a
+ * temporary context.
*/
};
diff --git a/drivers/crypto/openssl/rte_openssl_pmd.c b/drivers/crypto/openssl/rte_openssl_pmd.c
index df44cc097e..7e2e505222 100644
--- a/drivers/crypto/openssl/rte_openssl_pmd.c
+++ b/drivers/crypto/openssl/rte_openssl_pmd.c
@@ -892,40 +892,45 @@ openssl_set_session_parameters(struct openssl_session *sess,
void
openssl_reset_session(struct openssl_session *sess)
{
+ /* Free each queue pair's private context copies: cipher, then auth. */
for (uint16_t i = 0; i < sess->ctx_copies_len; i++) {
- if (sess->qp_ctx[i] != NULL) {
- EVP_CIPHER_CTX_free(sess->qp_ctx[i]);
- sess->qp_ctx[i] = NULL;
+ if (sess->qp_ctx[i].cipher != NULL) {
+ EVP_CIPHER_CTX_free(sess->qp_ctx[i].cipher);
+ sess->qp_ctx[i].cipher = NULL;
+ }
+
+ switch (sess->auth.mode) { /* auth mode selects the live union member */
+ case OPENSSL_AUTH_AS_AUTH:
+ EVP_MD_CTX_destroy(sess->qp_ctx[i].auth);
+ sess->qp_ctx[i].auth = NULL;
+ break;
+ case OPENSSL_AUTH_AS_HMAC:
+ free_hmac_ctx(sess->qp_ctx[i].hmac);
+ sess->qp_ctx[i].hmac = NULL;
+ break;
+ case OPENSSL_AUTH_AS_CMAC:
+ free_cmac_ctx(sess->qp_ctx[i].cmac);
+ sess->qp_ctx[i].cmac = NULL;
+ break;
}
}
EVP_CIPHER_CTX_free(sess->cipher.ctx);
- if (sess->chain_order == OPENSSL_CHAIN_CIPHER_BPI)
- EVP_CIPHER_CTX_free(sess->cipher.bpi_ctx);
-
switch (sess->auth.mode) {
case OPENSSL_AUTH_AS_AUTH:
EVP_MD_CTX_destroy(sess->auth.auth.ctx);
break;
case OPENSSL_AUTH_AS_HMAC:
- EVP_PKEY_free(sess->auth.hmac.pkey);
-# if OPENSSL_VERSION_NUMBER >= 0x30000000L
- EVP_MAC_CTX_free(sess->auth.hmac.ctx);
-# else
- HMAC_CTX_free(sess->auth.hmac.ctx);
-# endif
+ free_hmac_ctx(sess->auth.hmac.ctx);
break;
case OPENSSL_AUTH_AS_CMAC:
-# if OPENSSL_VERSION_NUMBER >= 0x30000000L
- EVP_MAC_CTX_free(sess->auth.cmac.ctx);
-# else
- CMAC_CTX_free(sess->auth.cmac.ctx);
-# endif
- break;
- default:
+ free_cmac_ctx(sess->auth.cmac.ctx);
break;
}
+
+ if (sess->chain_order == OPENSSL_CHAIN_CIPHER_BPI)
+ EVP_CIPHER_CTX_free(sess->cipher.bpi_ctx);
}
/** Provide session for operation */
@@ -1471,6 +1476,9 @@ process_openssl_auth_mac(struct rte_mbuf *mbuf_src, uint8_t *dst, int offset,
if (m == 0)
goto process_auth_err;
+ if (EVP_MAC_init(ctx, NULL, 0, NULL) <= 0)
+ goto process_auth_err;
+
src = rte_pktmbuf_mtod_offset(m, uint8_t *, offset);
l = rte_pktmbuf_data_len(m) - offset;
@@ -1497,11 +1505,9 @@ process_openssl_auth_mac(struct rte_mbuf *mbuf_src, uint8_t *dst, int offset,
if (EVP_MAC_final(ctx, dst, &dstlen, DIGEST_LENGTH_MAX) != 1)
goto process_auth_err;
- EVP_MAC_CTX_free(ctx);
return 0;
process_auth_err:
- EVP_MAC_CTX_free(ctx);
OPENSSL_LOG(ERR, "Process openssl auth failed");
return -EINVAL;
}
@@ -1620,7 +1626,7 @@ get_local_cipher_ctx(struct openssl_session *sess, struct openssl_qp *qp)
if (sess->ctx_copies_len == 0)
return sess->cipher.ctx;
- EVP_CIPHER_CTX **lctx = &sess->qp_ctx[qp->id];
+ EVP_CIPHER_CTX **lctx = &sess->qp_ctx[qp->id].cipher;
if (unlikely(*lctx == NULL)) {
#if OPENSSL_VERSION_NUMBER >= 0x30200000L
@@ -1647,6 +1653,112 @@ get_local_cipher_ctx(struct openssl_session *sess, struct openssl_qp *qp)
return *lctx;
}
+static inline EVP_MD_CTX *
+get_local_auth_ctx(struct openssl_session *sess, struct openssl_qp *qp)
+{
+ /* No per-qp array in use: return the session's main context itself. */
+ if (sess->ctx_copies_len == 0)
+ return sess->auth.auth.ctx;
+
+ EVP_MD_CTX **lctx = &sess->qp_ctx[qp->id].auth; /* this qp's own slot */
+
+ if (unlikely(*lctx == NULL)) { /* first use by this qp: copy the main context */
+#if OPENSSL_VERSION_NUMBER >= 0x30100000L
+ /* EVP_MD_CTX_dup() added in OSSL 3.1 */
+ *lctx = EVP_MD_CTX_dup(sess->auth.auth.ctx);
+#else
+ *lctx = EVP_MD_CTX_new();
+ EVP_MD_CTX_copy(*lctx, sess->auth.auth.ctx);
+#endif
+ }
+ /* NOTE(review): the dup/new/copy results above are not checked for failure. */
+ return *lctx;
+}
+
+#if OPENSSL_VERSION_NUMBER >= 0x30000000L
+static inline EVP_MAC_CTX *
+#else
+static inline HMAC_CTX *
+#endif
+get_local_hmac_ctx(struct openssl_session *sess, struct openssl_qp *qp)
+{
+#if (OPENSSL_VERSION_NUMBER >= 0x30000000L && OPENSSL_VERSION_NUMBER < 0x30003000L)
+ /* OpenSSL 3.0.0 <= v < 3.0.3 cannot re-init an EVP_MAC_CTX: its state
+ * is not actually reset (fixed by OSSL commit c9ddc5af5199, "Avoid
+ * undefined behavior of provided macs on EVP_MAC reinitialization").
+ * Without the fix, return a fresh duplicate for every buffer, which
+ * process_openssl_auth_op() frees after use, at the cost of performance.
+ * NOTE(review): OpenSSL 3 encodes versions as 0xMNN00PP0L, so 3.0.3 is
+ * 0x30000030L, not 0x30003000L - confirm; fix all matching #ifs together.
+ */
+ RTE_SET_USED(qp);
+ return EVP_MAC_CTX_dup(sess->auth.hmac.ctx);
+#else
+ if (sess->ctx_copies_len == 0) /* no per-qp array: use the main context */
+ return sess->auth.hmac.ctx;
+
+#if OPENSSL_VERSION_NUMBER >= 0x30000000L
+ EVP_MAC_CTX **lctx =
+#else
+ HMAC_CTX **lctx =
+#endif
+ &sess->qp_ctx[qp->id].hmac; /* this qp's own slot */
+
+ if (unlikely(*lctx == NULL)) { /* first use by this qp: copy the main context */
+#if OPENSSL_VERSION_NUMBER >= 0x30000000L
+ *lctx = EVP_MAC_CTX_dup(sess->auth.hmac.ctx);
+#else
+ *lctx = HMAC_CTX_new();
+ HMAC_CTX_copy(*lctx, sess->auth.hmac.ctx);
+#endif
+ }
+ /* NOTE(review): the dup/new/copy results above are not checked for failure. */
+ return *lctx;
+#endif
+}
+
+#if OPENSSL_VERSION_NUMBER >= 0x30000000L
+static inline EVP_MAC_CTX *
+#else
+static inline CMAC_CTX *
+#endif
+get_local_cmac_ctx(struct openssl_session *sess, struct openssl_qp *qp)
+{
+#if (OPENSSL_VERSION_NUMBER >= 0x30000000L && OPENSSL_VERSION_NUMBER < 0x30003000L)
+ /* OpenSSL 3.0.0 <= v < 3.0.3 cannot re-init an EVP_MAC_CTX: its state
+ * is not actually reset (fixed by OSSL commit c9ddc5af5199, "Avoid
+ * undefined behavior of provided macs on EVP_MAC reinitialization").
+ * Without the fix, return a fresh duplicate for every buffer, which
+ * process_openssl_auth_op() frees after use, at the cost of performance.
+ * NOTE(review): OpenSSL 3 encodes versions as 0xMNN00PP0L, so 3.0.3 is
+ * 0x30000030L, not 0x30003000L - confirm; fix all matching #ifs together.
+ */
+ RTE_SET_USED(qp);
+ return EVP_MAC_CTX_dup(sess->auth.cmac.ctx);
+#else
+ if (sess->ctx_copies_len == 0) /* no per-qp array: use the main context */
+ return sess->auth.cmac.ctx;
+
+#if OPENSSL_VERSION_NUMBER >= 0x30000000L
+ EVP_MAC_CTX **lctx =
+#else
+ CMAC_CTX **lctx =
+#endif
+ &sess->qp_ctx[qp->id].cmac; /* this qp's own slot */
+
+ if (unlikely(*lctx == NULL)) { /* first use by this qp: copy the main context */
+#if OPENSSL_VERSION_NUMBER >= 0x30000000L
+ *lctx = EVP_MAC_CTX_dup(sess->auth.cmac.ctx);
+#else
+ *lctx = CMAC_CTX_new();
+ CMAC_CTX_copy(*lctx, sess->auth.cmac.ctx);
+#endif
+ }
+ /* NOTE(review): the dup/new/copy results above are not checked for failure. */
+ return *lctx;
+#endif
+}
+
/** Process auth/cipher combined operation */
static void
process_openssl_combined_op(struct openssl_qp *qp, struct rte_crypto_op *op,
@@ -1895,42 +2007,40 @@ process_openssl_auth_op(struct openssl_qp *qp, struct rte_crypto_op *op,
switch (sess->auth.mode) {
case OPENSSL_AUTH_AS_AUTH:
- ctx_a = EVP_MD_CTX_create();
- EVP_MD_CTX_copy_ex(ctx_a, sess->auth.auth.ctx);
+ ctx_a = get_local_auth_ctx(sess, qp);
status = process_openssl_auth(mbuf_src, dst,
op->sym->auth.data.offset, NULL, NULL, srclen,
ctx_a, sess->auth.auth.evp_algo);
- EVP_MD_CTX_destroy(ctx_a);
break;
case OPENSSL_AUTH_AS_HMAC:
+ ctx_h = get_local_hmac_ctx(sess, qp);
# if OPENSSL_VERSION_NUMBER >= 0x30000000L
- ctx_h = EVP_MAC_CTX_dup(sess->auth.hmac.ctx);
status = process_openssl_auth_mac(mbuf_src, dst,
op->sym->auth.data.offset, srclen,
ctx_h);
# else
- ctx_h = HMAC_CTX_new();
- HMAC_CTX_copy(ctx_h, sess->auth.hmac.ctx);
status = process_openssl_auth_hmac(mbuf_src, dst,
op->sym->auth.data.offset, srclen,
ctx_h);
- HMAC_CTX_free(ctx_h);
# endif
+#if (OPENSSL_VERSION_NUMBER >= 0x30000000L && OPENSSL_VERSION_NUMBER < 0x30003000L)
+ EVP_MAC_CTX_free(ctx_h);
+#endif
break;
case OPENSSL_AUTH_AS_CMAC:
+ ctx_c = get_local_cmac_ctx(sess, qp);
# if OPENSSL_VERSION_NUMBER >= 0x30000000L
- ctx_c = EVP_MAC_CTX_dup(sess->auth.cmac.ctx);
status = process_openssl_auth_mac(mbuf_src, dst,
op->sym->auth.data.offset, srclen,
ctx_c);
# else
- ctx_c = CMAC_CTX_new();
- CMAC_CTX_copy(ctx_c, sess->auth.cmac.ctx);
status = process_openssl_auth_cmac(mbuf_src, dst,
op->sym->auth.data.offset, srclen,
ctx_c);
- CMAC_CTX_free(ctx_c);
# endif
+#if (OPENSSL_VERSION_NUMBER >= 0x30000000L && OPENSSL_VERSION_NUMBER < 0x30003000L)
+ EVP_MAC_CTX_free(ctx_c);
+#endif
break;
default:
status = -1;
diff --git a/drivers/crypto/openssl/rte_openssl_pmd_ops.c b/drivers/crypto/openssl/rte_openssl_pmd_ops.c
index 4209c6ab6f..1bbb855a59 100644
--- a/drivers/crypto/openssl/rte_openssl_pmd_ops.c
+++ b/drivers/crypto/openssl/rte_openssl_pmd_ops.c
@@ -805,7 +805,7 @@ openssl_pmd_sym_session_get_size(struct rte_cryptodev *dev)
unsigned int max_nb_qps = ((struct openssl_private *)
dev->data->dev_private)->max_nb_qpairs;
return sizeof(struct openssl_session) +
- (sizeof(void *) * max_nb_qps);
+ (sizeof(struct evp_ctx_pair) * max_nb_qps);
}
/*
@@ -818,10 +818,11 @@ openssl_pmd_sym_session_get_size(struct rte_cryptodev *dev)
/*
* Otherwise, the size of the flexible array member should be enough to
- * fit pointers to per-qp contexts.
+ * fit one struct evp_ctx_pair per queue pair, i.e. a cipher context
+ * pointer and an auth context pointer for each queue pair.
*/
return sizeof(struct openssl_session) +
- (sizeof(void *) * dev->data->nb_queue_pairs);
+ (sizeof(struct evp_ctx_pair) * dev->data->nb_queue_pairs);
}
/** Returns the size of the asymmetric session structure */
--
2.34.1