DPDK patches and discussions
 help / color / mirror / Atom feed
* The problem with priv_data in crypto and security API's
@ 2023-11-19 19:58 Stephen Hemminger
  2023-11-20  4:19 ` [EXT] " Anoob Joseph
  0 siblings, 1 reply; 2+ messages in thread
From: Stephen Hemminger @ 2023-11-19 19:58 UTC (permalink / raw)
  To: Akhil Goyal, Fan Zhang; +Cc: dev

In the patchset, that removes the GNU style zero length arrays,
there is a problem caused by the use of these in the cryptodev
header files.

For both rte_session and rte_cryptodev, the current convention
is to do:

struct rte_security_session {
	RTE_MARKER cacheline0;
	uint64_t opaque_data;
	/**< Opaque user defined data */
	uint64_t fast_mdata;
	/**< Fast metadata to be used for inline path */
	rte_iova_t driver_priv_data_iova;
	/**< session private data IOVA address */

	RTE_MARKER cacheline1 __rte_cache_min_aligned;
	uint8_t driver_priv_data[0];
	/**< Private session material, variable size (depends on driver) */
};

This can not just be replaced with a C90 flex array because then
clang correctly complains of using flex array not at the end
of the structure.

struct cn9k_sec_session {
	struct rte_security_session rte_sess;

	/** PMD private space */

	/** ESN */
	union {
		uint64_t esn;
		struct {
			uint32_t seq_lo;
			uint32_t seq_hi;
		};
	};
	/** IPsec SA direction */
	uint8_t is_outbound;
	/* ESN enable flag */
	uint8_t esn_en;
	/** Pre-populated CPT inst words */
	struct cnxk_cpt_inst_tmpl inst;
	/** Response length calculation data */
	struct cnxk_ipsec_outb_rlens rlens;
	/** Anti replay window size */
	uint32_t replay_win_sz;
	/** Cipher IV offset in bytes */
	uint16_t cipher_iv_off;
	/** Cipher IV length in bytes */
	uint8_t cipher_iv_len;
	/** Outbound custom header length */
	uint8_t custom_hdr_len;
	/** Anti replay */
	struct cnxk_on_ipsec_ar ar;
	/** Queue pair */
	struct cnxk_cpt_qp *qp;

	struct cn9k_ipsec_sa sa;
} __rte_cache_aligned;



There are a couple of ways to fix this, and both are non-trivial.
The first is to get rid of the priv_data element and modify the
management to do the cache alignment.

Something like:

diff --git a/lib/security/rte_security_driver.h b/lib/security/rte_security_driver.h
index faa4074f1965..ffc5b5d7257b 100644
--- a/lib/security/rte_security_driver.h
+++ b/lib/security/rte_security_driver.h
@@ -30,11 +30,6 @@ struct rte_security_session {
        uint64_t fast_mdata;
        /**< Fast metadata to be used for inline path */
        rte_iova_t driver_priv_data_iova;
-       /**< session private data IOVA address */
-
-       RTE_MARKER cacheline1 __rte_cache_min_aligned;
-       uint8_t driver_priv_data[];
-       /**< Private session material, variable size (depends on driver) */
 };
 
 /**
@@ -61,13 +56,33 @@ struct rte_security_ctx {
        /**< Number of MACsec SA attached to this context */
 };
 
+/**
+ * Helper to acces rte_session private data
+ * @param      sess
+ *    Pointer to Security private session structure
+ *
+ * @return
+ *    Pointer to area reserved for private data
+ */
+static inline void *
+rte_security_priv(const struct rte_security_session *sess)
+{
+       return (void *)((uintptr_t)sess + RTE_CACHE_LINE_ROUNDUP(sizeof(*sess)));
+}
+

diff --git a/lib/security/rte_security.c b/lib/security/rte_security.c
index b082a290296b..f3202e3df6cd 100644
--- a/lib/security/rte_security.c
+++ b/lib/security/rte_security.c
@@ -66,6 +66,7 @@ rte_security_session_create(void *ctx,
 {
        struct rte_security_session *sess = NULL;
        struct rte_security_ctx *instance = ctx;
+       const size_t sess_size = RTE_CACHE_LINE_ROUNDUP(sizeof(*sess));
        uint32_t sess_priv_size;
 
        RTE_PTR_CHAIN3_OR_ERR_RET(instance, ops, session_create, NULL, NULL);
@@ -73,17 +74,16 @@ rte_security_session_create(void *ctx,
        RTE_PTR_OR_ERR_RET(mp, NULL);
 
        sess_priv_size = instance->ops->session_get_size(instance->device);
-       if (mp->elt_size < (sizeof(struct rte_security_session) + sess_priv_size))
+       if (mp->elt_size < sess_size + sess_priv_size)
                return NULL;
 
        if (rte_mempool_get(mp, (void **)&sess))
                return NULL;
 
        /* Clear session priv data */
-       memset(sess->driver_priv_data, 0, sess_priv_size);
+       memset(rte_security_priv(sess), 0, sess_priv_size);
 
-       sess->driver_priv_data_iova = rte_mempool_virt2iova(sess) +
-                       offsetof(struct rte_security_session, driver_priv_data);
+       sess->driver_priv_data_iova = rte_mempool_virt2iova(sess) + sess_size;
        if (instance->ops->session_create(instance->device, conf, sess)) {
                rte_mempool_put(mp, (v

This is what Linux kernel does with netdev_priv() helper.

There is also a lot of open coded versions of same thing
in Intel crypto drivers. Anything with that many casts looks
like a design mistake to me.

      struct ixgbe_crypto_session *ic_session = (void *)(uintptr_t)
                       ((const struct rte_security_session *)sess)->driver_priv_data;

^ permalink raw reply	[flat|nested] 2+ messages in thread

* RE: [EXT] The problem with priv_data in crypto and security API's
  2023-11-19 19:58 The problem with priv_data in crypto and security API's Stephen Hemminger
@ 2023-11-20  4:19 ` Anoob Joseph
  0 siblings, 0 replies; 2+ messages in thread
From: Anoob Joseph @ 2023-11-20  4:19 UTC (permalink / raw)
  To: Stephen Hemminger, Akhil Goyal, Fan Zhang; +Cc: dev, Tejasree Kondoj

Hi Stephen,

Since the issue is only with cnxk drivers, may I propose a change in driver to mitigate this issue?

Flex array that is proposed is already used by asymmetric sessions. We have addressed the issue you highlighted with below scheme.

#define ASYM_SESS_SIZE sizeof(struct rte_cryptodev_asym_session)

struct cnxk_ae_sess {
	uint8_t rte_sess[ASYM_SESS_SIZE];
	enum rte_crypto_asym_xform_type xfrm_type;
	union {
		struct rte_crypto_rsa_xform rsa_ctx;
		struct rte_crypto_modex_xform mod_ctx;
		struct roc_ae_ec_ctx ec_ctx;
	};
	uint64_t *cnxk_fpm_iova;
	struct roc_ae_ec_group **ec_grp;
	uint64_t cpt_inst_w7;
	uint64_t cpt_inst_w2;
	struct cnxk_cpt_qp *qp;
	struct roc_cpt_lf *lf;
	struct hw_ctx_s {
		union {
			struct {
				uint64_t rsvd : 48;

				uint64_t ctx_push_size : 7;
				uint64_t rsvd1 : 1;

				uint64_t ctx_hdr_size : 2;
				uint64_t aop_valid : 1;
				uint64_t rsvd2 : 1;
				uint64_t ctx_size : 4;
			} s;
			uint64_t u64;
		} w0;
		uint8_t rsvd[256];
	} hw_ctx __plt_aligned(ROC_ALIGN);
};

The hw_ctx that is used in above struct needs alignment that is different from cache line alignment. The above struct layout allow us to minimize the size of the struct and cacheline accesses. Basically, the space after rte_sess portion would be the driver internal space and we align hw_ctx without having to do any pointer manipulations. Also, having both DPDK specific fields and driver internal fields in first cacheline minimizes the cache accesses in the datapath.

I can push a patch if you think this approach is okay.

Thanks,
Anoob

> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Monday, November 20, 2023 1:28 AM
> To: Akhil Goyal <gakhil@marvell.com>; Fan Zhang
> <fanzhang.oss@gmail.com>
> Cc: dev@dpdk.org
> Subject: [EXT] The problem with priv_data in crypto and security API's
> 
> External Email
> 
> ----------------------------------------------------------------------
> In the patchset, that removes the GNU style zero length arrays, there is a
> problem caused by the use of these in the cryptodev header files.
> 
> For both rte_session and rte_cryptodev, the current convention is to do:
> 
> struct rte_security_session {
> 	RTE_MARKER cacheline0;
> 	uint64_t opaque_data;
> 	/**< Opaque user defined data */
> 	uint64_t fast_mdata;
> 	/**< Fast metadata to be used for inline path */
> 	rte_iova_t driver_priv_data_iova;
> 	/**< session private data IOVA address */
> 
> 	RTE_MARKER cacheline1 __rte_cache_min_aligned;
> 	uint8_t driver_priv_data[0];
> 	/**< Private session material, variable size (depends on driver) */ };
> 
> This can not just be replaced with a C90 flex array because then clang
> correctly complains of using flex array not at the end of the structure.
> 
> struct cn9k_sec_session {
> 	struct rte_security_session rte_sess;
> 
> 	/** PMD private space */
> 
> 	/** ESN */
> 	union {
> 		uint64_t esn;
> 		struct {
> 			uint32_t seq_lo;
> 			uint32_t seq_hi;
> 		};
> 	};
> 	/** IPsec SA direction */
> 	uint8_t is_outbound;
> 	/* ESN enable flag */
> 	uint8_t esn_en;
> 	/** Pre-populated CPT inst words */
> 	struct cnxk_cpt_inst_tmpl inst;
> 	/** Response length calculation data */
> 	struct cnxk_ipsec_outb_rlens rlens;
> 	/** Anti replay window size */
> 	uint32_t replay_win_sz;
> 	/** Cipher IV offset in bytes */
> 	uint16_t cipher_iv_off;
> 	/** Cipher IV length in bytes */
> 	uint8_t cipher_iv_len;
> 	/** Outbound custom header length */
> 	uint8_t custom_hdr_len;
> 	/** Anti replay */
> 	struct cnxk_on_ipsec_ar ar;
> 	/** Queue pair */
> 	struct cnxk_cpt_qp *qp;
> 
> 	struct cn9k_ipsec_sa sa;
> } __rte_cache_aligned;
> 
> 
> 
> There are a couple of ways to fix this, and both are non-trivial.
> The first is to get rid of the priv_data element and modify the management
> to do the cache alignment.
> 
> Something like:
> 
> diff --git a/lib/security/rte_security_driver.h
> b/lib/security/rte_security_driver.h
> index faa4074f1965..ffc5b5d7257b 100644
> --- a/lib/security/rte_security_driver.h
> +++ b/lib/security/rte_security_driver.h
> @@ -30,11 +30,6 @@ struct rte_security_session {
>         uint64_t fast_mdata;
>         /**< Fast metadata to be used for inline path */
>         rte_iova_t driver_priv_data_iova;
> -       /**< session private data IOVA address */
> -
> -       RTE_MARKER cacheline1 __rte_cache_min_aligned;
> -       uint8_t driver_priv_data[];
> -       /**< Private session material, variable size (depends on driver) */
>  };
> 
>  /**
> @@ -61,13 +56,33 @@ struct rte_security_ctx {
>         /**< Number of MACsec SA attached to this context */  };
> 
> +/**
> + * Helper to acces rte_session private data
> + * @param      sess
> + *    Pointer to Security private session structure
> + *
> + * @return
> + *    Pointer to area reserved for private data
> + */
> +static inline void *
> +rte_security_priv(const struct rte_security_session *sess) {
> +       return (void *)((uintptr_t)sess +
> +RTE_CACHE_LINE_ROUNDUP(sizeof(*sess)));
> +}
> +
> 
> diff --git a/lib/security/rte_security.c b/lib/security/rte_security.c index
> b082a290296b..f3202e3df6cd 100644
> --- a/lib/security/rte_security.c
> +++ b/lib/security/rte_security.c
> @@ -66,6 +66,7 @@ rte_security_session_create(void *ctx,  {
>         struct rte_security_session *sess = NULL;
>         struct rte_security_ctx *instance = ctx;
> +       const size_t sess_size = RTE_CACHE_LINE_ROUNDUP(sizeof(*sess));
>         uint32_t sess_priv_size;
> 
>         RTE_PTR_CHAIN3_OR_ERR_RET(instance, ops, session_create, NULL,
> NULL); @@ -73,17 +74,16 @@ rte_security_session_create(void *ctx,
>         RTE_PTR_OR_ERR_RET(mp, NULL);
> 
>         sess_priv_size = instance->ops->session_get_size(instance->device);
> -       if (mp->elt_size < (sizeof(struct rte_security_session) + sess_priv_size))
> +       if (mp->elt_size < sess_size + sess_priv_size)
>                 return NULL;
> 
>         if (rte_mempool_get(mp, (void **)&sess))
>                 return NULL;
> 
>         /* Clear session priv data */
> -       memset(sess->driver_priv_data, 0, sess_priv_size);
> +       memset(rte_security_priv(sess), 0, sess_priv_size);
> 
> -       sess->driver_priv_data_iova = rte_mempool_virt2iova(sess) +
> -                       offsetof(struct rte_security_session, driver_priv_data);
> +       sess->driver_priv_data_iova = rte_mempool_virt2iova(sess) +
> + sess_size;
>         if (instance->ops->session_create(instance->device, conf, sess)) {
>                 rte_mempool_put(mp, (v
> 
> This is what Linux kernel does with netdev_priv() helper.
> 
> There is also a lot of open coded versions of same thing in Intel crypto
> drivers. Anything with that many casts looks like a design mistake to me.
> 
>       struct ixgbe_crypto_session *ic_session = (void *)(uintptr_t)
>                        ((const struct rte_security_session *)sess)->driver_priv_data;

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-11-20  4:19 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-19 19:58 The problem with priv_data in crypto and security API's Stephen Hemminger
2023-11-20  4:19 ` [EXT] " Anoob Joseph

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).