DPDK patches and discussions
* [dpdk-dev] [PATCH] RFC: ethdev: add reassembly offload
@ 2021-08-23 10:02 Akhil Goyal
  2021-08-23 10:18 ` Andrew Rybchenko
                   ` (3 more replies)
  0 siblings, 4 replies; 53+ messages in thread
From: Akhil Goyal @ 2021-08-23 10:02 UTC (permalink / raw)
  To: dev
  Cc: anoobj, radu.nicolau, declan.doherty, hemant.agrawal, matan,
	konstantin.ananyev, thomas, adwivedi, ferruh.yigit,
	andrew.rybchenko, Akhil Goyal

Reassembly is a costly operation when it is done in
software; offloading it to HW can save the application a
considerable number of cycles.
The operation becomes even more costly if the IP
fragments are encrypted.

To resolve the above two issues, a new offload
DEV_RX_OFFLOAD_REASSEMBLY is introduced in ethdev for
devices which can attempt reassembly of packets in hardware.
rte_eth_dev_info is extended with the reassembly capabilities
which a device can support.
Now, if IP fragments are encrypted, reassembly can also be
attempted while doing inline IPsec processing.
This is controlled by a flag in rte_security_ipsec_sa_options
which enables reassembly of encrypted IP fragments in the
inline path.

In case of success, the resulting reassembled packet is a
typical segmented (chained) mbuf.

If reassembly of the fragments fails or is incomplete (i.e.
all fragments do not arrive before reass_timeout expires),
the mbuf is updated with the ol_flag
PKT_RX_REASSEMBLY_INCOMPLETE and returned as is. The
application may then decide the fate of the packet: wait
longer for the remaining fragments or drop it.
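
For illustration only (not part of the patch), an application would use
the new offload roughly as in the sketch below; error handling is
omitted, and handle_incomplete_reassembly() and process_packet() are
hypothetical application helpers:

#include <errno.h>
#include <inttypes.h>
#include <stdio.h>

#include <rte_ethdev.h>
#include <rte_mbuf.h>

static int
setup_reassembly(uint16_t port_id, struct rte_eth_conf *conf)
{
	struct rte_eth_dev_info dev_info;

	rte_eth_dev_info_get(port_id, &dev_info);

	/* Offload not supported by this device. */
	if (!(dev_info.rx_offload_capa & DEV_RX_OFFLOAD_REASSEMBLY))
		return -ENOTSUP;

	/* reass_capa advertises the device limits. */
	printf("reass timeout: %" PRIu64 " ns, max frags: %u\n",
	       dev_info.reass_capa.reass_timeout,
	       dev_info.reass_capa.max_frags);

	conf->rxmode.offloads |= DEV_RX_OFFLOAD_REASSEMBLY;
	return 0;
}

static void
process_rx(struct rte_mbuf *m)
{
	if (m->ol_flags & PKT_RX_REASSEMBLY_INCOMPLETE) {
		/* HW gave up: the mbuf chain carries the raw fragments. */
		handle_incomplete_reassembly(m);
		return;
	}
	process_packet(m); /* fully reassembled or never fragmented */
}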

Signed-off-by: Akhil Goyal <gakhil@marvell.com>
---
 lib/ethdev/rte_ethdev.c     |  1 +
 lib/ethdev/rte_ethdev.h     | 18 +++++++++++++++++-
 lib/mbuf/rte_mbuf_core.h    |  3 ++-
 lib/security/rte_security.h | 10 ++++++++++
 4 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index 9d95cd11e1..1ab3a093cf 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -119,6 +119,7 @@ static const struct {
 	RTE_RX_OFFLOAD_BIT2STR(VLAN_FILTER),
 	RTE_RX_OFFLOAD_BIT2STR(VLAN_EXTEND),
 	RTE_RX_OFFLOAD_BIT2STR(JUMBO_FRAME),
+	RTE_RX_OFFLOAD_BIT2STR(REASSEMBLY),
 	RTE_RX_OFFLOAD_BIT2STR(SCATTER),
 	RTE_RX_OFFLOAD_BIT2STR(TIMESTAMP),
 	RTE_RX_OFFLOAD_BIT2STR(SECURITY),
diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index d2b27c351f..e89a4dc1eb 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -1360,6 +1360,7 @@ struct rte_eth_conf {
 #define DEV_RX_OFFLOAD_VLAN_FILTER	0x00000200
 #define DEV_RX_OFFLOAD_VLAN_EXTEND	0x00000400
 #define DEV_RX_OFFLOAD_JUMBO_FRAME	0x00000800
+#define DEV_RX_OFFLOAD_REASSEMBLY	0x00001000
 #define DEV_RX_OFFLOAD_SCATTER		0x00002000
 /**
  * Timestamp is set by the driver in RTE_MBUF_DYNFIELD_TIMESTAMP_NAME
@@ -1477,6 +1478,20 @@ struct rte_eth_dev_portconf {
  */
 #define RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID	(UINT16_MAX)
 
+/**
+ * Reassembly capabilities that a device can support.
+ * The device which can support reassembly offload should set
+ * DEV_RX_OFFLOAD_REASSEMBLY
+ */
+struct rte_eth_reass_capa {
+	/** Maximum time in ns that a fragment can wait for further fragments */
+	uint64_t reass_timeout;
+	/** Maximum number of fragments that device can reassemble */
+	uint16_t max_frags;
+	/** Reserved for future capabilities */
+	uint16_t reserved[3];
+};
+
 /**
  * Ethernet device associated switch information
  */
@@ -1582,8 +1597,9 @@ struct rte_eth_dev_info {
 	 * embedded managed interconnect/switch.
 	 */
 	struct rte_eth_switch_info switch_info;
+	/* Reassembly capabilities of a device for reassembly offload */
+	struct rte_eth_reass_capa reass_capa;
 
-	uint64_t reserved_64s[2]; /**< Reserved for future fields */
 	void *reserved_ptrs[2];   /**< Reserved for future fields */
 };
 
diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
index bb38d7f581..cea25c87f7 100644
--- a/lib/mbuf/rte_mbuf_core.h
+++ b/lib/mbuf/rte_mbuf_core.h
@@ -200,10 +200,11 @@ extern "C" {
 #define PKT_RX_OUTER_L4_CKSUM_BAD	(1ULL << 21)
 #define PKT_RX_OUTER_L4_CKSUM_GOOD	(1ULL << 22)
 #define PKT_RX_OUTER_L4_CKSUM_INVALID	((1ULL << 21) | (1ULL << 22))
+#define PKT_RX_REASSEMBLY_INCOMPLETE	(1ULL << 23)
 
 /* add new RX flags here, don't forget to update PKT_FIRST_FREE */
 
-#define PKT_FIRST_FREE (1ULL << 23)
+#define PKT_FIRST_FREE (1ULL << 24)
 #define PKT_LAST_FREE (1ULL << 40)
 
 /* add new TX flags here, don't forget to update PKT_LAST_FREE  */
diff --git a/lib/security/rte_security.h b/lib/security/rte_security.h
index 88d31de0a6..364eeb5cd4 100644
--- a/lib/security/rte_security.h
+++ b/lib/security/rte_security.h
@@ -181,6 +181,16 @@ struct rte_security_ipsec_sa_options {
 	 * * 0: Disable per session security statistics collection for this SA.
 	 */
 	uint32_t stats : 1;
+
+	/** Enable reassembly on incoming packets.
+	 *
+	 * * 1: Enable driver to try reassembly of encrypted IP packets for
+	 *      this SA, if supported by the driver. This feature will work
+	 *      only if rx_offload DEV_RX_OFFLOAD_REASSEMBLY is set in
+	 *      inline ethernet device.
+	 * * 0: Disable reassembly of packets (default).
+	 */
+	uint32_t reass_en : 1;
 };
 
 /** IPSec security association direction */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [dpdk-dev] [PATCH] RFC: ethdev: add reassembly offload
  2021-08-23 10:02 [dpdk-dev] [PATCH] RFC: ethdev: add reassembly offload Akhil Goyal
@ 2021-08-23 10:18 ` Andrew Rybchenko
  2021-08-29 13:14   ` [dpdk-dev] [EXT] " Akhil Goyal
  2021-09-07  8:47 ` [dpdk-dev] " Ferruh Yigit
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 53+ messages in thread
From: Andrew Rybchenko @ 2021-08-23 10:18 UTC (permalink / raw)
  To: Akhil Goyal, dev
  Cc: anoobj, radu.nicolau, declan.doherty, hemant.agrawal, matan,
	konstantin.ananyev, thomas, adwivedi, ferruh.yigit

On 8/23/21 1:02 PM, Akhil Goyal wrote:
> Reassembly is a costly operation if it is done in
> software, however, if it is offloaded to HW, it can
> considerably save application cycles.
> The operation becomes even more costlier if IP fragmants
> are encrypted.
> 
> To resolve above two issues, a new offload
> DEV_RX_OFFLOAD_REASSEMBLY is introduced in ethdev for
> devices which can attempt reassembly of packets in hardware.
> rte_eth_dev_info is added with the reassembly capabilities
> which a device can support.
> Now, if IP fragments are encrypted, reassembly can also be
> attempted while doing inline IPsec processing.
> This is controlled by a flag in rte_security_ipsec_sa_options
> to enable reassembly of encrypted IP fragments in the inline
> path.
> 
> The resulting reassembled packet would be a typical
> segmented mbuf in case of success.
> 
> And if reassembly of fragments is failed or is incomplete (if
> fragments do not come before the reass_timeout), the mbuf is
> updated with an ol_flag PKT_RX_REASSEMBLY_INCOMPLETE and
> mbuf is returned as is. Now application may decide the fate
> of the packet to wait more for fragments to come or drop.
> 
> Signed-off-by: Akhil Goyal <gakhil@marvell.com>

Is it IPv4 only or IPv6 as well? I guess IPv4 only to start
with. If so, I think offload name should say so. See below.

I'd say that the feature should be added to
doc/guides/nics/features.rst

Do we really need RX_REASSEMBLY_INCOMPLETE if we provide
the buffered packets for incomplete reassembly anyway?
I guess it is sufficient to cover only the simple reassembly
case in HW, when there are no overlapping fragments etc.
Everything else should be handled in SW anyway, just as
without the offload support at all.

> ---
>  lib/ethdev/rte_ethdev.c     |  1 +
>  lib/ethdev/rte_ethdev.h     | 18 +++++++++++++++++-
>  lib/mbuf/rte_mbuf_core.h    |  3 ++-
>  lib/security/rte_security.h | 10 ++++++++++
>  4 files changed, 30 insertions(+), 2 deletions(-)
> 
> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
> index 9d95cd11e1..1ab3a093cf 100644
> --- a/lib/ethdev/rte_ethdev.c
> +++ b/lib/ethdev/rte_ethdev.c
> @@ -119,6 +119,7 @@ static const struct {
>  	RTE_RX_OFFLOAD_BIT2STR(VLAN_FILTER),
>  	RTE_RX_OFFLOAD_BIT2STR(VLAN_EXTEND),
>  	RTE_RX_OFFLOAD_BIT2STR(JUMBO_FRAME),
> +	RTE_RX_OFFLOAD_BIT2STR(REASSEMBLY),
>  	RTE_RX_OFFLOAD_BIT2STR(SCATTER),
>  	RTE_RX_OFFLOAD_BIT2STR(TIMESTAMP),
>  	RTE_RX_OFFLOAD_BIT2STR(SECURITY),
> diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
> index d2b27c351f..e89a4dc1eb 100644
> --- a/lib/ethdev/rte_ethdev.h
> +++ b/lib/ethdev/rte_ethdev.h
> @@ -1360,6 +1360,7 @@ struct rte_eth_conf {
>  #define DEV_RX_OFFLOAD_VLAN_FILTER	0x00000200
>  #define DEV_RX_OFFLOAD_VLAN_EXTEND	0x00000400
>  #define DEV_RX_OFFLOAD_JUMBO_FRAME	0x00000800
> +#define DEV_RX_OFFLOAD_REASSEMBLY	0x00001000

I think it should be:
RTE_ETH_RX_OFFLOAD_IPV4_REASSEMBLY

i.e. have correct prefix similar to
RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT and mention IPv4.

If we'd like to cover IPv6 as well, it could be
RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY and have IPv4/6
support bits in the offload capabilities below.
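
For illustration, the capability structure could then advertise the
supported IP versions along these lines (the names below are
hypothetical, not part of the posted patch):

#define RTE_ETH_REASS_IPV4	(1 << 0)
#define RTE_ETH_REASS_IPV6	(1 << 1)

struct rte_eth_reass_capa {
	/** Maximum time in ns that a fragment can wait for further fragments */
	uint64_t reass_timeout;
	/** Maximum number of fragments that device can reassemble */
	uint16_t max_frags;
	/** Bitmask of supported IP versions: RTE_ETH_REASS_IPV4/IPV6 */
	uint16_t ip_version_mask;
	/** Reserved for future capabilities */
	uint16_t reserved[2];
};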

>  #define DEV_RX_OFFLOAD_SCATTER		0x00002000
>  /**
>   * Timestamp is set by the driver in RTE_MBUF_DYNFIELD_TIMESTAMP_NAME
> @@ -1477,6 +1478,20 @@ struct rte_eth_dev_portconf {
>   */
>  #define RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID	(UINT16_MAX)
>  
> +/**
> + * Reassembly capabilities that a device can support.
> + * The device which can support reassembly offload should set
> + * DEV_RX_OFFLOAD_REASSEMBLY
> + */
> +struct rte_eth_reass_capa {
> +	/** Maximum time in ns that a fragment can wait for further fragments */
> +	uint64_t reass_timeout;
> +	/** Maximum number of fragments that device can reassemble */
> +	uint16_t max_frags;
> +	/** Reserved for future capabilities */
> +	uint16_t reserved[3];
> +};
> +
>  /**
>   * Ethernet device associated switch information
>   */
> @@ -1582,8 +1597,9 @@ struct rte_eth_dev_info {
>  	 * embedded managed interconnect/switch.
>  	 */
>  	struct rte_eth_switch_info switch_info;
> +	/* Reassembly capabilities of a device for reassembly offload */
> +	struct rte_eth_reass_capa reass_capa;
>  
> -	uint64_t reserved_64s[2]; /**< Reserved for future fields */
>  	void *reserved_ptrs[2];   /**< Reserved for future fields */
>  };
>  
> diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
> index bb38d7f581..cea25c87f7 100644
> --- a/lib/mbuf/rte_mbuf_core.h
> +++ b/lib/mbuf/rte_mbuf_core.h
> @@ -200,10 +200,11 @@ extern "C" {
>  #define PKT_RX_OUTER_L4_CKSUM_BAD	(1ULL << 21)
>  #define PKT_RX_OUTER_L4_CKSUM_GOOD	(1ULL << 22)
>  #define PKT_RX_OUTER_L4_CKSUM_INVALID	((1ULL << 21) | (1ULL << 22))
> +#define PKT_RX_REASSEMBLY_INCOMPLETE	(1ULL << 23)

In accordance with deprecation notice it should be
RTE_MBUF_F_RX_REASSEMBLY_INCOMPLETE

>  
>  /* add new RX flags here, don't forget to update PKT_FIRST_FREE */
>  
> -#define PKT_FIRST_FREE (1ULL << 23)
> +#define PKT_FIRST_FREE (1ULL << 24)
>  #define PKT_LAST_FREE (1ULL << 40)
>  
>  /* add new TX flags here, don't forget to update PKT_LAST_FREE  */
> diff --git a/lib/security/rte_security.h b/lib/security/rte_security.h
> index 88d31de0a6..364eeb5cd4 100644
> --- a/lib/security/rte_security.h
> +++ b/lib/security/rte_security.h
> @@ -181,6 +181,16 @@ struct rte_security_ipsec_sa_options {
>  	 * * 0: Disable per session security statistics collection for this SA.
>  	 */
>  	uint32_t stats : 1;
> +
> +	/** Enable reassembly on incoming packets.
> +	 *
> +	 * * 1: Enable driver to try reassembly of encrypted IP packets for
> +	 *      this SA, if supported by the driver. This feature will work
> +	 *      only if rx_offload DEV_RX_OFFLOAD_REASSEMBLY is set in
> +	 *      inline ethernet device.

ethernet -> Ethernet

> +	 * * 0: Disable reassembly of packets (default).
> +	 */
> +	uint32_t reass_en : 1;
>  };
>  
>  /** IPSec security association direction */
> 


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [dpdk-dev] [EXT] Re: [PATCH] RFC: ethdev: add reassembly offload
  2021-08-23 10:18 ` Andrew Rybchenko
@ 2021-08-29 13:14   ` Akhil Goyal
  2021-09-21 19:59     ` Thomas Monjalon
  0 siblings, 1 reply; 53+ messages in thread
From: Akhil Goyal @ 2021-08-29 13:14 UTC (permalink / raw)
  To: Andrew Rybchenko, dev
  Cc: Anoob Joseph, radu.nicolau, declan.doherty, hemant.agrawal,
	matan, konstantin.ananyev, thomas, Ankur Dwivedi, ferruh.yigit

> On 8/23/21 1:02 PM, Akhil Goyal wrote:
> > Reassembly is a costly operation if it is done in
> > software, however, if it is offloaded to HW, it can
> > considerably save application cycles.
> > The operation becomes even more costlier if IP fragmants
> > are encrypted.
> >
> > To resolve above two issues, a new offload
> > DEV_RX_OFFLOAD_REASSEMBLY is introduced in ethdev for
> > devices which can attempt reassembly of packets in hardware.
> > rte_eth_dev_info is added with the reassembly capabilities
> > which a device can support.
> > Now, if IP fragments are encrypted, reassembly can also be
> > attempted while doing inline IPsec processing.
> > This is controlled by a flag in rte_security_ipsec_sa_options
> > to enable reassembly of encrypted IP fragments in the inline
> > path.
> >
> > The resulting reassembled packet would be a typical
> > segmented mbuf in case of success.
> >
> > And if reassembly of fragments is failed or is incomplete (if
> > fragments do not come before the reass_timeout), the mbuf is
> > updated with an ol_flag PKT_RX_REASSEMBLY_INCOMPLETE and
> > mbuf is returned as is. Now application may decide the fate
> > of the packet to wait more for fragments to come or drop.
> >
> > Signed-off-by: Akhil Goyal <gakhil@marvell.com>
> 
> Is it IPv4 only or IPv6 as well? I guess IPv4 only to start
> with. If so, I think offload name should say so. See below.
> 
We can update the spec for both and update the capabilities for both.
See below.

> I'd say that the feature should be added to
> doc/guides/nics/features.rst

OK will update in next version
> 
> Do we really need RX_REASSEMBLY_INCOMPLETE if we provide
> buffered packets for incomplete reassembly anyway?
> I guess it is sufficient to cover simply reassembly case
> only in HW when there is no overlapping fragments etc.
> Everything else should be handled in SW anyway as without
> the offload support at all.
> 
In that case, the application would need to parse the packet again
to check whether it is a fragment or not, even when reassembly
is not required. However, we will consider your suggestion in the
implementation.
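
To illustrate the point (not from the patch): without the flag, every
received packet would need a header parse like the minimal sketch
below, whereas with the flag a single test of
m->ol_flags & PKT_RX_REASSEMBLY_INCOMPLETE is enough. The sketch
assumes a plain IPv4 packet right after the Ethernet header:

#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_mbuf.h>

static inline int
is_ipv4_fragment(const struct rte_mbuf *m)
{
	const struct rte_ipv4_hdr *ip;

	ip = rte_pktmbuf_mtod_offset(m, const struct rte_ipv4_hdr *,
				     sizeof(struct rte_ether_hdr));
	/* A fragment has MF set or a non-zero fragment offset. */
	return (rte_be_to_cpu_16(ip->fragment_offset) &
		(RTE_IPV4_HDR_OFFSET_MASK | RTE_IPV4_HDR_MF_FLAG)) != 0;
}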

> > ---
> >  lib/ethdev/rte_ethdev.c     |  1 +
> >  lib/ethdev/rte_ethdev.h     | 18 +++++++++++++++++-
> >  lib/mbuf/rte_mbuf_core.h    |  3 ++-
> >  lib/security/rte_security.h | 10 ++++++++++
> >  4 files changed, 30 insertions(+), 2 deletions(-)
> >
> > diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
> > index 9d95cd11e1..1ab3a093cf 100644
> > --- a/lib/ethdev/rte_ethdev.c
> > +++ b/lib/ethdev/rte_ethdev.c
> > @@ -119,6 +119,7 @@ static const struct {
> >  	RTE_RX_OFFLOAD_BIT2STR(VLAN_FILTER),
> >  	RTE_RX_OFFLOAD_BIT2STR(VLAN_EXTEND),
> >  	RTE_RX_OFFLOAD_BIT2STR(JUMBO_FRAME),
> > +	RTE_RX_OFFLOAD_BIT2STR(REASSEMBLY),
> >  	RTE_RX_OFFLOAD_BIT2STR(SCATTER),
> >  	RTE_RX_OFFLOAD_BIT2STR(TIMESTAMP),
> >  	RTE_RX_OFFLOAD_BIT2STR(SECURITY),
> > diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
> > index d2b27c351f..e89a4dc1eb 100644
> > --- a/lib/ethdev/rte_ethdev.h
> > +++ b/lib/ethdev/rte_ethdev.h
> > @@ -1360,6 +1360,7 @@ struct rte_eth_conf {
> >  #define DEV_RX_OFFLOAD_VLAN_FILTER	0x00000200
> >  #define DEV_RX_OFFLOAD_VLAN_EXTEND	0x00000400
> >  #define DEV_RX_OFFLOAD_JUMBO_FRAME	0x00000800
> > +#define DEV_RX_OFFLOAD_REASSEMBLY	0x00001000
> 
> I think it should be:
> RTE_ETH_RX_OFFLOAD_IPV4_REASSEMBLY
> 
> i.e. have correct prefix similar to
> RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT and mention IPv4.
> 
> If we'd like to cover IPv6 as well, it could be
> RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY and have IPv4/6
> support bits in the offload capabilities below.

The intention is to update the spec for both.
Will update the capabilities accordingly to cover both IPv4 and IPv6.

> 
> >  #define DEV_RX_OFFLOAD_SCATTER		0x00002000
> >  /**
> >   * Timestamp is set by the driver in
> RTE_MBUF_DYNFIELD_TIMESTAMP_NAME
> > @@ -1477,6 +1478,20 @@ struct rte_eth_dev_portconf {
> >   */
> >  #define RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID
> 	(UINT16_MAX)
> >
> > +/**
> > + * Reassembly capabilities that a device can support.
> > + * The device which can support reassembly offload should set
> > + * DEV_RX_OFFLOAD_REASSEMBLY
> > + */
> > +struct rte_eth_reass_capa {
> > +	/** Maximum time in ns that a fragment can wait for further
> fragments */
> > +	uint64_t reass_timeout;
> > +	/** Maximum number of fragments that device can reassemble */
> > +	uint16_t max_frags;
> > +	/** Reserved for future capabilities */
> > +	uint16_t reserved[3];
> > +};
> > +
> >  /**
> >   * Ethernet device associated switch information
> >   */
> > @@ -1582,8 +1597,9 @@ struct rte_eth_dev_info {
> >  	 * embedded managed interconnect/switch.
> >  	 */
> >  	struct rte_eth_switch_info switch_info;
> > +	/* Reassembly capabilities of a device for reassembly offload */
> > +	struct rte_eth_reass_capa reass_capa;
> >
> > -	uint64_t reserved_64s[2]; /**< Reserved for future fields */
> >  	void *reserved_ptrs[2];   /**< Reserved for future fields */
> >  };
> >
> > diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
> > index bb38d7f581..cea25c87f7 100644
> > --- a/lib/mbuf/rte_mbuf_core.h
> > +++ b/lib/mbuf/rte_mbuf_core.h
> > @@ -200,10 +200,11 @@ extern "C" {
> >  #define PKT_RX_OUTER_L4_CKSUM_BAD	(1ULL << 21)
> >  #define PKT_RX_OUTER_L4_CKSUM_GOOD	(1ULL << 22)
> >  #define PKT_RX_OUTER_L4_CKSUM_INVALID	((1ULL << 21) | (1ULL << 22))
> > +#define PKT_RX_REASSEMBLY_INCOMPLETE	(1ULL << 23)
> 
> In accordance with deprecation notice it should be
> RTE_MBUF_F_RX_REASSEMBLY_INCOMPLETE
> 
Ok will correct in next version.

> >
> >  /* add new RX flags here, don't forget to update PKT_FIRST_FREE */
> >
> > -#define PKT_FIRST_FREE (1ULL << 23)
> > +#define PKT_FIRST_FREE (1ULL << 24)
> >  #define PKT_LAST_FREE (1ULL << 40)
> >
> >  /* add new TX flags here, don't forget to update PKT_LAST_FREE  */
> > diff --git a/lib/security/rte_security.h b/lib/security/rte_security.h
> > index 88d31de0a6..364eeb5cd4 100644
> > --- a/lib/security/rte_security.h
> > +++ b/lib/security/rte_security.h
> > @@ -181,6 +181,16 @@ struct rte_security_ipsec_sa_options {
> >  	 * * 0: Disable per session security statistics collection for this SA.
> >  	 */
> >  	uint32_t stats : 1;
> > +
> > +	/** Enable reassembly on incoming packets.
> > +	 *
> > +	 * * 1: Enable driver to try reassembly of encrypted IP packets for
> > +	 *      this SA, if supported by the driver. This feature will work
> > +	 *      only if rx_offload DEV_RX_OFFLOAD_REASSEMBLY is set in
> > +	 *      inline ethernet device.
> 
> ethernet -> Ethernet
> 
> > +	 * * 0: Disable reassembly of packets (default).
> > +	 */
> > +	uint32_t reass_en : 1;
> >  };
> >
> >  /** IPSec security association direction */
> >


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [dpdk-dev] [PATCH] RFC: ethdev: add reassembly offload
  2021-08-23 10:02 [dpdk-dev] [PATCH] RFC: ethdev: add reassembly offload Akhil Goyal
  2021-08-23 10:18 ` Andrew Rybchenko
@ 2021-09-07  8:47 ` Ferruh Yigit
  2021-09-08 10:29   ` [dpdk-dev] [EXT] " Anoob Joseph
  2021-09-08  6:34 ` [dpdk-dev] " Xu, Rosen
  2022-01-03 15:08 ` [PATCH 0/8] ethdev: introduce IP " Akhil Goyal
  3 siblings, 1 reply; 53+ messages in thread
From: Ferruh Yigit @ 2021-09-07  8:47 UTC (permalink / raw)
  To: Akhil Goyal, dev
  Cc: anoobj, radu.nicolau, declan.doherty, hemant.agrawal, matan,
	konstantin.ananyev, thomas, adwivedi, andrew.rybchenko

On 8/23/2021 11:02 AM, Akhil Goyal wrote:
> Reassembly is a costly operation if it is done in
> software, however, if it is offloaded to HW, it can
> considerably save application cycles.
> The operation becomes even more costlier if IP fragmants
> are encrypted.
> 
> To resolve above two issues, a new offload
> DEV_RX_OFFLOAD_REASSEMBLY is introduced in ethdev for
> devices which can attempt reassembly of packets in hardware.
> rte_eth_dev_info is added with the reassembly capabilities
> which a device can support.
> Now, if IP fragments are encrypted, reassembly can also be
> attempted while doing inline IPsec processing.
> This is controlled by a flag in rte_security_ipsec_sa_options
> to enable reassembly of encrypted IP fragments in the inline
> path.
> 
> The resulting reassembled packet would be a typical
> segmented mbuf in case of success.
> 
> And if reassembly of fragments is failed or is incomplete (if
> fragments do not come before the reass_timeout), the mbuf is
> updated with an ol_flag PKT_RX_REASSEMBLY_INCOMPLETE and
> mbuf is returned as is. Now application may decide the fate
> of the packet to wait more for fragments to come or drop.
> 
> Signed-off-by: Akhil Goyal <gakhil@marvell.com>
> ---
>  lib/ethdev/rte_ethdev.c     |  1 +
>  lib/ethdev/rte_ethdev.h     | 18 +++++++++++++++++-
>  lib/mbuf/rte_mbuf_core.h    |  3 ++-
>  lib/security/rte_security.h | 10 ++++++++++
>  4 files changed, 30 insertions(+), 2 deletions(-)
> 
> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
> index 9d95cd11e1..1ab3a093cf 100644
> --- a/lib/ethdev/rte_ethdev.c
> +++ b/lib/ethdev/rte_ethdev.c
> @@ -119,6 +119,7 @@ static const struct {
>  	RTE_RX_OFFLOAD_BIT2STR(VLAN_FILTER),
>  	RTE_RX_OFFLOAD_BIT2STR(VLAN_EXTEND),
>  	RTE_RX_OFFLOAD_BIT2STR(JUMBO_FRAME),
> +	RTE_RX_OFFLOAD_BIT2STR(REASSEMBLY),
>  	RTE_RX_OFFLOAD_BIT2STR(SCATTER),
>  	RTE_RX_OFFLOAD_BIT2STR(TIMESTAMP),
>  	RTE_RX_OFFLOAD_BIT2STR(SECURITY),
> diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
> index d2b27c351f..e89a4dc1eb 100644
> --- a/lib/ethdev/rte_ethdev.h
> +++ b/lib/ethdev/rte_ethdev.h
> @@ -1360,6 +1360,7 @@ struct rte_eth_conf {
>  #define DEV_RX_OFFLOAD_VLAN_FILTER	0x00000200
>  #define DEV_RX_OFFLOAD_VLAN_EXTEND	0x00000400
>  #define DEV_RX_OFFLOAD_JUMBO_FRAME	0x00000800
> +#define DEV_RX_OFFLOAD_REASSEMBLY	0x00001000

The previous '0x00001000' was 'DEV_RX_OFFLOAD_CRC_STRIP'. It has been a long
time since that offload was removed, but I am not sure if it causes any
problem to re-use the bit.

>  #define DEV_RX_OFFLOAD_SCATTER		0x00002000
>  /**
>   * Timestamp is set by the driver in RTE_MBUF_DYNFIELD_TIMESTAMP_NAME
> @@ -1477,6 +1478,20 @@ struct rte_eth_dev_portconf {
>   */
>  #define RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID	(UINT16_MAX)
>  
> +/**
> + * Reassembly capabilities that a device can support.
> + * The device which can support reassembly offload should set
> + * DEV_RX_OFFLOAD_REASSEMBLY
> + */
> +struct rte_eth_reass_capa {
> +	/** Maximum time in ns that a fragment can wait for further fragments */
> +	uint64_t reass_timeout;
> +	/** Maximum number of fragments that device can reassemble */
> +	uint16_t max_frags;
> +	/** Reserved for future capabilities */
> +	uint16_t reserved[3];
> +};
> +

I wonder if there is any other hardware around that supports reassembly
offload; it would be good to get more feedback on the capabilities list.

>  /**
>   * Ethernet device associated switch information
>   */
> @@ -1582,8 +1597,9 @@ struct rte_eth_dev_info {
>  	 * embedded managed interconnect/switch.
>  	 */
>  	struct rte_eth_switch_info switch_info;
> +	/* Reassembly capabilities of a device for reassembly offload */
> +	struct rte_eth_reass_capa reass_capa;
>  
> -	uint64_t reserved_64s[2]; /**< Reserved for future fields */

Reserved fields were added to be able to update the struct without breaking the
ABI, so that a critical change doesn't have to wait until the next ABI-break
release. Since this is an ABI-break release, we can keep the reserved field and
add the new struct. Or this can be an opportunity to get rid of the reserved field.

Personally I have no objection to getting rid of the reserved field, but it is
better to agree on this explicitly.

>  	void *reserved_ptrs[2];   /**< Reserved for future fields */
>  };
>  
> diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
> index bb38d7f581..cea25c87f7 100644
> --- a/lib/mbuf/rte_mbuf_core.h
> +++ b/lib/mbuf/rte_mbuf_core.h
> @@ -200,10 +200,11 @@ extern "C" {
>  #define PKT_RX_OUTER_L4_CKSUM_BAD	(1ULL << 21)
>  #define PKT_RX_OUTER_L4_CKSUM_GOOD	(1ULL << 22)
>  #define PKT_RX_OUTER_L4_CKSUM_INVALID	((1ULL << 21) | (1ULL << 22))
> +#define PKT_RX_REASSEMBLY_INCOMPLETE	(1ULL << 23)
>  

Similar comment to Andrew's: what is the expectation from the application if
this flag exists? Can we drop it to simplify the logic in the application?

>  /* add new RX flags here, don't forget to update PKT_FIRST_FREE */
>  
> -#define PKT_FIRST_FREE (1ULL << 23)
> +#define PKT_FIRST_FREE (1ULL << 24)
>  #define PKT_LAST_FREE (1ULL << 40)
>  
>  /* add new TX flags here, don't forget to update PKT_LAST_FREE  */
> diff --git a/lib/security/rte_security.h b/lib/security/rte_security.h
> index 88d31de0a6..364eeb5cd4 100644
> --- a/lib/security/rte_security.h
> +++ b/lib/security/rte_security.h
> @@ -181,6 +181,16 @@ struct rte_security_ipsec_sa_options {
>  	 * * 0: Disable per session security statistics collection for this SA.
>  	 */
>  	uint32_t stats : 1;
> +
> +	/** Enable reassembly on incoming packets.
> +	 *
> +	 * * 1: Enable driver to try reassembly of encrypted IP packets for
> +	 *      this SA, if supported by the driver. This feature will work
> +	 *      only if rx_offload DEV_RX_OFFLOAD_REASSEMBLY is set in
> +	 *      inline ethernet device.
> +	 * * 0: Disable reassembly of packets (default).
> +	 */
> +	uint32_t reass_en : 1;
>  };
>  
>  /** IPSec security association direction */
> 


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [dpdk-dev] [PATCH] RFC: ethdev: add reassembly offload
  2021-08-23 10:02 [dpdk-dev] [PATCH] RFC: ethdev: add reassembly offload Akhil Goyal
  2021-08-23 10:18 ` Andrew Rybchenko
  2021-09-07  8:47 ` [dpdk-dev] " Ferruh Yigit
@ 2021-09-08  6:34 ` Xu, Rosen
  2021-09-08  6:36   ` Xu, Rosen
  2022-01-03 15:08 ` [PATCH 0/8] ethdev: introduce IP " Akhil Goyal
  3 siblings, 1 reply; 53+ messages in thread
From: Xu, Rosen @ 2021-09-08  6:34 UTC (permalink / raw)
  To: Akhil Goyal, dev
  Cc: anoobj, Nicolau, Radu, Doherty, Declan, hemant.agrawal, matan,
	Ananyev, Konstantin, thomas, adwivedi, Yigit, Ferruh,
	andrew.rybchenko

Hi,

> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Akhil Goyal
> Sent: Monday, August 23, 2021 18:03
> To: dev@dpdk.org
> Cc: anoobj@marvell.com; Nicolau, Radu <radu.nicolau@intel.com>; Doherty,
> Declan <declan.doherty@intel.com>; hemant.agrawal@nxp.com;
> matan@nvidia.com; Ananyev, Konstantin <konstantin.ananyev@intel.com>;
> thomas@monjalon.net; adwivedi@marvell.com; Yigit, Ferruh
> <ferruh.yigit@intel.com>; andrew.rybchenko@oktetlabs.ru; Akhil Goyal
> <gakhil@marvell.com>
> Subject: [dpdk-dev] [PATCH] RFC: ethdev: add reassembly offload
> 
> Reassembly is a costly operation if it is done in software, however, if it is
> offloaded to HW, it can considerably save application cycles.
> The operation becomes even more costlier if IP fragmants are encrypted.
> 
> To resolve above two issues, a new offload DEV_RX_OFFLOAD_REASSEMBLY
> is introduced in ethdev for devices which can attempt reassembly of packets
> in hardware.
> rte_eth_dev_info is added with the reassembly capabilities which a device
> can support.
> Now, if IP fragments are encrypted, reassembly can also be attempted while
> doing inline IPsec processing.
> This is controlled by a flag in rte_security_ipsec_sa_options to enable
> reassembly of encrypted IP fragments in the inline path.
> 
> The resulting reassembled packet would be a typical segmented mbuf in case
> of success.
> 
> And if reassembly of fragments is failed or is incomplete (if fragments do not
> come before the reass_timeout), the mbuf is updated with an ol_flag
> PKT_RX_REASSEMBLY_INCOMPLETE and mbuf is returned as is. Now
> application may decide the fate of the packet to wait more for fragments to
> come or drop.
> 
> Signed-off-by: Akhil Goyal <gakhil@marvell.com>
> ---
>  lib/ethdev/rte_ethdev.c     |  1 +
>  lib/ethdev/rte_ethdev.h     | 18 +++++++++++++++++-
>  lib/mbuf/rte_mbuf_core.h    |  3 ++-
>  lib/security/rte_security.h | 10 ++++++++++
>  4 files changed, 30 insertions(+), 2 deletions(-)
> 
> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c index
> 9d95cd11e1..1ab3a093cf 100644
> --- a/lib/ethdev/rte_ethdev.c
> +++ b/lib/ethdev/rte_ethdev.c
> @@ -119,6 +119,7 @@ static const struct {
>  	RTE_RX_OFFLOAD_BIT2STR(VLAN_FILTER),
>  	RTE_RX_OFFLOAD_BIT2STR(VLAN_EXTEND),
>  	RTE_RX_OFFLOAD_BIT2STR(JUMBO_FRAME),
> +	RTE_RX_OFFLOAD_BIT2STR(REASSEMBLY),
>  	RTE_RX_OFFLOAD_BIT2STR(SCATTER),
>  	RTE_RX_OFFLOAD_BIT2STR(TIMESTAMP),
>  	RTE_RX_OFFLOAD_BIT2STR(SECURITY),
> diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h index
> d2b27c351f..e89a4dc1eb 100644
> --- a/lib/ethdev/rte_ethdev.h
> +++ b/lib/ethdev/rte_ethdev.h
> @@ -1360,6 +1360,7 @@ struct rte_eth_conf {
>  #define DEV_RX_OFFLOAD_VLAN_FILTER	0x00000200
>  #define DEV_RX_OFFLOAD_VLAN_EXTEND	0x00000400
>  #define DEV_RX_OFFLOAD_JUMBO_FRAME	0x00000800
> +#define DEV_RX_OFFLOAD_REASSEMBLY	0x00001000
>  #define DEV_RX_OFFLOAD_SCATTER		0x00002000
>  /**
>   * Timestamp is set by the driver in
> RTE_MBUF_DYNFIELD_TIMESTAMP_NAME @@ -1477,6 +1478,20 @@ struct
> rte_eth_dev_portconf {
>   */
>  #define RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID
> 	(UINT16_MAX)
> 
> +/**
> + * Reassembly capabilities that a device can support.
> + * The device which can support reassembly offload should set
> + * DEV_RX_OFFLOAD_REASSEMBLY
> + */
> +struct rte_eth_reass_capa {
> +	/** Maximum time in ns that a fragment can wait for further
> fragments */
> +	uint64_t reass_timeout;
> +	/** Maximum number of fragments that device can reassemble */
> +	uint16_t max_frags;
> +	/** Reserved for future capabilities */
> +	uint16_t reserved[3];
> +};

IP reassembly occurs at the final recipient of the message, so a NIC attempting to do it faces a few challenges. Having NICs worry about reassembling fragments would increase their complexity, so most likely a NIC can only handle a limited range of datagram lengths. It seems rte_eth_reass_capa is missing the maximum original datagram length which the NIC can support; this feature is better negotiated between the NIC and SW as well.
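
For illustration, the capability could be extended along these lines
(the max_datagram_len field below is hypothetical, not part of the
posted patch), so that the application can check the supported
datagram length up front:

struct rte_eth_reass_capa {
	/** Maximum time in ns that a fragment can wait for further fragments */
	uint64_t reass_timeout;
	/** Maximum length in bytes of the original (reassembled) datagram
	 *  that the device can handle.
	 */
	uint32_t max_datagram_len;
	/** Maximum number of fragments that device can reassemble */
	uint16_t max_frags;
	/** Reserved for future capabilities */
	uint16_t reserved;
};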

>  /**
>   * Ethernet device associated switch information
>   */
> @@ -1582,8 +1597,9 @@ struct rte_eth_dev_info {
>  	 * embedded managed interconnect/switch.
>  	 */
>  	struct rte_eth_switch_info switch_info;
> +	/* Reassembly capabilities of a device for reassembly offload */
> +	struct rte_eth_reass_capa reass_capa;
> 
> -	uint64_t reserved_64s[2]; /**< Reserved for future fields */
>  	void *reserved_ptrs[2];   /**< Reserved for future fields */
>  };
> 
> diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h index
> bb38d7f581..cea25c87f7 100644
> --- a/lib/mbuf/rte_mbuf_core.h
> +++ b/lib/mbuf/rte_mbuf_core.h
> @@ -200,10 +200,11 @@ extern "C" {
>  #define PKT_RX_OUTER_L4_CKSUM_BAD	(1ULL << 21)
>  #define PKT_RX_OUTER_L4_CKSUM_GOOD	(1ULL << 22)
>  #define PKT_RX_OUTER_L4_CKSUM_INVALID	((1ULL << 21) | (1ULL << 22))
> +#define PKT_RX_REASSEMBLY_INCOMPLETE	(1ULL << 23)
> 
>  /* add new RX flags here, don't forget to update PKT_FIRST_FREE */
> 
> -#define PKT_FIRST_FREE (1ULL << 23)
> +#define PKT_FIRST_FREE (1ULL << 24)
>  #define PKT_LAST_FREE (1ULL << 40)
> 
>  /* add new TX flags here, don't forget to update PKT_LAST_FREE  */ diff --git
> a/lib/security/rte_security.h b/lib/security/rte_security.h index
> 88d31de0a6..364eeb5cd4 100644
> --- a/lib/security/rte_security.h
> +++ b/lib/security/rte_security.h
> @@ -181,6 +181,16 @@ struct rte_security_ipsec_sa_options {
>  	 * * 0: Disable per session security statistics collection for this SA.
>  	 */
>  	uint32_t stats : 1;
> +
> +	/** Enable reassembly on incoming packets.
> +	 *
> +	 * * 1: Enable driver to try reassembly of encrypted IP packets for
> +	 *      this SA, if supported by the driver. This feature will work
> +	 *      only if rx_offload DEV_RX_OFFLOAD_REASSEMBLY is set in
> +	 *      inline ethernet device.
> +	 * * 0: Disable reassembly of packets (default).
> +	 */
> +	uint32_t reass_en : 1;
>  };
> 
>  /** IPSec security association direction */
> --
> 2.25.1


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [dpdk-dev] [PATCH] RFC: ethdev: add reassembly offload
  2021-09-08  6:34 ` [dpdk-dev] " Xu, Rosen
@ 2021-09-08  6:36   ` Xu, Rosen
  0 siblings, 0 replies; 53+ messages in thread
From: Xu, Rosen @ 2021-09-08  6:36 UTC (permalink / raw)
  To: Xu, Rosen, Akhil Goyal, dev
  Cc: anoobj, Nicolau, Radu, Doherty, Declan, hemant.agrawal, matan,
	Ananyev, Konstantin, thomas, adwivedi, Yigit, Ferruh,
	andrew.rybchenko, Xu, Rosen

Cc myself

> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Xu, Rosen
> Sent: Wednesday, September 08, 2021 14:34
> To: Akhil Goyal <gakhil@marvell.com>; dev@dpdk.org
> Cc: anoobj@marvell.com; Nicolau, Radu <radu.nicolau@intel.com>; Doherty,
> Declan <declan.doherty@intel.com>; hemant.agrawal@nxp.com;
> matan@nvidia.com; Ananyev, Konstantin <konstantin.ananyev@intel.com>;
> thomas@monjalon.net; adwivedi@marvell.com; Yigit, Ferruh
> <ferruh.yigit@intel.com>; andrew.rybchenko@oktetlabs.ru
> Subject: Re: [dpdk-dev] [PATCH] RFC: ethdev: add reassembly offload
> 
> Hi,
> 
> > -----Original Message-----
> > From: dev <dev-bounces@dpdk.org> On Behalf Of Akhil Goyal
> > Sent: Monday, August 23, 2021 18:03
> > To: dev@dpdk.org
> > Cc: anoobj@marvell.com; Nicolau, Radu <radu.nicolau@intel.com>;
> > Doherty, Declan <declan.doherty@intel.com>; hemant.agrawal@nxp.com;
> > matan@nvidia.com; Ananyev, Konstantin
> <konstantin.ananyev@intel.com>;
> > thomas@monjalon.net; adwivedi@marvell.com; Yigit, Ferruh
> > <ferruh.yigit@intel.com>; andrew.rybchenko@oktetlabs.ru; Akhil Goyal
> > <gakhil@marvell.com>
> > Subject: [dpdk-dev] [PATCH] RFC: ethdev: add reassembly offload
> >
> > Reassembly is a costly operation if it is done in software, however,
> > if it is offloaded to HW, it can considerably save application cycles.
> > The operation becomes even more costlier if IP fragmants are encrypted.
> >
> > To resolve above two issues, a new offload
> DEV_RX_OFFLOAD_REASSEMBLY
> > is introduced in ethdev for devices which can attempt reassembly of
> > packets in hardware.
> > rte_eth_dev_info is added with the reassembly capabilities which a
> > device can support.
> > Now, if IP fragments are encrypted, reassembly can also be attempted
> > while doing inline IPsec processing.
> > This is controlled by a flag in rte_security_ipsec_sa_options to
> > enable reassembly of encrypted IP fragments in the inline path.
> >
> > The resulting reassembled packet would be a typical segmented mbuf in
> > case of success.
> >
> > And if reassembly of fragments is failed or is incomplete (if
> > fragments do not come before the reass_timeout), the mbuf is updated
> > with an ol_flag PKT_RX_REASSEMBLY_INCOMPLETE and mbuf is returned
> as
> > is. Now application may decide the fate of the packet to wait more for
> > fragments to come or drop.
> >
> > Signed-off-by: Akhil Goyal <gakhil@marvell.com>
> > ---
> >  lib/ethdev/rte_ethdev.c     |  1 +
> >  lib/ethdev/rte_ethdev.h     | 18 +++++++++++++++++-
> >  lib/mbuf/rte_mbuf_core.h    |  3 ++-
> >  lib/security/rte_security.h | 10 ++++++++++
> >  4 files changed, 30 insertions(+), 2 deletions(-)
> >
> > diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c index
> > 9d95cd11e1..1ab3a093cf 100644
> > --- a/lib/ethdev/rte_ethdev.c
> > +++ b/lib/ethdev/rte_ethdev.c
> > @@ -119,6 +119,7 @@ static const struct {
> >  	RTE_RX_OFFLOAD_BIT2STR(VLAN_FILTER),
> >  	RTE_RX_OFFLOAD_BIT2STR(VLAN_EXTEND),
> >  	RTE_RX_OFFLOAD_BIT2STR(JUMBO_FRAME),
> > +	RTE_RX_OFFLOAD_BIT2STR(REASSEMBLY),
> >  	RTE_RX_OFFLOAD_BIT2STR(SCATTER),
> >  	RTE_RX_OFFLOAD_BIT2STR(TIMESTAMP),
> >  	RTE_RX_OFFLOAD_BIT2STR(SECURITY),
> > diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h index
> > d2b27c351f..e89a4dc1eb 100644
> > --- a/lib/ethdev/rte_ethdev.h
> > +++ b/lib/ethdev/rte_ethdev.h
> > @@ -1360,6 +1360,7 @@ struct rte_eth_conf {
> >  #define DEV_RX_OFFLOAD_VLAN_FILTER	0x00000200
> >  #define DEV_RX_OFFLOAD_VLAN_EXTEND	0x00000400
> >  #define DEV_RX_OFFLOAD_JUMBO_FRAME	0x00000800
> > +#define DEV_RX_OFFLOAD_REASSEMBLY	0x00001000
> >  #define DEV_RX_OFFLOAD_SCATTER		0x00002000
> >  /**
> >   * Timestamp is set by the driver in
> > RTE_MBUF_DYNFIELD_TIMESTAMP_NAME @@ -1477,6 +1478,20 @@
> struct
> > rte_eth_dev_portconf {
> >   */
> >  #define RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID
> > 	(UINT16_MAX)
> >
> > +/**
> > + * Reassembly capabilities that a device can support.
> > + * The device which can support reassembly offload should set
> > + * DEV_RX_OFFLOAD_REASSEMBLY
> > + */
> > +struct rte_eth_reass_capa {
> > +	/** Maximum time in ns that a fragment can wait for further
> > fragments */
> > +	uint64_t reass_timeout;
> > +	/** Maximum number of fragments that device can reassemble */
> > +	uint16_t max_frags;
> > +	/** Reserved for future capabilities */
> > +	uint16_t reserved[3];
> > +};
> 
> IP reassembly occurs at the final recipient of the message, NIC attempts to
> do it has a fer challenges. The reason is that having NICs need to worry about
> reassembling fragments would increase their complexity, so most likely it
> only can handle range length of datagrams. Seems rte_eth_reass_capa miss
> the max original datagrams length which NIC can support, this features is
> better to be negotiated between NIC and SW as well.
> 
> >  /**
> >   * Ethernet device associated switch information
> >   */
> > @@ -1582,8 +1597,9 @@ struct rte_eth_dev_info {
> >  	 * embedded managed interconnect/switch.
> >  	 */
> >  	struct rte_eth_switch_info switch_info;
> > +	/* Reassembly capabilities of a device for reassembly offload */
> > +	struct rte_eth_reass_capa reass_capa;
> >
> > -	uint64_t reserved_64s[2]; /**< Reserved for future fields */
> >  	void *reserved_ptrs[2];   /**< Reserved for future fields */
> >  };
> >
> > diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h index
> > bb38d7f581..cea25c87f7 100644
> > --- a/lib/mbuf/rte_mbuf_core.h
> > +++ b/lib/mbuf/rte_mbuf_core.h
> > @@ -200,10 +200,11 @@ extern "C" {
> >  #define PKT_RX_OUTER_L4_CKSUM_BAD	(1ULL << 21)
> >  #define PKT_RX_OUTER_L4_CKSUM_GOOD	(1ULL << 22)
> >  #define PKT_RX_OUTER_L4_CKSUM_INVALID	((1ULL << 21) | (1ULL
> << 22))
> > +#define PKT_RX_REASSEMBLY_INCOMPLETE	(1ULL << 23)
> >
> >  /* add new RX flags here, don't forget to update PKT_FIRST_FREE */
> >
> > -#define PKT_FIRST_FREE (1ULL << 23)
> > +#define PKT_FIRST_FREE (1ULL << 24)
> >  #define PKT_LAST_FREE (1ULL << 40)
> >
> >  /* add new TX flags here, don't forget to update PKT_LAST_FREE  */
> > diff --git a/lib/security/rte_security.h b/lib/security/rte_security.h
> > index
> > 88d31de0a6..364eeb5cd4 100644
> > --- a/lib/security/rte_security.h
> > +++ b/lib/security/rte_security.h
> > @@ -181,6 +181,16 @@ struct rte_security_ipsec_sa_options {
> >  	 * * 0: Disable per session security statistics collection for this SA.
> >  	 */
> >  	uint32_t stats : 1;
> > +
> > +	/** Enable reassembly on incoming packets.
> > +	 *
> > +	 * * 1: Enable driver to try reassembly of encrypted IP packets for
> > +	 *      this SA, if supported by the driver. This feature will work
> > +	 *      only if rx_offload DEV_RX_OFFLOAD_REASSEMBLY is set in
> > +	 *      inline ethernet device.
> > +	 * * 0: Disable reassembly of packets (default).
> > +	 */
> > +	uint32_t reass_en : 1;
> >  };
> >
> >  /** IPSec security association direction */
> > --
> > 2.25.1


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [dpdk-dev] [EXT] Re: [PATCH] RFC: ethdev: add reassembly offload
  2021-09-07  8:47 ` [dpdk-dev] " Ferruh Yigit
@ 2021-09-08 10:29   ` Anoob Joseph
  2021-09-13  6:56     ` Xu, Rosen
  0 siblings, 1 reply; 53+ messages in thread
From: Anoob Joseph @ 2021-09-08 10:29 UTC (permalink / raw)
  To: Ferruh Yigit, Xu, Rosen, Andrew Rybchenko
  Cc: radu.nicolau, declan.doherty, hemant.agrawal, matan,
	konstantin.ananyev, thomas, Ankur Dwivedi, andrew.rybchenko,
	Akhil Goyal, dev

Hi Ferruh, Rosen, Andrew,

Please see inline.

Thanks,
Anoob

> Subject: [EXT] Re: [PATCH] RFC: ethdev: add reassembly offload
> 
> External Email
> 
> ----------------------------------------------------------------------
> On 8/23/2021 11:02 AM, Akhil Goyal wrote:
> > Reassembly is a costly operation if it is done in software, however,
> > if it is offloaded to HW, it can considerably save application cycles.
> > The operation becomes even more costlier if IP fragmants are
> > encrypted.
> >
> > To resolve above two issues, a new offload
> DEV_RX_OFFLOAD_REASSEMBLY
> > is introduced in ethdev for devices which can attempt reassembly of
> > packets in hardware.
> > rte_eth_dev_info is added with the reassembly capabilities which a
> > device can support.
> > Now, if IP fragments are encrypted, reassembly can also be attempted
> > while doing inline IPsec processing.
> > This is controlled by a flag in rte_security_ipsec_sa_options to
> > enable reassembly of encrypted IP fragments in the inline path.
> >
> > The resulting reassembled packet would be a typical segmented mbuf in
> > case of success.
> >
> > And if reassembly of fragments is failed or is incomplete (if
> > fragments do not come before the reass_timeout), the mbuf is updated
> > with an ol_flag PKT_RX_REASSEMBLY_INCOMPLETE and mbuf is returned
> as
> > is. Now application may decide the fate of the packet to wait more for
> > fragments to come or drop.
> >
> > Signed-off-by: Akhil Goyal <gakhil@marvell.com>
> > ---
> >  lib/ethdev/rte_ethdev.c     |  1 +
> >  lib/ethdev/rte_ethdev.h     | 18 +++++++++++++++++-
> >  lib/mbuf/rte_mbuf_core.h    |  3 ++-
> >  lib/security/rte_security.h | 10 ++++++++++
> >  4 files changed, 30 insertions(+), 2 deletions(-)
> >
> > diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c index
> > 9d95cd11e1..1ab3a093cf 100644
> > --- a/lib/ethdev/rte_ethdev.c
> > +++ b/lib/ethdev/rte_ethdev.c
> > @@ -119,6 +119,7 @@ static const struct {
> >  	RTE_RX_OFFLOAD_BIT2STR(VLAN_FILTER),
> >  	RTE_RX_OFFLOAD_BIT2STR(VLAN_EXTEND),
> >  	RTE_RX_OFFLOAD_BIT2STR(JUMBO_FRAME),
> > +	RTE_RX_OFFLOAD_BIT2STR(REASSEMBLY),
> >  	RTE_RX_OFFLOAD_BIT2STR(SCATTER),
> >  	RTE_RX_OFFLOAD_BIT2STR(TIMESTAMP),
> >  	RTE_RX_OFFLOAD_BIT2STR(SECURITY),
> > diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h index
> > d2b27c351f..e89a4dc1eb 100644
> > --- a/lib/ethdev/rte_ethdev.h
> > +++ b/lib/ethdev/rte_ethdev.h
> > @@ -1360,6 +1360,7 @@ struct rte_eth_conf {
> >  #define DEV_RX_OFFLOAD_VLAN_FILTER	0x00000200
> >  #define DEV_RX_OFFLOAD_VLAN_EXTEND	0x00000400
> >  #define DEV_RX_OFFLOAD_JUMBO_FRAME	0x00000800
> > +#define DEV_RX_OFFLOAD_REASSEMBLY	0x00001000
> 
> previous '0x00001000' was 'DEV_RX_OFFLOAD_CRC_STRIP', it has been long
> that offload has been removed, but not sure if it cause any problem to re-
> use it.
> 
> >  #define DEV_RX_OFFLOAD_SCATTER		0x00002000
> >  /**
> >   * Timestamp is set by the driver in
> RTE_MBUF_DYNFIELD_TIMESTAMP_NAME
> > @@ -1477,6 +1478,20 @@ struct rte_eth_dev_portconf {
> >   */
> >  #define RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID
> 	(UINT16_MAX)
> >
> > +/**
> > + * Reassembly capabilities that a device can support.
> > + * The device which can support reassembly offload should set
> > + * DEV_RX_OFFLOAD_REASSEMBLY
> > + */
> > +struct rte_eth_reass_capa {
> > +	/** Maximum time in ns that a fragment can wait for further
> fragments */
> > +	uint64_t reass_timeout;
> > +	/** Maximum number of fragments that device can reassemble */
> > +	uint16_t max_frags;
> > +	/** Reserved for future capabilities */
> > +	uint16_t reserved[3];
> > +};
> > +
> 
> I wonder if there is any other hardware around supports reassembly offload,
> it would be good to get more feedback on the capabilities list.
> 
> >  /**
> >   * Ethernet device associated switch information
> >   */
> > @@ -1582,8 +1597,9 @@ struct rte_eth_dev_info {
> >  	 * embedded managed interconnect/switch.
> >  	 */
> >  	struct rte_eth_switch_info switch_info;
> > +	/* Reassembly capabilities of a device for reassembly offload */
> > +	struct rte_eth_reass_capa reass_capa;
> >
> > -	uint64_t reserved_64s[2]; /**< Reserved for future fields */
> 
> Reserved fields were added to be able to update the struct without breaking
> the ABI, so that a critical change doesn't have to wait until next ABI break
> release.
> Since this is ABI break release, we can keep the reserved field and add the
> new struct. Or this can be an opportunity to get rid of the reserved field.
> 
> Personally I have no objection to get rid of the reserved field, but better to
> agree on this explicitly.
> 
> >  	void *reserved_ptrs[2];   /**< Reserved for future fields */
> >  };
> >
> > diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h index
> > bb38d7f581..cea25c87f7 100644
> > --- a/lib/mbuf/rte_mbuf_core.h
> > +++ b/lib/mbuf/rte_mbuf_core.h
> > @@ -200,10 +200,11 @@ extern "C" {
> >  #define PKT_RX_OUTER_L4_CKSUM_BAD	(1ULL << 21)
> >  #define PKT_RX_OUTER_L4_CKSUM_GOOD	(1ULL << 22)
> >  #define PKT_RX_OUTER_L4_CKSUM_INVALID	((1ULL << 21) | (1ULL
> << 22))
> > +#define PKT_RX_REASSEMBLY_INCOMPLETE	(1ULL << 23)
> >
> 
> Similar comment with Andrew's, what is the expectation from application if
> this flag exists? Can we drop it to simplify the logic in the application?

[Anoob] There can be a few cases where the hardware/NIC attempts inline reassembly but fails to complete it:

1. Number of fragments is larger than what is supported by the hardware
2. Hardware reassembly resources are exhausted (due to limited reassembly contexts etc)
3. Reassembly errors such as overlapping fragments
4. Wait time exhausted (or reassembly timeout)

In such cases, the application would be required to retrieve the original fragments so that it can attempt reassembly in software. The incomplete flag is useful for two purposes basically:
1. The application would need to retrieve the time the fragment has already spent in hardware reassembly so that the software reassembly attempt can compensate for it. Otherwise, the reassembly timeout across hardware + software will not be accurate.
2. Retrieve the original fragments. With this proposal, an incomplete reassembly would result in a chained mbuf, but the segments need not be consecutive. To explain a bit more,

Suppose we have a packet that is fragmented into 3 fragments, and fragment 3 & fragment 1 arrive in that order. Fragment 2 didn't arrive and the hardware ultimately pushes what it has. In that case, the application would receive a chained/segmented mbuf with fragment 1 & fragment 3 chained.

Now, this chained mbuf can't be treated like a regular chained mbuf. Each fragment would have its own IP header, and there are fragments missing in between. The only thing the application is expected to do is retrieve the fragments and push them to s/w reassembly.
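
A rough sketch of this fallback path (not from the patch), assuming
librte_ip_frag is used for the software reassembly and that each
fragment fits in a single segment; table/death-row setup is omitted
and deliver_packet() is a hypothetical application helper:

#include <rte_cycles.h>
#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_mbuf.h>

static void
sw_reassembly_fallback(struct rte_mbuf *m, struct rte_ip_frag_tbl *tbl,
		       struct rte_ip_frag_death_row *dr)
{
	/* The application would subtract the cycles the fragments already
	 * spent in HW reassembly from 'tms' so that the combined HW + SW
	 * timeout stays accurate.
	 */
	uint64_t tms = rte_rdtsc();

	while (m != NULL) {
		struct rte_mbuf *next = m->next;
		struct rte_mbuf *pkt;
		struct rte_ipv4_hdr *ip;

		/* Detach this fragment from the HW-built chain; each segment
		 * is a complete fragment carrying its own Ether/IP headers.
		 */
		m->next = NULL;
		m->nb_segs = 1;
		m->pkt_len = m->data_len;

		ip = rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,
					     sizeof(struct rte_ether_hdr));
		m->l2_len = sizeof(struct rte_ether_hdr);
		m->l3_len = (ip->version_ihl & RTE_IPV4_HDR_IHL_MASK) *
			    RTE_IPV4_IHL_MULTIPLIER;

		/* librte_ip_frag buffers the fragment and returns the fully
		 * reassembled packet once all pieces have been seen.
		 */
		pkt = rte_ipv4_frag_reassemble_packet(tbl, dr, m, tms, ip);
		if (pkt != NULL)
			deliver_packet(pkt); /* reassembly completed in SW */

		m = next;
	}
	rte_ip_frag_free_death_row(dr, 3);
}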
 
> 
> >  /* add new RX flags here, don't forget to update PKT_FIRST_FREE */
> >
> > -#define PKT_FIRST_FREE (1ULL << 23)
> > +#define PKT_FIRST_FREE (1ULL << 24)
> >  #define PKT_LAST_FREE (1ULL << 40)
> >
> >  /* add new TX flags here, don't forget to update PKT_LAST_FREE  */
> > diff --git a/lib/security/rte_security.h b/lib/security/rte_security.h
> > index 88d31de0a6..364eeb5cd4 100644
> > --- a/lib/security/rte_security.h
> > +++ b/lib/security/rte_security.h
> > @@ -181,6 +181,16 @@ struct rte_security_ipsec_sa_options {
> >  	 * * 0: Disable per session security statistics collection for this SA.
> >  	 */
> >  	uint32_t stats : 1;
> > +
> > +	/** Enable reassembly on incoming packets.
> > +	 *
> > +	 * * 1: Enable driver to try reassembly of encrypted IP packets for
> > +	 *      this SA, if supported by the driver. This feature will work
> > +	 *      only if rx_offload DEV_RX_OFFLOAD_REASSEMBLY is set in
> > +	 *      inline ethernet device.
> > +	 * * 0: Disable reassembly of packets (default).
> > +	 */
> > +	uint32_t reass_en : 1;
> >  };
> >
> >  /** IPSec security association direction */
> >


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [dpdk-dev] [EXT] Re: [PATCH] RFC: ethdev: add reassembly offload
  2021-09-08 10:29   ` [dpdk-dev] [EXT] " Anoob Joseph
@ 2021-09-13  6:56     ` Xu, Rosen
  2021-09-13  7:22       ` Andrew Rybchenko
  0 siblings, 1 reply; 53+ messages in thread
From: Xu, Rosen @ 2021-09-13  6:56 UTC (permalink / raw)
  To: Anoob Joseph, Yigit, Ferruh, Andrew Rybchenko
  Cc: Nicolau, Radu, Doherty, Declan, hemant.agrawal, matan, Ananyev,
	Konstantin, thomas, Ankur Dwivedi, andrew.rybchenko, Akhil Goyal,
	dev, Xu, Rosen

Hi,

> -----Original Message-----
> From: Anoob Joseph <anoobj@marvell.com>
> Sent: Wednesday, September 08, 2021 18:30
> To: Yigit, Ferruh <ferruh.yigit@intel.com>; Xu, Rosen <rosen.xu@intel.com>;
> Andrew Rybchenko <arybchenko@solarflare.com>
> Cc: Nicolau, Radu <radu.nicolau@intel.com>; Doherty, Declan
> <declan.doherty@intel.com>; hemant.agrawal@nxp.com;
> matan@nvidia.com; Ananyev, Konstantin <konstantin.ananyev@intel.com>;
> thomas@monjalon.net; Ankur Dwivedi <adwivedi@marvell.com>;
> andrew.rybchenko@oktetlabs.ru; Akhil Goyal <gakhil@marvell.com>;
> dev@dpdk.org
> Subject: RE: [EXT] Re: [PATCH] RFC: ethdev: add reassembly offload
> 
> Hi Ferruh, Rosen, Andrew,
> 
> Please see inline.
> 
> Thanks,
> Anoob
> 
> > Subject: [EXT] Re: [PATCH] RFC: ethdev: add reassembly offload
> >
> > External Email
> >
> > ----------------------------------------------------------------------
> > On 8/23/2021 11:02 AM, Akhil Goyal wrote:
> > > Reassembly is a costly operation if it is done in software, however,
> > > if it is offloaded to HW, it can considerably save application cycles.
> > > The operation becomes even more costlier if IP fragmants are
> > > encrypted.
> > >
> > > To resolve above two issues, a new offload
> > DEV_RX_OFFLOAD_REASSEMBLY
> > > is introduced in ethdev for devices which can attempt reassembly of
> > > packets in hardware.
> > > rte_eth_dev_info is added with the reassembly capabilities which a
> > > device can support.
> > > Now, if IP fragments are encrypted, reassembly can also be attempted
> > > while doing inline IPsec processing.
> > > This is controlled by a flag in rte_security_ipsec_sa_options to
> > > enable reassembly of encrypted IP fragments in the inline path.
> > >
> > > The resulting reassembled packet would be a typical segmented mbuf
> > > in case of success.
> > >
> > > And if reassembly of fragments is failed or is incomplete (if
> > > fragments do not come before the reass_timeout), the mbuf is updated
> > > with an ol_flag PKT_RX_REASSEMBLY_INCOMPLETE and mbuf is returned
> > as
> > > is. Now application may decide the fate of the packet to wait more
> > > for fragments to come or drop.
> > >
> > > Signed-off-by: Akhil Goyal <gakhil@marvell.com>
> > > ---
> > >  lib/ethdev/rte_ethdev.c     |  1 +
> > >  lib/ethdev/rte_ethdev.h     | 18 +++++++++++++++++-
> > >  lib/mbuf/rte_mbuf_core.h    |  3 ++-
> > >  lib/security/rte_security.h | 10 ++++++++++
> > >  4 files changed, 30 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c index
> > > 9d95cd11e1..1ab3a093cf 100644
> > > --- a/lib/ethdev/rte_ethdev.c
> > > +++ b/lib/ethdev/rte_ethdev.c
> > > @@ -119,6 +119,7 @@ static const struct {
> > >  	RTE_RX_OFFLOAD_BIT2STR(VLAN_FILTER),
> > >  	RTE_RX_OFFLOAD_BIT2STR(VLAN_EXTEND),
> > >  	RTE_RX_OFFLOAD_BIT2STR(JUMBO_FRAME),
> > > +	RTE_RX_OFFLOAD_BIT2STR(REASSEMBLY),
> > >  	RTE_RX_OFFLOAD_BIT2STR(SCATTER),
> > >  	RTE_RX_OFFLOAD_BIT2STR(TIMESTAMP),
> > >  	RTE_RX_OFFLOAD_BIT2STR(SECURITY),
> > > diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h index
> > > d2b27c351f..e89a4dc1eb 100644
> > > --- a/lib/ethdev/rte_ethdev.h
> > > +++ b/lib/ethdev/rte_ethdev.h
> > > @@ -1360,6 +1360,7 @@ struct rte_eth_conf {
> > >  #define DEV_RX_OFFLOAD_VLAN_FILTER	0x00000200
> > >  #define DEV_RX_OFFLOAD_VLAN_EXTEND	0x00000400
> > >  #define DEV_RX_OFFLOAD_JUMBO_FRAME	0x00000800
> > > +#define DEV_RX_OFFLOAD_REASSEMBLY	0x00001000
> >
> > previous '0x00001000' was 'DEV_RX_OFFLOAD_CRC_STRIP', it has been
> long
> > that offload has been removed, but not sure if it cause any problem to
> > re- use it.
> >
> > >  #define DEV_RX_OFFLOAD_SCATTER		0x00002000
> > >  /**
> > >   * Timestamp is set by the driver in
> > RTE_MBUF_DYNFIELD_TIMESTAMP_NAME
> > > @@ -1477,6 +1478,20 @@ struct rte_eth_dev_portconf {
> > >   */
> > >  #define RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID
> > 	(UINT16_MAX)
> > >
> > > +/**
> > > + * Reassembly capabilities that a device can support.
> > > + * The device which can support reassembly offload should set
> > > + * DEV_RX_OFFLOAD_REASSEMBLY
> > > + */
> > > +struct rte_eth_reass_capa {
> > > +	/** Maximum time in ns that a fragment can wait for further
> > fragments */
> > > +	uint64_t reass_timeout;
> > > +	/** Maximum number of fragments that device can reassemble */
> > > +	uint16_t max_frags;
> > > +	/** Reserved for future capabilities */
> > > +	uint16_t reserved[3];
> > > +};
> > > +
> >
> > I wonder if there is any other hardware around supports reassembly
> > offload, it would be good to get more feedback on the capabilities list.
> >
> > >  /**
> > >   * Ethernet device associated switch information
> > >   */
> > > @@ -1582,8 +1597,9 @@ struct rte_eth_dev_info {
> > >  	 * embedded managed interconnect/switch.
> > >  	 */
> > >  	struct rte_eth_switch_info switch_info;
> > > +	/* Reassembly capabilities of a device for reassembly offload */
> > > +	struct rte_eth_reass_capa reass_capa;
> > >
> > > -	uint64_t reserved_64s[2]; /**< Reserved for future fields */
> >
> > Reserved fields were added to be able to update the struct without
> > breaking the ABI, so that a critical change doesn't have to wait until
> > next ABI break release.
> > Since this is ABI break release, we can keep the reserved field and
> > add the new struct. Or this can be an opportunity to get rid of the reserved
> field.
> >
> > Personally I have no objection to get rid of the reserved field, but
> > better to agree on this explicitly.
> >
> > >  	void *reserved_ptrs[2];   /**< Reserved for future fields */
> > >  };
> > >
> > > diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
> > > index
> > > bb38d7f581..cea25c87f7 100644
> > > --- a/lib/mbuf/rte_mbuf_core.h
> > > +++ b/lib/mbuf/rte_mbuf_core.h
> > > @@ -200,10 +200,11 @@ extern "C" {
> > >  #define PKT_RX_OUTER_L4_CKSUM_BAD	(1ULL << 21)
> > >  #define PKT_RX_OUTER_L4_CKSUM_GOOD	(1ULL << 22)
> > >  #define PKT_RX_OUTER_L4_CKSUM_INVALID	((1ULL << 21) | (1ULL
> > << 22))
> > > +#define PKT_RX_REASSEMBLY_INCOMPLETE	(1ULL << 23)
> > >
> >
> > Similar comment with Andrew's, what is the expectation from
> > application if this flag exists? Can we drop it to simplify the logic in the
> application?
> 
> [Anoob] There can be few cases where hardware/NIC attempts inline
> reassembly but it fails to complete it
> 
> 1. Number of fragments is larger than what is supported by the hardware 2.
> Hardware reassembly resources are exhausted (due to limited reassembly
> contexts etc) 3. Reassembly errors such as overlapping fragments 4. Wait
> time exhausted (or reassembly timeout)
> 
> In such cases, application would be required to retrieve the original
> fragments so that it can attempt reassembly in software. The incomplete flag
> is useful for 2 purposes basically, 1. Application would need to retrieve the
> time the fragment has already spend in hardware reassembly so that
> software reassembly attempt can compensate for it. Otherwise, reassembly
> timeout across hardware + software will not be accurate 2. Retrieve original
> fragments. With this proposal, an incomplete reassembly would result in a
> chained mbuf but the segments need not be consecutive. To explain bit more,
> 
> Suppose we have a packet that is fragmented into 3 fragments, and fragment
> 3 & fragment 1 arrives in that order. Fragment 2 didn't arrive and hardware
> ultimately pushes it. In that case, application would be receiving a
> chained/segmented mbuf with fragment 1 & fragment 3 chained.
> 
> Now, this chained mbuf can't be treated like a regular chained mbuf. Each
> fragment would have its IP hdr and there are fragments missing in between.
> The only thing application is expected to do is, retrieve fragments, push it to
> s/w reassembly.

What you mentioned is error identification. But actually, a negotiation of the maximum frame/datagram size between the NIC and the application is needed before datagrams are transmitted/received.
> >
> > >  /* add new RX flags here, don't forget to update PKT_FIRST_FREE */
> > >
> > > -#define PKT_FIRST_FREE (1ULL << 23)
> > > +#define PKT_FIRST_FREE (1ULL << 24)
> > >  #define PKT_LAST_FREE (1ULL << 40)
> > >
> > >  /* add new TX flags here, don't forget to update PKT_LAST_FREE  */
> > > diff --git a/lib/security/rte_security.h
> > > b/lib/security/rte_security.h index 88d31de0a6..364eeb5cd4 100644
> > > --- a/lib/security/rte_security.h
> > > +++ b/lib/security/rte_security.h
> > > @@ -181,6 +181,16 @@ struct rte_security_ipsec_sa_options {
> > >  	 * * 0: Disable per session security statistics collection for this SA.
> > >  	 */
> > >  	uint32_t stats : 1;
> > > +
> > > +	/** Enable reassembly on incoming packets.
> > > +	 *
> > > +	 * * 1: Enable driver to try reassembly of encrypted IP packets for
> > > +	 *      this SA, if supported by the driver. This feature will work
> > > +	 *      only if rx_offload DEV_RX_OFFLOAD_REASSEMBLY is set in
> > > +	 *      inline ethernet device.
> > > +	 * * 0: Disable reassembly of packets (default).
> > > +	 */
> > > +	uint32_t reass_en : 1;
> > >  };
> > >
> > >  /** IPSec security association direction */
> > >


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [dpdk-dev] [EXT] Re: [PATCH] RFC: ethdev: add reassembly offload
  2021-09-13  6:56     ` Xu, Rosen
@ 2021-09-13  7:22       ` Andrew Rybchenko
  2021-09-14  5:14         ` Anoob Joseph
  0 siblings, 1 reply; 53+ messages in thread
From: Andrew Rybchenko @ 2021-09-13  7:22 UTC (permalink / raw)
  To: Xu, Rosen, Anoob Joseph, Yigit, Ferruh, Andrew Rybchenko
  Cc: Nicolau, Radu, Doherty, Declan, hemant.agrawal, matan, Ananyev,
	Konstantin, thomas, Ankur Dwivedi, Akhil Goyal, dev

On 9/13/21 9:56 AM, Xu, Rosen wrote:
> Hi,
> 
>> -----Original Message-----
>> From: Anoob Joseph <anoobj@marvell.com>
>> Sent: Wednesday, September 08, 2021 18:30
>> To: Yigit, Ferruh <ferruh.yigit@intel.com>; Xu, Rosen <rosen.xu@intel.com>;
>> Andrew Rybchenko <arybchenko@solarflare.com>
>> Cc: Nicolau, Radu <radu.nicolau@intel.com>; Doherty, Declan
>> <declan.doherty@intel.com>; hemant.agrawal@nxp.com;
>> matan@nvidia.com; Ananyev, Konstantin <konstantin.ananyev@intel.com>;
>> thomas@monjalon.net; Ankur Dwivedi <adwivedi@marvell.com>;
>> andrew.rybchenko@oktetlabs.ru; Akhil Goyal <gakhil@marvell.com>;
>> dev@dpdk.org
>> Subject: RE: [EXT] Re: [PATCH] RFC: ethdev: add reassembly offload
>>
>> Hi Ferruh, Rosen, Andrew,
>>
>> Please see inline.
>>
>> Thanks,
>> Anoob
>>
>>> Subject: [EXT] Re: [PATCH] RFC: ethdev: add reassembly offload
>>>
>>> External Email
>>>
>>> ----------------------------------------------------------------------
>>> On 8/23/2021 11:02 AM, Akhil Goyal wrote:
>>>> Reassembly is a costly operation if it is done in software, however,
>>>> if it is offloaded to HW, it can considerably save application cycles.
>>>> The operation becomes even more costlier if IP fragmants are
>>>> encrypted.
>>>>
>>>> To resolve above two issues, a new offload
>>> DEV_RX_OFFLOAD_REASSEMBLY
>>>> is introduced in ethdev for devices which can attempt reassembly of
>>>> packets in hardware.
>>>> rte_eth_dev_info is added with the reassembly capabilities which a
>>>> device can support.
>>>> Now, if IP fragments are encrypted, reassembly can also be attempted
>>>> while doing inline IPsec processing.
>>>> This is controlled by a flag in rte_security_ipsec_sa_options to
>>>> enable reassembly of encrypted IP fragments in the inline path.
>>>>
>>>> The resulting reassembled packet would be a typical segmented mbuf
>>>> in case of success.
>>>>
>>>> And if reassembly of fragments is failed or is incomplete (if
>>>> fragments do not come before the reass_timeout), the mbuf is updated
>>>> with an ol_flag PKT_RX_REASSEMBLY_INCOMPLETE and mbuf is returned
>>> as
>>>> is. Now application may decide the fate of the packet to wait more
>>>> for fragments to come or drop.
>>>>
>>>> Signed-off-by: Akhil Goyal <gakhil@marvell.com>
>>>> ---
>>>>  lib/ethdev/rte_ethdev.c     |  1 +
>>>>  lib/ethdev/rte_ethdev.h     | 18 +++++++++++++++++-
>>>>  lib/mbuf/rte_mbuf_core.h    |  3 ++-
>>>>  lib/security/rte_security.h | 10 ++++++++++
>>>>  4 files changed, 30 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c index
>>>> 9d95cd11e1..1ab3a093cf 100644
>>>> --- a/lib/ethdev/rte_ethdev.c
>>>> +++ b/lib/ethdev/rte_ethdev.c
>>>> @@ -119,6 +119,7 @@ static const struct {
>>>>  	RTE_RX_OFFLOAD_BIT2STR(VLAN_FILTER),
>>>>  	RTE_RX_OFFLOAD_BIT2STR(VLAN_EXTEND),
>>>>  	RTE_RX_OFFLOAD_BIT2STR(JUMBO_FRAME),
>>>> +	RTE_RX_OFFLOAD_BIT2STR(REASSEMBLY),
>>>>  	RTE_RX_OFFLOAD_BIT2STR(SCATTER),
>>>>  	RTE_RX_OFFLOAD_BIT2STR(TIMESTAMP),
>>>>  	RTE_RX_OFFLOAD_BIT2STR(SECURITY),
>>>> diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h index
>>>> d2b27c351f..e89a4dc1eb 100644
>>>> --- a/lib/ethdev/rte_ethdev.h
>>>> +++ b/lib/ethdev/rte_ethdev.h
>>>> @@ -1360,6 +1360,7 @@ struct rte_eth_conf {
>>>>  #define DEV_RX_OFFLOAD_VLAN_FILTER	0x00000200
>>>>  #define DEV_RX_OFFLOAD_VLAN_EXTEND	0x00000400
>>>>  #define DEV_RX_OFFLOAD_JUMBO_FRAME	0x00000800
>>>> +#define DEV_RX_OFFLOAD_REASSEMBLY	0x00001000
>>>
>>> previous '0x00001000' was 'DEV_RX_OFFLOAD_CRC_STRIP', it has been
>> long
>>> that offload has been removed, but not sure if it cause any problem to
>>> re- use it.
>>>
>>>>  #define DEV_RX_OFFLOAD_SCATTER		0x00002000
>>>>  /**
>>>>   * Timestamp is set by the driver in
>>> RTE_MBUF_DYNFIELD_TIMESTAMP_NAME
>>>> @@ -1477,6 +1478,20 @@ struct rte_eth_dev_portconf {
>>>>   */
>>>>  #define RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID
>>> 	(UINT16_MAX)
>>>>
>>>> +/**
>>>> + * Reassembly capabilities that a device can support.
>>>> + * The device which can support reassembly offload should set
>>>> + * DEV_RX_OFFLOAD_REASSEMBLY
>>>> + */
>>>> +struct rte_eth_reass_capa {
>>>> +	/** Maximum time in ns that a fragment can wait for further
>>> fragments */
>>>> +	uint64_t reass_timeout;
>>>> +	/** Maximum number of fragments that device can reassemble */
>>>> +	uint16_t max_frags;
>>>> +	/** Reserved for future capabilities */
>>>> +	uint16_t reserved[3];
>>>> +};
>>>> +
>>>
>>> I wonder if there is any other hardware around supports reassembly
>>> offload, it would be good to get more feedback on the capabilities list.
>>>
>>>>  /**
>>>>   * Ethernet device associated switch information
>>>>   */
>>>> @@ -1582,8 +1597,9 @@ struct rte_eth_dev_info {
>>>>  	 * embedded managed interconnect/switch.
>>>>  	 */
>>>>  	struct rte_eth_switch_info switch_info;
>>>> +	/* Reassembly capabilities of a device for reassembly offload */
>>>> +	struct rte_eth_reass_capa reass_capa;
>>>>
>>>> -	uint64_t reserved_64s[2]; /**< Reserved for future fields */
>>>
>>> Reserved fields were added to be able to update the struct without
>>> breaking the ABI, so that a critical change doesn't have to wait until
>>> next ABI break release.
>>> Since this is ABI break release, we can keep the reserved field and
>>> add the new struct. Or this can be an opportunity to get rid of the reserved
>> field.
>>>
>>> Personally I have no objection to get rid of the reserved field, but
>>> better to agree on this explicitly.
>>>
>>>>  	void *reserved_ptrs[2];   /**< Reserved for future fields */
>>>>  };
>>>>
>>>> diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
>>>> index
>>>> bb38d7f581..cea25c87f7 100644
>>>> --- a/lib/mbuf/rte_mbuf_core.h
>>>> +++ b/lib/mbuf/rte_mbuf_core.h
>>>> @@ -200,10 +200,11 @@ extern "C" {
>>>>  #define PKT_RX_OUTER_L4_CKSUM_BAD	(1ULL << 21)
>>>>  #define PKT_RX_OUTER_L4_CKSUM_GOOD	(1ULL << 22)
>>>>  #define PKT_RX_OUTER_L4_CKSUM_INVALID	((1ULL << 21) | (1ULL
>>> << 22))
>>>> +#define PKT_RX_REASSEMBLY_INCOMPLETE	(1ULL << 23)
>>>>
>>>
>>> Similar comment with Andrew's, what is the expectation from
>>> application if this flag exists? Can we drop it to simplify the logic in the
>> application?
>>
>> [Anoob] There can be few cases where hardware/NIC attempts inline
>> reassembly but it fails to complete it
>>
>> 1. Number of fragments is larger than what is supported by the hardware 2.
>> Hardware reassembly resources are exhausted (due to limited reassembly
>> contexts etc) 3. Reassembly errors such as overlapping fragments 4. Wait
>> time exhausted (or reassembly timeout)
>>
>> In such cases, application would be required to retrieve the original
>> fragments so that it can attempt reassembly in software. The incomplete flag
>> is useful for 2 purposes basically, 1. Application would need to retrieve the
>> time the fragment has already spend in hardware reassembly so that
>> software reassembly attempt can compensate for it. Otherwise, reassembly
>> timeout across hardware + software will not be accurate 

Could you clarify how the application will find out the time spent
in HW?

>> 2. Retrieve original
>> fragments. With this proposal, an incomplete reassembly would result in a
>> chained mbuf but the segments need not be consecutive. To explain bit more,
>>
>> Suppose we have a packet that is fragmented into 3 fragments, and fragment
>> 3 & fragment 1 arrives in that order. Fragment 2 didn't arrive and hardware
>> ultimately pushes it. In that case, application would be receiving a
>> chained/segmented mbuf with fragment 1 & fragment 3 chained.
>>
>> Now, this chained mbuf can't be treated like a regular chained mbuf. Each
>> fragment would have its IP hdr and there are fragments missing in between.
>> The only thing application is expected to do is, retrieve fragments, push it to
>> s/w reassembly.

It sounds like it conflicts with the SCATTER and BUFFER_SPLIT
offloads, which also allow returning chained mbufs. Don't know
if it is good or bad, but in any case it must be documented.

> 
> What you mentioned is error identification. But actually a negotiation about max frame size is needed before datagrams tx/rx.

It sounds like it is OK for informational purposes, but
right now I don't understand how it could be used by the
application. Application still has to support reassembly
in SW regardless of the information.

>>>
>>>>  /* add new RX flags here, don't forget to update PKT_FIRST_FREE */
>>>>
>>>> -#define PKT_FIRST_FREE (1ULL << 23)
>>>> +#define PKT_FIRST_FREE (1ULL << 24)
>>>>  #define PKT_LAST_FREE (1ULL << 40)
>>>>
>>>>  /* add new TX flags here, don't forget to update PKT_LAST_FREE  */
>>>> diff --git a/lib/security/rte_security.h
>>>> b/lib/security/rte_security.h index 88d31de0a6..364eeb5cd4 100644
>>>> --- a/lib/security/rte_security.h
>>>> +++ b/lib/security/rte_security.h
>>>> @@ -181,6 +181,16 @@ struct rte_security_ipsec_sa_options {
>>>>  	 * * 0: Disable per session security statistics collection for this SA.
>>>>  	 */
>>>>  	uint32_t stats : 1;
>>>> +
>>>> +	/** Enable reassembly on incoming packets.
>>>> +	 *
>>>> +	 * * 1: Enable driver to try reassembly of encrypted IP packets for
>>>> +	 *      this SA, if supported by the driver. This feature will work
>>>> +	 *      only if rx_offload DEV_RX_OFFLOAD_REASSEMBLY is set in
>>>> +	 *      inline ethernet device.
>>>> +	 * * 0: Disable reassembly of packets (default).
>>>> +	 */
>>>> +	uint32_t reass_en : 1;
>>>>  };
>>>>
>>>>  /** IPSec security association direction */
>>>>
> 


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [dpdk-dev] [EXT] Re: [PATCH] RFC: ethdev: add reassembly offload
  2021-09-13  7:22       ` Andrew Rybchenko
@ 2021-09-14  5:14         ` Anoob Joseph
  0 siblings, 0 replies; 53+ messages in thread
From: Anoob Joseph @ 2021-09-14  5:14 UTC (permalink / raw)
  To: Andrew Rybchenko, Xu, Rosen, Yigit, Ferruh, Andrew Rybchenko
  Cc: Nicolau, Radu, Doherty, Declan, hemant.agrawal, matan, Ananyev,
	Konstantin, thomas, Ankur Dwivedi, Akhil Goyal, dev

Hi Andrew, Rosen,

Please see inline.

Thanks,
Anoob

> -----Original Message-----
> From: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
> Sent: Monday, September 13, 2021 12:52 PM
> To: Xu, Rosen <rosen.xu@intel.com>; Anoob Joseph
> <anoobj@marvell.com>; Yigit, Ferruh <ferruh.yigit@intel.com>; Andrew
> Rybchenko <arybchenko@solarflare.com>
> Cc: Nicolau, Radu <radu.nicolau@intel.com>; Doherty, Declan
> <declan.doherty@intel.com>; hemant.agrawal@nxp.com;
> matan@nvidia.com; Ananyev, Konstantin <konstantin.ananyev@intel.com>;
> thomas@monjalon.net; Ankur Dwivedi <adwivedi@marvell.com>; Akhil
> Goyal <gakhil@marvell.com>; dev@dpdk.org
> Subject: Re: [EXT] Re: [PATCH] RFC: ethdev: add reassembly offload
> 
> On 9/13/21 9:56 AM, Xu, Rosen wrote:
> > Hi,
> >
> >> -----Original Message-----
> >> From: Anoob Joseph <anoobj@marvell.com>
> >> Sent: Wednesday, September 08, 2021 18:30
> >> To: Yigit, Ferruh <ferruh.yigit@intel.com>; Xu, Rosen
> >> <rosen.xu@intel.com>; Andrew Rybchenko
> <arybchenko@solarflare.com>
> >> Cc: Nicolau, Radu <radu.nicolau@intel.com>; Doherty, Declan
> >> <declan.doherty@intel.com>; hemant.agrawal@nxp.com;
> matan@nvidia.com;
> >> Ananyev, Konstantin <konstantin.ananyev@intel.com>;
> >> thomas@monjalon.net; Ankur Dwivedi <adwivedi@marvell.com>;
> >> andrew.rybchenko@oktetlabs.ru; Akhil Goyal <gakhil@marvell.com>;
> >> dev@dpdk.org
> >> Subject: RE: [EXT] Re: [PATCH] RFC: ethdev: add reassembly offload
> >>
> >> Hi Ferruh, Rosen, Andrew,
> >>
> >> Please see inline.
> >>
> >> Thanks,
> >> Anoob
> >>
> >>> Subject: [EXT] Re: [PATCH] RFC: ethdev: add reassembly offload
> >>>
> >>> External Email
> >>>
> >>> --------------------------------------------------------------------
> >>> -- On 8/23/2021 11:02 AM, Akhil Goyal wrote:
> >>>> Reassembly is a costly operation if it is done in software,
> >>>> however, if it is offloaded to HW, it can considerably save application
> cycles.
> >>>> The operation becomes even more costlier if IP fragmants are
> >>>> encrypted.
> >>>>
> >>>> To resolve above two issues, a new offload
> >>> DEV_RX_OFFLOAD_REASSEMBLY
> >>>> is introduced in ethdev for devices which can attempt reassembly of
> >>>> packets in hardware.
> >>>> rte_eth_dev_info is added with the reassembly capabilities which a
> >>>> device can support.
> >>>> Now, if IP fragments are encrypted, reassembly can also be
> >>>> attempted while doing inline IPsec processing.
> >>>> This is controlled by a flag in rte_security_ipsec_sa_options to
> >>>> enable reassembly of encrypted IP fragments in the inline path.
> >>>>
> >>>> The resulting reassembled packet would be a typical segmented mbuf
> >>>> in case of success.
> >>>>
> >>>> And if reassembly of fragments is failed or is incomplete (if
> >>>> fragments do not come before the reass_timeout), the mbuf is
> >>>> updated with an ol_flag PKT_RX_REASSEMBLY_INCOMPLETE and mbuf
> is
> >>>> returned
> >>> as
> >>>> is. Now application may decide the fate of the packet to wait more
> >>>> for fragments to come or drop.
> >>>>
> >>>> Signed-off-by: Akhil Goyal <gakhil@marvell.com>
> >>>> ---
> >>>>  lib/ethdev/rte_ethdev.c     |  1 +
> >>>>  lib/ethdev/rte_ethdev.h     | 18 +++++++++++++++++-
> >>>>  lib/mbuf/rte_mbuf_core.h    |  3 ++-
> >>>>  lib/security/rte_security.h | 10 ++++++++++
> >>>>  4 files changed, 30 insertions(+), 2 deletions(-)
> >>>>
> >>>> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
> >>>> index 9d95cd11e1..1ab3a093cf 100644
> >>>> --- a/lib/ethdev/rte_ethdev.c
> >>>> +++ b/lib/ethdev/rte_ethdev.c
> >>>> @@ -119,6 +119,7 @@ static const struct {
> >>>>  	RTE_RX_OFFLOAD_BIT2STR(VLAN_FILTER),
> >>>>  	RTE_RX_OFFLOAD_BIT2STR(VLAN_EXTEND),
> >>>>  	RTE_RX_OFFLOAD_BIT2STR(JUMBO_FRAME),
> >>>> +	RTE_RX_OFFLOAD_BIT2STR(REASSEMBLY),
> >>>>  	RTE_RX_OFFLOAD_BIT2STR(SCATTER),
> >>>>  	RTE_RX_OFFLOAD_BIT2STR(TIMESTAMP),
> >>>>  	RTE_RX_OFFLOAD_BIT2STR(SECURITY), diff --git
> >>>> a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h index
> >>>> d2b27c351f..e89a4dc1eb 100644
> >>>> --- a/lib/ethdev/rte_ethdev.h
> >>>> +++ b/lib/ethdev/rte_ethdev.h
> >>>> @@ -1360,6 +1360,7 @@ struct rte_eth_conf {
> >>>>  #define DEV_RX_OFFLOAD_VLAN_FILTER	0x00000200
> >>>>  #define DEV_RX_OFFLOAD_VLAN_EXTEND	0x00000400
> >>>>  #define DEV_RX_OFFLOAD_JUMBO_FRAME	0x00000800
> >>>> +#define DEV_RX_OFFLOAD_REASSEMBLY	0x00001000
> >>>
> >>> previous '0x00001000' was 'DEV_RX_OFFLOAD_CRC_STRIP', it has been
> >> long
> >>> that offload has been removed, but not sure if it cause any problem
> >>> to
> >>> re- use it.
> >>>
> >>>>  #define DEV_RX_OFFLOAD_SCATTER		0x00002000
> >>>>  /**
> >>>>   * Timestamp is set by the driver in
> >>> RTE_MBUF_DYNFIELD_TIMESTAMP_NAME
> >>>> @@ -1477,6 +1478,20 @@ struct rte_eth_dev_portconf {
> >>>>   */
> >>>>  #define RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID
> >>> 	(UINT16_MAX)
> >>>>
> >>>> +/**
> >>>> + * Reassembly capabilities that a device can support.
> >>>> + * The device which can support reassembly offload should set
> >>>> + * DEV_RX_OFFLOAD_REASSEMBLY
> >>>> + */
> >>>> +struct rte_eth_reass_capa {
> >>>> +	/** Maximum time in ns that a fragment can wait for further
> >>> fragments */
> >>>> +	uint64_t reass_timeout;
> >>>> +	/** Maximum number of fragments that device can reassemble */
> >>>> +	uint16_t max_frags;
> >>>> +	/** Reserved for future capabilities */
> >>>> +	uint16_t reserved[3];
> >>>> +};
> >>>> +
> >>>
> >>> I wonder if there is any other hardware around supports reassembly
> >>> offload, it would be good to get more feedback on the capabilities list.
> >>>
> >>>>  /**
> >>>>   * Ethernet device associated switch information
> >>>>   */
> >>>> @@ -1582,8 +1597,9 @@ struct rte_eth_dev_info {
> >>>>  	 * embedded managed interconnect/switch.
> >>>>  	 */
> >>>>  	struct rte_eth_switch_info switch_info;
> >>>> +	/* Reassembly capabilities of a device for reassembly offload */
> >>>> +	struct rte_eth_reass_capa reass_capa;
> >>>>
> >>>> -	uint64_t reserved_64s[2]; /**< Reserved for future fields */
> >>>
> >>> Reserved fields were added to be able to update the struct without
> >>> breaking the ABI, so that a critical change doesn't have to wait
> >>> until next ABI break release.
> >>> Since this is ABI break release, we can keep the reserved field and
> >>> add the new struct. Or this can be an opportunity to get rid of the
> >>> reserved
> >> field.
> >>>
> >>> Personally I have no objection to get rid of the reserved field, but
> >>> better to agree on this explicitly.
> >>>
> >>>>  	void *reserved_ptrs[2];   /**< Reserved for future fields */
> >>>>  };
> >>>>
> >>>> diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
> >>>> index
> >>>> bb38d7f581..cea25c87f7 100644
> >>>> --- a/lib/mbuf/rte_mbuf_core.h
> >>>> +++ b/lib/mbuf/rte_mbuf_core.h
> >>>> @@ -200,10 +200,11 @@ extern "C" {
> >>>>  #define PKT_RX_OUTER_L4_CKSUM_BAD	(1ULL << 21)
> >>>>  #define PKT_RX_OUTER_L4_CKSUM_GOOD	(1ULL << 22)
> >>>>  #define PKT_RX_OUTER_L4_CKSUM_INVALID	((1ULL << 21) | (1ULL
> >>> << 22))
> >>>> +#define PKT_RX_REASSEMBLY_INCOMPLETE	(1ULL << 23)
> >>>>
> >>>
> >>> Similar comment with Andrew's, what is the expectation from
> >>> application if this flag exists? Can we drop it to simplify the
> >>> logic in the
> >> application?
> >>
> >> [Anoob] There can be few cases where hardware/NIC attempts inline
> >> reassembly but it fails to complete it
> >>
> >> 1. Number of fragments is larger than what is supported by the hardware
> 2.
> >> Hardware reassembly resources are exhausted (due to limited
> >> reassembly contexts etc) 3. Reassembly errors such as overlapping
> >> fragments 4. Wait time exhausted (or reassembly timeout)
> >>
> >> In such cases, application would be required to retrieve the original
> >> fragments so that it can attempt reassembly in software. The
> >> incomplete flag is useful for 2 purposes basically, 1. Application
> >> would need to retrieve the time the fragment has already spend in
> >> hardware reassembly so that software reassembly attempt can
> >> compensate for it. Otherwise, reassembly timeout across hardware +
> >> software will not be accurate
> 
> Could you clarify how application will find out the time spent in HW.

[Anoob] We could use rte_mbuf dynamic fields for this. It looks like the RFC hasn't touched on this aspect yet.
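
A minimal sketch of how an application might consume such a dynamic field, assuming a hypothetical dynfield named "hw_reass_time_spent" carrying milliseconds (neither the name nor the unit is defined by the RFC at this point):

#include <rte_mbuf.h>
#include <rte_mbuf_dyn.h>

static int hw_time_dynfield_offset = -1;

/* Look up the (hypothetical) dynfield registered by the PMD. */
static int
hw_time_dynfield_lookup(void)
{
	hw_time_dynfield_offset =
		rte_mbuf_dynfield_lookup("hw_reass_time_spent", NULL);
	return hw_time_dynfield_offset;
}

/* Shrink the SW reassembly timeout by the time already spent in HW. */
static uint32_t
sw_reass_timeout_ms(struct rte_mbuf *m, uint32_t app_timeout_ms)
{
	uint16_t spent_ms;

	if (hw_time_dynfield_offset < 0)
		return app_timeout_ms;	/* no HW information available */

	spent_ms = *RTE_MBUF_DYNFIELD(m, hw_time_dynfield_offset, uint16_t *);
	return app_timeout_ms > spent_ms ? app_timeout_ms - spent_ms : 0;
}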
 
> 
> >> 2. Retrieve original
> >> fragments. With this proposal, an incomplete reassembly would result
> >> in a chained mbuf but the segments need not be consecutive. To
> >> explain bit more,
> >>
> >> Suppose we have a packet that is fragmented into 3 fragments, and
> >> fragment
> >> 3 & fragment 1 arrives in that order. Fragment 2 didn't arrive and
> >> hardware ultimately pushes it. In that case, application would be
> >> receiving a chained/segmented mbuf with fragment 1 & fragment 3
> chained.
> >>
> >> Now, this chained mbuf can't be treated like a regular chained mbuf.
> >> Each fragment would have its IP hdr and there are fragments missing in
> between.
> >> The only thing application is expected to do is, retrieve fragments,
> >> push it to s/w reassembly.
> 
> It sounds like it conflicts with SCATTER and BUFFER_SPLIT offloads which
> allow to return chained mbuf's. Don't know if it is good or bad, but anyway it
> must be documented.

[Anoob] Agreed.
 
> 
> >
> > What you mentioned is error identification. But actually a negotiation about
> max frame size is needed before datagrams tx/rx.

[Anoob] The actual reassembly settings would be negotiated by the s/w. The offload can be thought of like how checksum offload is handled now: s/w negotiates with the peer and then enables the hardware to accelerate. If the hardware is able to reassemble, well and good. If not, software compensates for it.
 
> 
> It sounds like it is OK for informational purposes, but right now I don't
> understand how it could be used by the application. Application still has to
> support reassembly in SW regardless of the information.

[Anoob] The additional information from an "incomplete reassembly" attempt would be useful for software to properly compensate for the hardware reassembly attempt (basically, so that the reassembly timeout is honored across the s/w + h/w reassembly attempts).

The benefit of such an offload is in accelerating reassembly in hardware for performance use cases. If the application expects heavy fragmentation, then every packet would typically cost ~1000 cycles to get reassembled. By offloading this (at least some portion of it) to hardware, the application would be able to save significant cycles.

Since IP reassembly presents varying challenges depending on the hardware implementation, we cannot expect a complete reassembly offload in hardware. For some vendors, the maximum number of fragments supported could be limited. Some vendors could have a limited reassembly timeout (or wait_time). Some vendors could have limitations depending on datagram sizes. So s/w reassembly is not going away even with the proposed hardware-assisted inline reassembly.
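
As a rough sketch of that fallback path, assuming the fragments arrive as a chain the NIC returned with the incomplete flag set, and using the existing librte_ip_frag API for the SW stage (l2_len is assumed to be already valid on each fragment):

#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_mbuf.h>

/* Detach each fragment from the HW-built chain and feed it to SW reassembly. */
static struct rte_mbuf *
sw_reassemble_fallback(struct rte_ip_frag_tbl *tbl,
		       struct rte_ip_frag_death_row *dr,
		       struct rte_mbuf *chain, uint64_t tms)
{
	struct rte_mbuf *frag = chain;
	struct rte_mbuf *out = NULL;

	while (frag != NULL) {
		struct rte_mbuf *next = frag->next;
		struct rte_ipv4_hdr *ip_hdr;

		/* Make the fragment a standalone single-segment mbuf. */
		frag->next = NULL;
		frag->nb_segs = 1;
		frag->pkt_len = frag->data_len;

		ip_hdr = rte_pktmbuf_mtod_offset(frag, struct rte_ipv4_hdr *,
						 frag->l2_len);
		frag->l3_len = (ip_hdr->version_ihl & RTE_IPV4_HDR_IHL_MASK) *
			       RTE_IPV4_IHL_MULTIPLIER;

		out = rte_ipv4_frag_reassemble_packet(tbl, dr, frag, tms, ip_hdr);
		frag = next;
	}
	/* Caller should periodically call rte_ip_frag_free_death_row(dr, 0). */
	return out;	/* non-NULL once the datagram is complete */
}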

> 
> >>>
> >>>>  /* add new RX flags here, don't forget to update PKT_FIRST_FREE */
> >>>>
> >>>> -#define PKT_FIRST_FREE (1ULL << 23)
> >>>> +#define PKT_FIRST_FREE (1ULL << 24)
> >>>>  #define PKT_LAST_FREE (1ULL << 40)
> >>>>
> >>>>  /* add new TX flags here, don't forget to update PKT_LAST_FREE  */
> >>>> diff --git a/lib/security/rte_security.h
> >>>> b/lib/security/rte_security.h index 88d31de0a6..364eeb5cd4 100644
> >>>> --- a/lib/security/rte_security.h
> >>>> +++ b/lib/security/rte_security.h
> >>>> @@ -181,6 +181,16 @@ struct rte_security_ipsec_sa_options {
> >>>>  	 * * 0: Disable per session security statistics collection for this SA.
> >>>>  	 */
> >>>>  	uint32_t stats : 1;
> >>>> +
> >>>> +	/** Enable reassembly on incoming packets.
> >>>> +	 *
> >>>> +	 * * 1: Enable driver to try reassembly of encrypted IP packets for
> >>>> +	 *      this SA, if supported by the driver. This feature will work
> >>>> +	 *      only if rx_offload DEV_RX_OFFLOAD_REASSEMBLY is set in
> >>>> +	 *      inline ethernet device.
> >>>> +	 * * 0: Disable reassembly of packets (default).
> >>>> +	 */
> >>>> +	uint32_t reass_en : 1;
> >>>>  };
> >>>>
> >>>>  /** IPSec security association direction */
> >>>>
> >


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [dpdk-dev] [EXT] Re: [PATCH] RFC: ethdev: add reassembly offload
  2021-08-29 13:14   ` [dpdk-dev] [EXT] " Akhil Goyal
@ 2021-09-21 19:59     ` Thomas Monjalon
  0 siblings, 0 replies; 53+ messages in thread
From: Thomas Monjalon @ 2021-09-21 19:59 UTC (permalink / raw)
  To: Akhil Goyal
  Cc: Andrew Rybchenko, dev, Anoob Joseph, radu.nicolau,
	declan.doherty, hemant.agrawal, matan, konstantin.ananyev,
	Ankur Dwivedi, ferruh.yigit

29/08/2021 15:14, Akhil Goyal:
> > On 8/23/21 1:02 PM, Akhil Goyal wrote:
> > > +#define DEV_RX_OFFLOAD_REASSEMBLY	0x00001000
> > 
> > I think it should be:
> > RTE_ETH_RX_OFFLOAD_IPV4_REASSEMBLY
> > 
> > i.e. have correct prefix similar to
> > RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT and mention IPv4.
> > 
> > If we'd like to cover IPv6 as well, it could be
> > RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY and have IPv4/6
> > support bits in the offload capabilities below.
> 
> Intention is to update spec for both.
> Will update the capabilities accordingly to have both IPv4 and IPv6.
> 
> > 
> > >  #define DEV_RX_OFFLOAD_SCATTER		0x00002000
> > >  /**
> > >   * Timestamp is set by the driver in
> > RTE_MBUF_DYNFIELD_TIMESTAMP_NAME
> > > @@ -1477,6 +1478,20 @@ struct rte_eth_dev_portconf {
> > >   */
> > >  #define RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID
> > 	(UINT16_MAX)
> > >
> > > +/**
> > > + * Reassembly capabilities that a device can support.
> > > + * The device which can support reassembly offload should set
> > > + * DEV_RX_OFFLOAD_REASSEMBLY
> > > + */
> > > +struct rte_eth_reass_capa {

Please add "IP" in flags, struct and comments.



^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH 0/8] ethdev: introduce IP reassembly offload
  2021-08-23 10:02 [dpdk-dev] [PATCH] RFC: ethdev: add reassembly offload Akhil Goyal
                   ` (2 preceding siblings ...)
  2021-09-08  6:34 ` [dpdk-dev] " Xu, Rosen
@ 2022-01-03 15:08 ` Akhil Goyal
  2022-01-03 15:08   ` [PATCH 1/8] " Akhil Goyal
                     ` (9 more replies)
  3 siblings, 10 replies; 53+ messages in thread
From: Akhil Goyal @ 2022-01-03 15:08 UTC (permalink / raw)
  To: dev
  Cc: anoobj, radu.nicolau, declan.doherty, hemant.agrawal, matan,
	konstantin.ananyev, thomas, ferruh.yigit, andrew.rybchenko,
	olivier.matz, rosen.xu, Akhil Goyal

As discussed in the RFC[1] sent in 21.11, a new offload is
introduced in ethdev for IP reassembly.

This patchset adds the Rx offload and an application to test it.
Currently, the offload is tested along with inline IPsec processing.
It can also be tested as a standalone offload without IPsec, if
hardware is available to test it.
The patchset is tested on the cnxk platform. The driver implementation
is added as a separate patchset.

[1]: http://patches.dpdk.org/project/dpdk/patch/20210823100259.1619886-1-gakhil@marvell.com/


Akhil Goyal (8):
  ethdev: introduce IP reassembly offload
  ethdev: add dev op for IP reassembly configuration
  ethdev: add mbuf dynfield for incomplete IP reassembly
  security: add IPsec option for IP reassembly
  app/test: add unit cases for inline IPsec offload
  app/test: add IP reassembly case with no frags
  app/test: add IP reassembly cases with multiple fragments
  app/test: add IP reassembly negative cases

 app/test/meson.build                          |    1 +
 app/test/test_inline_ipsec.c                  | 1036 +++++++++++++++++
 .../test_inline_ipsec_reassembly_vectors.h    |  790 +++++++++++++
 doc/guides/nics/features.rst                  |   12 +
 lib/ethdev/ethdev_driver.h                    |   27 +
 lib/ethdev/rte_ethdev.c                       |   47 +
 lib/ethdev/rte_ethdev.h                       |  117 +-
 lib/ethdev/version.map                        |    5 +
 lib/mbuf/rte_mbuf_core.h                      |    3 +-
 lib/security/rte_security.h                   |   12 +-
 10 files changed, 2047 insertions(+), 3 deletions(-)
 create mode 100644 app/test/test_inline_ipsec.c
 create mode 100644 app/test/test_inline_ipsec_reassembly_vectors.h

-- 
2.25.1


^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH 1/8] ethdev: introduce IP reassembly offload
  2022-01-03 15:08 ` [PATCH 0/8] ethdev: introduce IP " Akhil Goyal
@ 2022-01-03 15:08   ` Akhil Goyal
  2022-01-11 16:03     ` Ananyev, Konstantin
  2022-01-22  7:38     ` Andrew Rybchenko
  2022-01-03 15:08   ` [PATCH 2/8] ethdev: add dev op for IP reassembly configuration Akhil Goyal
                     ` (8 subsequent siblings)
  9 siblings, 2 replies; 53+ messages in thread
From: Akhil Goyal @ 2022-01-03 15:08 UTC (permalink / raw)
  To: dev
  Cc: anoobj, radu.nicolau, declan.doherty, hemant.agrawal, matan,
	konstantin.ananyev, thomas, ferruh.yigit, andrew.rybchenko,
	olivier.matz, rosen.xu, Akhil Goyal

IP reassembly is a costly operation if it is done in software.
The operation becomes even costlier if the IP fragments are encrypted.
However, if it is offloaded to HW, it can considerably save application cycles.

Hence, a new offload RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY is introduced in
ethdev for devices which can attempt reassembly of packets in hardware.
rte_eth_dev_info is updated with the reassembly capabilities which a device
can support.

The resulting reassembled packet would be a typical segmented mbuf in
case of success.

And if reassembly of the fragments fails or is incomplete (if fragments do
not arrive before the reass_timeout), the mbuf ol_flags can be updated.
This is added in a subsequent patch.

Signed-off-by: Akhil Goyal <gakhil@marvell.com>
---
 doc/guides/nics/features.rst | 12 ++++++++++++
 lib/ethdev/rte_ethdev.c      |  1 +
 lib/ethdev/rte_ethdev.h      | 32 +++++++++++++++++++++++++++++++-
 3 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/doc/guides/nics/features.rst b/doc/guides/nics/features.rst
index 27be2d2576..1dfdee9602 100644
--- a/doc/guides/nics/features.rst
+++ b/doc/guides/nics/features.rst
@@ -602,6 +602,18 @@ Supports inner packet L4 checksum.
   ``tx_offload_capa,tx_queue_offload_capa:RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM``.
 
 
+.. _nic_features_ip_reassembly:
+
+IP reassembly
+-------------
+
+Supports IP reassembly in hardware.
+
+* **[uses]     rte_eth_rxconf,rte_eth_rxmode**: ``offloads:RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY``.
+* **[provides] mbuf**: ``mbuf.ol_flags:RTE_MBUF_F_RX_IP_REASSEMBLY_INCOMPLETE``.
+* **[provides] rte_eth_dev_info**: ``reass_capa``.
+
+
 .. _nic_features_shared_rx_queue:
 
 Shared Rx queue
diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index a1d475a292..d9a03f12f9 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -126,6 +126,7 @@ static const struct {
 	RTE_RX_OFFLOAD_BIT2STR(OUTER_UDP_CKSUM),
 	RTE_RX_OFFLOAD_BIT2STR(RSS_HASH),
 	RTE_RX_OFFLOAD_BIT2STR(BUFFER_SPLIT),
+	RTE_RX_OFFLOAD_BIT2STR(IP_REASSEMBLY),
 };
 
 #undef RTE_RX_OFFLOAD_BIT2STR
diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index fa299c8ad7..11427b2e4d 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -1586,6 +1586,7 @@ struct rte_eth_conf {
 #define RTE_ETH_RX_OFFLOAD_RSS_HASH         RTE_BIT64(19)
 #define DEV_RX_OFFLOAD_RSS_HASH             RTE_ETH_RX_OFFLOAD_RSS_HASH
 #define RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT     RTE_BIT64(20)
+#define RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY    RTE_BIT64(21)
 
 #define RTE_ETH_RX_OFFLOAD_CHECKSUM (RTE_ETH_RX_OFFLOAD_IPV4_CKSUM | \
 				 RTE_ETH_RX_OFFLOAD_UDP_CKSUM | \
@@ -1781,6 +1782,33 @@ enum rte_eth_representor_type {
 	RTE_ETH_REPRESENTOR_PF,   /**< representor of Physical Function. */
 };
 
+/* Flag to offload IP reassembly for IPv4 packets. */
+#define RTE_ETH_DEV_REASSEMBLY_F_IPV4 (RTE_BIT32(0))
+/* Flag to offload IP reassembly for IPv6 packets. */
+#define RTE_ETH_DEV_REASSEMBLY_F_IPV6 (RTE_BIT32(1))
+/**
+ * @warning
+ * @b EXPERIMENTAL: this structure may change without prior notice.
+ *
+ * A structure used to set IP reassembly configuration.
+ *
+ * If RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY flag is set in offloads field,
+ * the PMD will attempt IP reassembly for the received packets as per
+ * properties defined in this structure:
+ *
+ */
+struct rte_eth_ip_reass_params {
+	/** Maximum time in ms which PMD can wait for other fragments. */
+	uint32_t reass_timeout;
+	/** Maximum number of fragments that can be reassembled. */
+	uint16_t max_frags;
+	/**
+	 * Flags to enable reassembly of packet types -
+	 * RTE_ETH_DEV_REASSEMBLY_F_xxx.
+	 */
+	uint16_t flags;
+};
+
 /**
  * A structure used to retrieve the contextual information of
  * an Ethernet device, such as the controlling driver of the
@@ -1841,8 +1869,10 @@ struct rte_eth_dev_info {
 	 * embedded managed interconnect/switch.
 	 */
 	struct rte_eth_switch_info switch_info;
+	/** IP reassembly offload capabilities that a device can support. */
+	struct rte_eth_ip_reass_params reass_capa;
 
-	uint64_t reserved_64s[2]; /**< Reserved for future fields */
+	uint64_t reserved_64s[1]; /**< Reserved for future fields */
 	void *reserved_ptrs[2];   /**< Reserved for future fields */
 };
 
-- 
2.25.1
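
A minimal usage sketch (not part of the patch) of how an application could probe the new capability and request the offload before configuring the port; error handling is trimmed:

#include <errno.h>
#include <rte_ethdev.h>

static int
enable_ip_reassembly(uint16_t port_id, struct rte_eth_conf *conf)
{
	struct rte_eth_dev_info dev_info;
	int ret;

	ret = rte_eth_dev_info_get(port_id, &dev_info);
	if (ret != 0)
		return ret;

	/* reass_capa is meaningful only when the offload bit is advertised. */
	if ((dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY) == 0 ||
	    dev_info.reass_capa.max_frags == 0)
		return -ENOTSUP;

	conf->rxmode.offloads |= RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY;
	/* rte_eth_dev_configure(port_id, nb_rxq, nb_txq, conf) follows. */
	return 0;
}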


^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH 2/8] ethdev: add dev op for IP reassembly configuration
  2022-01-03 15:08 ` [PATCH 0/8] ethdev: introduce IP " Akhil Goyal
  2022-01-03 15:08   ` [PATCH 1/8] " Akhil Goyal
@ 2022-01-03 15:08   ` Akhil Goyal
  2022-01-11 16:09     ` Ananyev, Konstantin
  2022-01-03 15:08   ` [PATCH 3/8] ethdev: add mbuf dynfield for incomplete IP reassembly Akhil Goyal
                     ` (7 subsequent siblings)
  9 siblings, 1 reply; 53+ messages in thread
From: Akhil Goyal @ 2022-01-03 15:08 UTC (permalink / raw)
  To: dev
  Cc: anoobj, radu.nicolau, declan.doherty, hemant.agrawal, matan,
	konstantin.ananyev, thomas, ferruh.yigit, andrew.rybchenko,
	olivier.matz, rosen.xu, Akhil Goyal

A new Ethernet device op is added to give the application control over
the IP reassembly configuration. This operation is an optional call
from the application; default values are set by the PMD and exposed
via rte_eth_dev_info.
The application should always first retrieve the capabilities from
rte_eth_dev_info and then set the fields accordingly.

Signed-off-by: Akhil Goyal <gakhil@marvell.com>
---
 lib/ethdev/ethdev_driver.h | 19 +++++++++++++++++++
 lib/ethdev/rte_ethdev.c    | 30 ++++++++++++++++++++++++++++++
 lib/ethdev/rte_ethdev.h    | 28 ++++++++++++++++++++++++++++
 lib/ethdev/version.map     |  3 +++
 4 files changed, 80 insertions(+)

diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h
index d95605a355..0ed53c14f3 100644
--- a/lib/ethdev/ethdev_driver.h
+++ b/lib/ethdev/ethdev_driver.h
@@ -990,6 +990,22 @@ typedef int (*eth_representor_info_get_t)(struct rte_eth_dev *dev,
 typedef int (*eth_rx_metadata_negotiate_t)(struct rte_eth_dev *dev,
 				       uint64_t *features);
 
+/**
+ * @internal
+ * Set configuration parameters for enabling IP reassembly offload in hardware.
+ *
+ * @param dev
+ *   Port (ethdev) handle
+ *
+ * @param[in] conf
+ *   Configuration parameters for IP reassembly.
+ *
+ * @return
+ *   Negative errno value on error, zero otherwise
+ */
+typedef int (*eth_ip_reassembly_conf_set_t)(struct rte_eth_dev *dev,
+				       struct rte_eth_ip_reass_params *conf);
+
 /**
  * @internal A structure containing the functions exported by an Ethernet driver.
  */
@@ -1186,6 +1202,9 @@ struct eth_dev_ops {
 	 * kinds of metadata to the PMD
 	 */
 	eth_rx_metadata_negotiate_t rx_metadata_negotiate;
+
+	/** Set IP reassembly configuration */
+	eth_ip_reassembly_conf_set_t ip_reassembly_conf_set;
 };
 
 /**
diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index d9a03f12f9..ecc6c1fe37 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -6473,6 +6473,36 @@ rte_eth_rx_metadata_negotiate(uint16_t port_id, uint64_t *features)
 		       (*dev->dev_ops->rx_metadata_negotiate)(dev, features));
 }
 
+int
+rte_eth_ip_reassembly_conf_set(uint16_t port_id,
+			       struct rte_eth_ip_reass_params *conf)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+	dev = &rte_eth_devices[port_id];
+
+	if ((dev->data->dev_conf.rxmode.offloads &
+			RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY) == 0) {
+		RTE_ETHDEV_LOG(ERR,
+			"The port (ID=%"PRIu16") is not configured for IP reassembly\n",
+			port_id);
+		return -EINVAL;
+	}
+
+
+	if (conf == NULL) {
+		RTE_ETHDEV_LOG(ERR,
+				"Invalid IP reassembly configuration (NULL)\n");
+		return -EINVAL;
+	}
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->ip_reassembly_conf_set,
+				-ENOTSUP);
+	return eth_err(port_id,
+		       (*dev->dev_ops->ip_reassembly_conf_set)(dev, conf));
+}
+
 RTE_LOG_REGISTER_DEFAULT(rte_eth_dev_logtype, INFO);
 
 RTE_INIT(ethdev_init_telemetry)
diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index 11427b2e4d..891f9a6e06 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -5218,6 +5218,34 @@ int rte_eth_representor_info_get(uint16_t port_id,
 __rte_experimental
 int rte_eth_rx_metadata_negotiate(uint16_t port_id, uint64_t *features);
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Set IP reassembly configuration parameters if device rx offload
+ * flag (RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY) is enabled and the PMD
+ * supports IP reassembly offload. User should first check the
+ * reass_capa in rte_eth_dev_info before setting the configuration.
+ * The values of configuration parameters must not exceed the device
+ * capabilities. The use of this API is optional and if called, it
+ * should be called before rte_eth_dev_start().
+ *
+ * @param port_id
+ *   The port identifier of the device.
+ * @param conf
+ *   A pointer to rte_eth_ip_reass_params structure.
+ * @return
+ *   - (-ENOTSUP) if offload configuration is not supported by device.
+ *   - (-EINVAL) if offload is not enabled in rte_eth_conf.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EIO) if device is removed.
+ *   - (0) on success.
+ */
+__rte_experimental
+int rte_eth_ip_reassembly_conf_set(uint16_t port_id,
+				   struct rte_eth_ip_reass_params *conf);
+
+
 #include <rte_ethdev_core.h>
 
 /**
diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
index c2fb0669a4..f08fe72044 100644
--- a/lib/ethdev/version.map
+++ b/lib/ethdev/version.map
@@ -256,6 +256,9 @@ EXPERIMENTAL {
 	rte_flow_flex_item_create;
 	rte_flow_flex_item_release;
 	rte_flow_pick_transfer_proxy;
+
+	#added in 22.03
+	rte_eth_ip_reassembly_conf_set;
 };
 
 INTERNAL {
-- 
2.25.1
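
A short sketch of the intended call sequence, assuming the structures from the previous patch: the application clamps its preferences to the advertised capability and programs the PMD before rte_eth_dev_start().

#include <rte_common.h>
#include <rte_ethdev.h>

static int
configure_ip_reassembly(uint16_t port_id, uint32_t want_timeout_ms,
			uint16_t want_max_frags)
{
	struct rte_eth_dev_info dev_info;
	struct rte_eth_ip_reass_params conf;
	int ret;

	ret = rte_eth_dev_info_get(port_id, &dev_info);
	if (ret != 0)
		return ret;

	/* Never exceed what the device reports in reass_capa. */
	conf.reass_timeout = RTE_MIN(want_timeout_ms,
				     dev_info.reass_capa.reass_timeout);
	conf.max_frags = RTE_MIN(want_max_frags, dev_info.reass_capa.max_frags);
	conf.flags = dev_info.reass_capa.flags;	/* e.g. IPV4 and/or IPV6 */

	return rte_eth_ip_reassembly_conf_set(port_id, &conf);
}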


^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH 3/8] ethdev: add mbuf dynfield for incomplete IP reassembly
  2022-01-03 15:08 ` [PATCH 0/8] ethdev: introduce IP " Akhil Goyal
  2022-01-03 15:08   ` [PATCH 1/8] " Akhil Goyal
  2022-01-03 15:08   ` [PATCH 2/8] ethdev: add dev op for IP reassembly configuration Akhil Goyal
@ 2022-01-03 15:08   ` Akhil Goyal
  2022-01-11 17:04     ` Ananyev, Konstantin
  2022-01-03 15:08   ` [PATCH 4/8] security: add IPsec option for " Akhil Goyal
                     ` (6 subsequent siblings)
  9 siblings, 1 reply; 53+ messages in thread
From: Akhil Goyal @ 2022-01-03 15:08 UTC (permalink / raw)
  To: dev
  Cc: anoobj, radu.nicolau, declan.doherty, hemant.agrawal, matan,
	konstantin.ananyev, thomas, ferruh.yigit, andrew.rybchenko,
	olivier.matz, rosen.xu, Akhil Goyal

Hardware IP reassembly may be incomplete for multiple reasons, such as
the reassembly timeout being reached, duplicate fragments, etc.
To save the application cycles spent processing these packets again, a new
mbuf ol_flag (RTE_MBUF_F_RX_IPREASSEMBLY_INCOMPLETE) is added to
show that the received mbuf is not reassembled properly.

If this flag is set, the application can retrieve the corresponding chain of
mbufs using an mbuf dynfield set by the PMD. It is then up to the
application to either drop those fragments or wait for more time.

Signed-off-by: Akhil Goyal <gakhil@marvell.com>
---
 lib/ethdev/ethdev_driver.h |  8 ++++++
 lib/ethdev/rte_ethdev.c    | 16 +++++++++++
 lib/ethdev/rte_ethdev.h    | 57 ++++++++++++++++++++++++++++++++++++++
 lib/ethdev/version.map     |  2 ++
 lib/mbuf/rte_mbuf_core.h   |  3 +-
 5 files changed, 85 insertions(+), 1 deletion(-)

diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h
index 0ed53c14f3..9a0bab9a61 100644
--- a/lib/ethdev/ethdev_driver.h
+++ b/lib/ethdev/ethdev_driver.h
@@ -1671,6 +1671,14 @@ int
 rte_eth_hairpin_queue_peer_unbind(uint16_t cur_port, uint16_t cur_queue,
 				  uint32_t direction);
 
+/**
+ * @internal
+ * Register mbuf dynamic field for IP reassembly incomplete case.
+ */
+__rte_internal
+int
+rte_eth_ip_reass_dynfield_register(void);
+
 
 /*
  * Legacy ethdev API used internally by drivers.
diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index ecc6c1fe37..d53ce4eaca 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -6503,6 +6503,22 @@ rte_eth_ip_reassembly_conf_set(uint16_t port_id,
 		       (*dev->dev_ops->ip_reassembly_conf_set)(dev, conf));
 }
 
+#define RTE_ETH_IP_REASS_DYNFIELD_NAME "rte_eth_ip_reass_dynfield"
+int rte_eth_ip_reass_dynfield_offset = -1;
+
+int
+rte_eth_ip_reass_dynfield_register(void)
+{
+	static const struct rte_mbuf_dynfield dynfield_desc = {
+		.name = RTE_ETH_IP_REASS_DYNFIELD_NAME,
+		.size = sizeof(rte_eth_ip_reass_dynfield_t),
+		.align = __alignof__(rte_eth_ip_reass_dynfield_t),
+	};
+	rte_eth_ip_reass_dynfield_offset =
+		rte_mbuf_dynfield_register(&dynfield_desc);
+	return rte_eth_ip_reass_dynfield_offset;
+}
+
 RTE_LOG_REGISTER_DEFAULT(rte_eth_dev_logtype, INFO);
 
 RTE_INIT(ethdev_init_telemetry)
diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index 891f9a6e06..c4024d2265 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -5245,6 +5245,63 @@ __rte_experimental
 int rte_eth_ip_reassembly_conf_set(uint16_t port_id,
 				   struct rte_eth_ip_reass_params *conf);
 
+/**
+ * In case of IP reassembly offload failure, ol_flags in mbuf will be set
+ * with RTE_MBUF_F_RX_IPREASSEMBLY_INCOMPLETE and packets will be returned
+ * without alteration. The application can retrieve the attached fragments
+ * using mbuf dynamic field.
+ */
+typedef struct {
+	/**
+	 * Next fragment packet. Application should fetch dynamic field of
+	 * each fragment until a NULL is received and nb_frags is 0.
+	 */
+	struct rte_mbuf *next_frag;
+	/** Time spent(in ms) by HW in waiting for further fragments. */
+	uint16_t time_spent;
+	/** Number of more fragments attached in mbuf dynamic fields. */
+	uint16_t nb_frags;
+} rte_eth_ip_reass_dynfield_t;
+
+extern int rte_eth_ip_reass_dynfield_offset;
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Get pointer to mbuf dynamic field for getting incomplete
+ * reassembled fragments.
+ *
+ * For performance reason, no check is done,
+ * the dynamic field may not be registered.
+ * @see rte_eth_ip_reass_dynfield_is_registered
+ *
+ * @param	mbuf	packet to access
+ * @return pointer to mbuf dynamic field
+ */
+__rte_experimental
+static inline rte_eth_ip_reass_dynfield_t *
+rte_eth_ip_reass_dynfield(struct rte_mbuf *mbuf)
+{
+	return RTE_MBUF_DYNFIELD(mbuf,
+		rte_eth_ip_reass_dynfield_offset,
+		rte_eth_ip_reass_dynfield_t *);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Check whether the dynamic field is registered.
+ *
+ * @return true if rte_eth_ip_reass_dynfield_register() has been called.
+ */
+__rte_experimental
+static inline bool rte_eth_ip_reass_dynfield_is_registered(void)
+{
+	return rte_eth_ip_reass_dynfield_offset >= 0;
+}
+
 
 #include <rte_ethdev_core.h>
 
diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
index f08fe72044..e824b776b1 100644
--- a/lib/ethdev/version.map
+++ b/lib/ethdev/version.map
@@ -259,6 +259,7 @@ EXPERIMENTAL {
 
 	#added in 22.03
 	rte_eth_ip_reassembly_conf_set;
+	rte_eth_ip_reass_dynfield_offset;
 };
 
 INTERNAL {
@@ -282,6 +283,7 @@ INTERNAL {
 	rte_eth_hairpin_queue_peer_bind;
 	rte_eth_hairpin_queue_peer_unbind;
 	rte_eth_hairpin_queue_peer_update;
+	rte_eth_ip_reass_dynfield_register;
 	rte_eth_representor_id_get;
 	rte_eth_switch_domain_alloc;
 	rte_eth_switch_domain_free;
diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
index 321a419c71..2cd1f95ae4 100644
--- a/lib/mbuf/rte_mbuf_core.h
+++ b/lib/mbuf/rte_mbuf_core.h
@@ -233,10 +233,11 @@ extern "C" {
 #define PKT_RX_OUTER_L4_CKSUM_INVALID \
 	RTE_DEPRECATED(PKT_RX_OUTER_L4_CKSUM_INVALID) \
 	RTE_MBUF_F_RX_OUTER_L4_CKSUM_INVALID
+#define RTE_MBUF_F_RX_IPREASSEMBLY_INCOMPLETE   (1ULL << 23)
 
 /* add new RX flags here, don't forget to update RTE_MBUF_F_FIRST_FREE */
 
-#define RTE_MBUF_F_FIRST_FREE (1ULL << 23)
+#define RTE_MBUF_F_FIRST_FREE (1ULL << 24)
 #define PKT_FIRST_FREE RTE_DEPRECATED(PKT_FIRST_FREE) RTE_MBUF_F_FIRST_FREE
 #define RTE_MBUF_F_LAST_FREE (1ULL << 40)
 #define PKT_LAST_FREE RTE_DEPRECATED(PKT_LAST_FREE) RTE_MBUF_F_LAST_FREE
-- 
2.25.1
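
A sketch of the intended Rx-path usage, based on the dynfield layout above; what to do with each fragment (SW reassembly or drop) is left to the application:

#include <rte_ethdev.h>
#include <rte_mbuf.h>

static void
handle_incomplete_reassembly(struct rte_mbuf *m)
{
	rte_eth_ip_reass_dynfield_t *df;
	struct rte_mbuf *frag = m;

	if ((m->ol_flags & RTE_MBUF_F_RX_IPREASSEMBLY_INCOMPLETE) == 0 ||
	    !rte_eth_ip_reass_dynfield_is_registered())
		return;

	/* Walk the fragments attached by the PMD via the dynamic field. */
	while (frag != NULL) {
		df = rte_eth_ip_reass_dynfield(frag);
		/* df->time_spent (ms) can shorten the SW reassembly timeout;
		 * hand "frag" to SW reassembly or drop it here.
		 */
		frag = (df->nb_frags > 0) ? df->next_frag : NULL;
	}
}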


^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH 4/8] security: add IPsec option for IP reassembly
  2022-01-03 15:08 ` [PATCH 0/8] ethdev: introduce IP " Akhil Goyal
                     ` (2 preceding siblings ...)
  2022-01-03 15:08   ` [PATCH 3/8] ethdev: add mbuf dynfield for incomplete IP reassembly Akhil Goyal
@ 2022-01-03 15:08   ` Akhil Goyal
  2022-01-03 15:08   ` [PATCH 5/8] app/test: add unit cases for inline IPsec offload Akhil Goyal
                     ` (5 subsequent siblings)
  9 siblings, 0 replies; 53+ messages in thread
From: Akhil Goyal @ 2022-01-03 15:08 UTC (permalink / raw)
  To: dev
  Cc: anoobj, radu.nicolau, declan.doherty, hemant.agrawal, matan,
	konstantin.ananyev, thomas, ferruh.yigit, andrew.rybchenko,
	olivier.matz, rosen.xu, Akhil Goyal

A new option is added in IPsec to enable and attempt reassembly
of inbound packets.

Signed-off-by: Akhil Goyal <gakhil@marvell.com>
---
 lib/security/rte_security.h | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/lib/security/rte_security.h b/lib/security/rte_security.h
index 1228b6c8b1..168b837a82 100644
--- a/lib/security/rte_security.h
+++ b/lib/security/rte_security.h
@@ -264,6 +264,16 @@ struct rte_security_ipsec_sa_options {
 	 */
 	uint32_t l4_csum_enable : 1;
 
+	/** Enable reassembly on incoming packets.
+	 *
+	 * * 1: Enable driver to try reassembly of encrypted IP packets for
+	 *      this SA, if supported by the driver. This feature will work
+	 *      only if rx_offload RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY is set in
+	 *      inline Ethernet device.
+	 * * 0: Disable reassembly of packets (default).
+	 */
+	uint32_t reass_en : 1;
+
 	/** Reserved bit fields for future extension
 	 *
 	 * User should ensure reserved_opts is cleared as it may change in
@@ -271,7 +281,7 @@ struct rte_security_ipsec_sa_options {
 	 *
 	 * Note: Reduce number of bits in reserved_opts for every new option.
 	 */
-	uint32_t reserved_opts : 18;
+	uint32_t reserved_opts : 17;
 };
 
 /** IPSec security association direction */
-- 
2.25.1
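
For completeness, a small sketch of how an inbound SA could request the new option when building an inline-protocol session; the inline Ethernet device must also have RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY enabled:

#include <rte_security.h>

static void
sa_request_inline_reassembly(struct rte_security_ipsec_xform *ipsec_xform)
{
	/* Ask the driver to attempt reassembly of encrypted fragments. */
	ipsec_xform->options.reass_en = 1;
	/* ipsec_xform is then passed via rte_security_session_conf.ipsec. */
}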


^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH 5/8] app/test: add unit cases for inline IPsec offload
  2022-01-03 15:08 ` [PATCH 0/8] ethdev: introduce IP " Akhil Goyal
                     ` (3 preceding siblings ...)
  2022-01-03 15:08   ` [PATCH 4/8] security: add IPsec option for " Akhil Goyal
@ 2022-01-03 15:08   ` Akhil Goyal
  2022-01-20 16:48     ` [PATCH v2 0/4] app/test: add inline IPsec and reassembly cases Akhil Goyal
  2022-01-03 15:08   ` [PATCH 6/8] app/test: add IP reassembly case with no frags Akhil Goyal
                     ` (4 subsequent siblings)
  9 siblings, 1 reply; 53+ messages in thread
From: Akhil Goyal @ 2022-01-03 15:08 UTC (permalink / raw)
  To: dev
  Cc: anoobj, radu.nicolau, declan.doherty, hemant.agrawal, matan,
	konstantin.ananyev, thomas, ferruh.yigit, andrew.rybchenko,
	olivier.matz, rosen.xu, Akhil Goyal

A new test suite is added to the test app to test inline IPsec protocol
offload. In this patch, a couple of predefined plaintext and ciphertext test
vectors are used to verify the IPsec functionality without the need for
external traffic generators. The sent packet is looped back onto the same
interface, received and matched against the expected output.
The test suite can be extended further with other functional test cases.
The test suite can be run using:
RTE> inline_ipsec_autotest

Signed-off-by: Akhil Goyal <gakhil@marvell.com>
---
 app/test/meson.build                          |   1 +
 app/test/test_inline_ipsec.c                  | 728 ++++++++++++++++++
 .../test_inline_ipsec_reassembly_vectors.h    | 198 +++++
 3 files changed, 927 insertions(+)
 create mode 100644 app/test/test_inline_ipsec.c
 create mode 100644 app/test/test_inline_ipsec_reassembly_vectors.h

diff --git a/app/test/meson.build b/app/test/meson.build
index 2b480adfba..9c88240e3f 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -74,6 +74,7 @@ test_sources = files(
         'test_hash_readwrite.c',
         'test_hash_perf.c',
         'test_hash_readwrite_lf_perf.c',
+	'test_inline_ipsec.c',
         'test_interrupts.c',
         'test_ipfrag.c',
         'test_ipsec.c',
diff --git a/app/test/test_inline_ipsec.c b/app/test/test_inline_ipsec.c
new file mode 100644
index 0000000000..54b56ba9e8
--- /dev/null
+++ b/app/test/test_inline_ipsec.c
@@ -0,0 +1,728 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+
+#include <stdio.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <unistd.h>
+#include <rte_cycles.h>
+#include <rte_ethdev.h>
+#include <rte_security.h>
+#include <rte_ipsec.h>
+#include <rte_byteorder.h>
+#include <rte_atomic.h>
+#include <rte_malloc.h>
+#include "test_inline_ipsec_reassembly_vectors.h"
+#include "test.h"
+
+#define NB_ETHPORTS_USED                (1)
+#define NB_SOCKETS                      (2)
+#define MEMPOOL_CACHE_SIZE 32
+#define MAX_PKT_BURST                   (32)
+#define RTE_TEST_RX_DESC_DEFAULT        (1024)
+#define RTE_TEST_TX_DESC_DEFAULT        (1024)
+#define RTE_PORT_ALL            (~(uint16_t)0x0)
+
+/*
+ * RX and TX Prefetch, Host, and Write-back threshold values should be
+ * carefully set for optimal performance. Consult the network
+ * controller's datasheet and supporting DPDK documentation for guidance
+ * on how these parameters should be set.
+ */
+#define RX_PTHRESH 8 /**< Default values of RX prefetch threshold reg. */
+#define RX_HTHRESH 8 /**< Default values of RX host threshold reg. */
+#define RX_WTHRESH 0 /**< Default values of RX write-back threshold reg. */
+
+#define TX_PTHRESH 32 /**< Default values of TX prefetch threshold reg. */
+#define TX_HTHRESH 0  /**< Default values of TX host threshold reg. */
+#define TX_WTHRESH 0  /**< Default values of TX write-back threshold reg. */
+
+#define MAX_TRAFFIC_BURST              2048
+
+#define NB_MBUF 1024
+
+#define APP_REASS_TIMEOUT		20
+
+static struct rte_mempool *mbufpool[NB_SOCKETS];
+static struct rte_mempool *sess_pool[NB_SOCKETS];
+static struct rte_mempool *sess_priv_pool[NB_SOCKETS];
+/* ethernet addresses of ports */
+static struct rte_ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
+
+static struct rte_eth_conf port_conf = {
+	.rxmode = {
+		.mq_mode = RTE_ETH_MQ_RX_NONE,
+		.split_hdr_size = 0,
+		.offloads = RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY |
+			    RTE_ETH_RX_OFFLOAD_CHECKSUM |
+			    RTE_ETH_RX_OFFLOAD_SECURITY,
+	},
+	.txmode = {
+		.mq_mode = RTE_ETH_MQ_TX_NONE,
+		.offloads = RTE_ETH_TX_OFFLOAD_SECURITY |
+			    RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE,
+	},
+	.lpbk_mode = 1,  /* enable loopback */
+};
+
+static struct rte_eth_rxconf rx_conf = {
+	.rx_thresh = {
+		.pthresh = RX_PTHRESH,
+		.hthresh = RX_HTHRESH,
+		.wthresh = RX_WTHRESH,
+	},
+	.rx_free_thresh = 32,
+};
+
+static struct rte_eth_txconf tx_conf = {
+	.tx_thresh = {
+		.pthresh = TX_PTHRESH,
+		.hthresh = TX_HTHRESH,
+		.wthresh = TX_WTHRESH,
+	},
+	.tx_free_thresh = 32, /* Use PMD default values */
+	.tx_rs_thresh = 32, /* Use PMD default values */
+};
+
+enum {
+	LCORE_INVALID = 0,
+	LCORE_AVAIL,
+	LCORE_USED,
+};
+
+struct lcore_cfg {
+	uint8_t status;
+	uint8_t socketid;
+	uint16_t nb_ports;
+	uint16_t port;
+} __rte_cache_aligned;
+
+struct lcore_cfg lcore_cfg;
+
+static uint64_t link_mbps;
+
+/* Create Inline IPsec session */
+static int
+create_inline_ipsec_session(struct ipsec_session_data *sa,
+		uint16_t portid, struct rte_ipsec_session *ips,
+		enum rte_security_ipsec_sa_direction dir,
+		enum rte_security_ipsec_tunnel_type tun_type)
+{
+	int32_t ret = 0;
+	struct rte_security_ctx *sec_ctx;
+	uint32_t src_v4 = rte_cpu_to_be_32(RTE_IPV4(192, 168, 1, 0));
+	uint32_t dst_v4 = rte_cpu_to_be_32(RTE_IPV4(192, 168, 1, 1));
+	uint16_t src_v6[8] = {0x2607, 0xf8b0, 0x400c, 0x0c03, 0x0000, 0x0000,
+				0x0000, 0x001a};
+	uint16_t dst_v6[8] = {0x2001, 0x0470, 0xe5bf, 0xdead, 0x4957, 0x2174,
+				0xe82c, 0x4887};
+	struct rte_security_session_conf sess_conf = {
+		.action_type = RTE_SECURITY_ACTION_TYPE_INLINE_PROTOCOL,
+		.protocol = RTE_SECURITY_PROTOCOL_IPSEC,
+		.ipsec = sa->ipsec_xform,
+		.crypto_xform = &sa->xform.aead,
+		.userdata = NULL,
+	};
+	sess_conf.ipsec.direction = dir;
+
+	const struct rte_security_capability *sec_cap;
+
+	sec_ctx = (struct rte_security_ctx *)
+			rte_eth_dev_get_sec_ctx(portid);
+
+	if (sec_ctx == NULL) {
+		printf("Ethernet device doesn't support security features.\n");
+		return TEST_SKIPPED;
+	}
+
+	sess_conf.crypto_xform->aead.key.data = sa->key.data;
+
+	/* Save SA as userdata for the security session. When
+	 * the packet is received, this userdata will be
+	 * retrieved using the metadata from the packet.
+	 *
+	 * The PMD is expected to set similar metadata for other
+	 * operations, like rte_eth_event, which are tied to
+	 * security session. In such cases, the userdata could
+	 * be obtained to uniquely identify the security
+	 * parameters denoted.
+	 */
+
+	sess_conf.userdata = (void *) sa;
+	sess_conf.ipsec.tunnel.type = tun_type;
+	if (tun_type == RTE_SECURITY_IPSEC_TUNNEL_IPV4) {
+		memcpy(&sess_conf.ipsec.tunnel.ipv4.src_ip, &src_v4,
+				sizeof(src_v4));
+		memcpy(&sess_conf.ipsec.tunnel.ipv4.dst_ip, &dst_v4,
+				sizeof(dst_v4));
+	} else {
+		memcpy(&sess_conf.ipsec.tunnel.ipv6.src_addr, &src_v6,
+				sizeof(src_v6));
+		memcpy(&sess_conf.ipsec.tunnel.ipv6.dst_addr, &dst_v6,
+				sizeof(dst_v6));
+	}
+	ips->security.ses = rte_security_session_create(sec_ctx,
+				&sess_conf, sess_pool[lcore_cfg.socketid],
+				sess_priv_pool[lcore_cfg.socketid]);
+	if (ips->security.ses == NULL) {
+		printf("SEC Session init failed: err: %d\n", ret);
+		return TEST_FAILED;
+	}
+
+	sec_cap = rte_security_capabilities_get(sec_ctx);
+	if (sec_cap == NULL) {
+		printf("No capabilities registered\n");
+		return TEST_SKIPPED;
+	}
+
+	/* iterate until ESP tunnel*/
+	while (sec_cap->action !=
+			RTE_SECURITY_ACTION_TYPE_NONE) {
+		if (sec_cap->action == sess_conf.action_type &&
+		    sec_cap->protocol ==
+			RTE_SECURITY_PROTOCOL_IPSEC &&
+		    sec_cap->ipsec.mode ==
+			sess_conf.ipsec.mode &&
+		    sec_cap->ipsec.direction == dir)
+			break;
+		sec_cap++;
+	}
+
+	if (sec_cap->action == RTE_SECURITY_ACTION_TYPE_NONE) {
+		printf("No suitable security capability found\n");
+		return TEST_SKIPPED;
+	}
+
+	ips->security.ol_flags = sec_cap->ol_flags;
+	ips->security.ctx = sec_ctx;
+
+	return 0;
+}
+
+/* Check the link status of all ports in up to 3s, and print them finally */
+static void
+check_all_ports_link_status(uint16_t port_num, uint32_t port_mask)
+{
+#define CHECK_INTERVAL 100 /* 100ms */
+#define MAX_CHECK_TIME 30 /* 3s (30 * 100ms) in total */
+	uint16_t portid;
+	uint8_t count, all_ports_up, print_flag = 0;
+	struct rte_eth_link link;
+	int ret;
+	char link_status[RTE_ETH_LINK_MAX_STR_LEN];
+
+	printf("Checking link statuses...\n");
+	fflush(stdout);
+	for (count = 0; count <= MAX_CHECK_TIME; count++) {
+		all_ports_up = 1;
+		for (portid = 0; portid < port_num; portid++) {
+			if ((port_mask & (1 << portid)) == 0)
+				continue;
+			memset(&link, 0, sizeof(link));
+			ret = rte_eth_link_get_nowait(portid, &link);
+			if (ret < 0) {
+				all_ports_up = 0;
+				if (print_flag == 1)
+					printf("Port %u link get failed: %s\n",
+						portid, rte_strerror(-ret));
+				continue;
+			}
+
+			/* print link status if flag set */
+			if (print_flag == 1) {
+				if (link.link_status && link_mbps == 0)
+					link_mbps = link.link_speed;
+
+				rte_eth_link_to_str(link_status,
+					sizeof(link_status), &link);
+				printf("Port %d %s\n", portid, link_status);
+				continue;
+			}
+			/* clear all_ports_up flag if any link down */
+			if (link.link_status == RTE_ETH_LINK_DOWN) {
+				all_ports_up = 0;
+				break;
+			}
+		}
+		/* after finally printing all link status, get out */
+		if (print_flag == 1)
+			break;
+
+		if (all_ports_up == 0) {
+			fflush(stdout);
+			rte_delay_ms(CHECK_INTERVAL);
+		}
+
+		/* set the print_flag if all ports up or timeout */
+		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1))
+			print_flag = 1;
+	}
+}
+
+static void
+print_ethaddr(const char *name, const struct rte_ether_addr *eth_addr)
+{
+	char buf[RTE_ETHER_ADDR_FMT_SIZE];
+	rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE, eth_addr);
+	printf("%s%s", name, buf);
+}
+
+static void
+copy_buf_to_pkt_segs(void *buf, unsigned len, struct rte_mbuf *pkt,
+		unsigned offset)
+{
+	struct rte_mbuf *seg;
+	void *seg_buf;
+	unsigned copy_len;
+
+	seg = pkt;
+	while (offset >= seg->data_len) {
+		offset -= seg->data_len;
+		seg = seg->next;
+	}
+	copy_len = seg->data_len - offset;
+	seg_buf = rte_pktmbuf_mtod_offset(seg, char *, offset);
+	while (len > copy_len) {
+		rte_memcpy(seg_buf, buf, (size_t) copy_len);
+		len -= copy_len;
+		buf = ((char *) buf + copy_len);
+		seg = seg->next;
+		seg_buf = rte_pktmbuf_mtod(seg, void *);
+	}
+	rte_memcpy(seg_buf, buf, (size_t) len);
+}
+
+static inline void
+copy_buf_to_pkt(void *buf, unsigned len, struct rte_mbuf *pkt, unsigned offset)
+{
+	if (offset + len <= pkt->data_len) {
+		rte_memcpy(rte_pktmbuf_mtod_offset(pkt, char *, offset), buf,
+			   (size_t) len);
+		return;
+	}
+	copy_buf_to_pkt_segs(buf, len, pkt, offset);
+}
+
+static inline int
+init_traffic(struct rte_mempool *mp,
+	     struct rte_mbuf **pkts_burst,
+	     struct ipsec_test_packet *vectors[],
+	     uint32_t nb_pkts)
+{
+	struct rte_mbuf *pkt;
+	uint32_t i;
+
+	for (i = 0; i < nb_pkts; i++) {
+		pkt = rte_pktmbuf_alloc(mp);
+		if (pkt == NULL) {
+			return TEST_FAILED;
+		}
+		pkt->data_len = vectors[i]->len;
+		pkt->pkt_len = vectors[i]->len;
+		copy_buf_to_pkt(vectors[i]->data, vectors[i]->len,
+				pkt, vectors[i]->l2_offset);
+
+		pkts_burst[i] = pkt;
+	}
+	return i;
+}
+
+static int
+init_lcore(void)
+{
+	unsigned lcore_id;
+
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		lcore_cfg.socketid =
+			rte_lcore_to_socket_id(lcore_id);
+		if (rte_lcore_is_enabled(lcore_id) == 0) {
+			lcore_cfg.status = LCORE_INVALID;
+			continue;
+		} else {
+			lcore_cfg.status = LCORE_AVAIL;
+			break;
+		}
+	}
+	return 0;
+}
+
+static int
+init_mempools(unsigned nb_mbuf)
+{
+	struct rte_security_ctx *sec_ctx;
+	int socketid;
+	unsigned lcore_id;
+	uint16_t nb_sess = 64;
+	uint32_t sess_sz;
+	char s[64];
+
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		if (rte_lcore_is_enabled(lcore_id) == 0)
+			continue;
+
+		socketid = rte_lcore_to_socket_id(lcore_id);
+		if (socketid >= NB_SOCKETS) {
+			rte_exit(EXIT_FAILURE,
+				"Socket %d of lcore %u is out of range %d\n",
+				socketid, lcore_id, NB_SOCKETS);
+		}
+		if (mbufpool[socketid] == NULL) {
+			snprintf(s, sizeof(s), "mbuf_pool_%d", socketid);
+			mbufpool[socketid] =
+				rte_pktmbuf_pool_create(s, nb_mbuf,
+					MEMPOOL_CACHE_SIZE, 0,
+					RTE_MBUF_DEFAULT_BUF_SIZE, socketid);
+			if (mbufpool[socketid] == NULL)
+				rte_exit(EXIT_FAILURE,
+					"Cannot init mbuf pool on socket %d\n",
+					socketid);
+			else
+				printf("Allocated mbuf pool on socket %d\n",
+					socketid);
+		}
+
+		sec_ctx = rte_eth_dev_get_sec_ctx(lcore_cfg.port);
+		if (sec_ctx == NULL)
+			continue;
+
+		sess_sz = rte_security_session_get_size(sec_ctx);
+		if (sess_pool[socketid] == NULL) {
+			snprintf(s, sizeof(s), "sess_pool_%d", socketid);
+			sess_pool[socketid] =
+				rte_mempool_create(s, nb_sess,
+					sess_sz,
+					MEMPOOL_CACHE_SIZE, 0,
+					NULL, NULL, NULL, NULL,
+					socketid, 0);
+			if (sess_pool[socketid] == NULL) {
+				printf("Cannot init sess pool on socket %d\n",
+					socketid);
+				return TEST_FAILED;
+			} else
+				printf("Allocated sess pool on socket %d\n",
+					socketid);
+		}
+		if (sess_priv_pool[socketid] == NULL) {
+			snprintf(s, sizeof(s), "sess_priv_pool_%d", socketid);
+			sess_priv_pool[socketid] =
+				rte_mempool_create(s, nb_sess,
+					sess_sz,
+					MEMPOOL_CACHE_SIZE, 0,
+					NULL, NULL, NULL, NULL,
+					socketid, 0);
+			if (sess_priv_pool[socketid] == NULL) {
+				printf("Cannot init sess_priv pool on socket %d\n",
+					socketid);
+				return TEST_FAILED;
+			} else
+				printf("Allocated sess_priv pool on socket %d\n",
+					socketid);
+		}
+	}
+	return 0;
+}
+
+static void
+create_default_flow(uint16_t port_id)
+{
+	struct rte_flow_action action[2];
+	struct rte_flow_item pattern[2];
+	struct rte_flow_attr attr = {0};
+	struct rte_flow_error err;
+	struct rte_flow *flow;
+	int ret;
+
+	/* Add the default rte_flow to enable SECURITY for all ESP packets */
+
+	pattern[0].type = RTE_FLOW_ITEM_TYPE_ESP;
+	pattern[0].spec = NULL;
+	pattern[0].mask = NULL;
+	pattern[0].last = NULL;
+	pattern[1].type = RTE_FLOW_ITEM_TYPE_END;
+
+	action[0].type = RTE_FLOW_ACTION_TYPE_SECURITY;
+	action[0].conf = NULL;
+	action[1].type = RTE_FLOW_ACTION_TYPE_END;
+	action[1].conf = NULL;
+
+	attr.ingress = 1;
+
+	ret = rte_flow_validate(port_id, &attr, pattern, action, &err);
+	if (ret)
+		return;
+
+	flow = rte_flow_create(port_id, &attr, pattern, action, &err);
+	if (flow == NULL)
+		return;
+}
+
+struct rte_mbuf **tx_pkts_burst;
+
+static int
+test_ipsec(struct reassembly_vector *vector,
+	   enum rte_security_ipsec_sa_direction dir,
+	   enum rte_security_ipsec_tunnel_type tun_type)
+{
+	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+	unsigned i, portid, nb_rx = 0, nb_tx = 1;
+	struct rte_ipsec_session ips = {0};
+	struct rte_eth_dev_info dev_info = {0};
+
+	portid = lcore_cfg.port;
+	rte_eth_dev_info_get(portid, &dev_info);
+	if (dev_info.reass_capa.max_frags < nb_tx)
+		return TEST_SKIPPED;
+
+	init_traffic(mbufpool[lcore_cfg.socketid],
+			tx_pkts_burst, vector->frags, nb_tx);
+
+	/* Create Inline IPsec session. */
+	if (create_inline_ipsec_session(vector->sa_data, portid, &ips, dir,
+					tun_type))
+		return TEST_FAILED;
+	if (dir == RTE_SECURITY_IPSEC_SA_DIR_INGRESS)
+		create_default_flow(portid);
+	else {
+		for (i = 0; i < nb_tx; i++) {
+			if (ips.security.ol_flags &
+					RTE_SECURITY_TX_OLOAD_NEED_MDATA)
+				rte_security_set_pkt_metadata(ips.security.ctx,
+				ips.security.ses, tx_pkts_burst[i], NULL);
+			tx_pkts_burst[i]->ol_flags |= RTE_MBUF_F_TX_SEC_OFFLOAD;
+			tx_pkts_burst[i]->l2_len = 14;
+		}
+	}
+
+	nb_tx = rte_eth_tx_burst(portid, 0, tx_pkts_burst, nb_tx);
+
+	rte_pause();
+
+	do {
+		nb_rx = rte_eth_rx_burst(portid, 0, pkts_burst, MAX_PKT_BURST);
+	} while (nb_rx == 0);
+
+	/* Destroy session so that other cases can create the session again */
+	rte_security_session_destroy(ips.security.ctx, ips.security.ses);
+
+	/* Compare results with known vectors. */
+	if (nb_rx == 1) {
+		if (memcmp(rte_pktmbuf_mtod(pkts_burst[0], char *),
+					vector->full_pkt->data,
+					(size_t) vector->full_pkt->len)) {
+			printf("\n====Inline IPsec case failed: Data Mismatch");
+			rte_hexdump(stdout, "received",
+				rte_pktmbuf_mtod(pkts_burst[0], char *),
+				vector->full_pkt->len);
+			rte_hexdump(stdout, "reference",
+				vector->full_pkt->data,
+				vector->full_pkt->len);
+			return TEST_FAILED;
+		}
+		return TEST_SUCCESS;
+	} else
+		return TEST_FAILED;
+}
+
+static int
+ut_setup_inline_ipsec(void)
+{
+	uint16_t portid = lcore_cfg.port;
+	int ret;
+
+	/* Set IP reassembly configuration. */
+	struct rte_eth_dev_info dev_info = {0};
+	rte_eth_dev_info_get(portid, &dev_info);
+
+	ret = rte_eth_ip_reassembly_conf_set(portid, &dev_info.reass_capa);
+	if (ret < 0) {
+		printf("IP reassembly configuration err=%d, port=%d\n",
+			ret, portid);
+		return ret;
+	}
+
+	/* Start device */
+	ret = rte_eth_dev_start(portid);
+	if (ret < 0) {
+		printf("rte_eth_dev_start: err=%d, port=%d\n",
+			ret, portid);
+		return ret;
+	}
+	/* always enable promiscuous */
+	ret = rte_eth_promiscuous_enable(portid);
+	if (ret != 0) {
+		printf("rte_eth_promiscuous_enable: err=%s, port=%d\n",
+			rte_strerror(-ret), portid);
+		return ret;
+	}
+	lcore_cfg.port = portid;
+	check_all_ports_link_status(1, RTE_PORT_ALL);
+
+	return 0;
+}
+
+static void
+ut_teardown_inline_ipsec(void)
+{
+	uint16_t portid = lcore_cfg.port;
+	int socketid = lcore_cfg.socketid;
+	int ret;
+
+	/* port tear down */
+	RTE_ETH_FOREACH_DEV(portid) {
+		if (socketid != rte_eth_dev_socket_id(portid))
+			continue;
+
+		ret = rte_eth_dev_stop(portid);
+		if (ret != 0)
+			printf("rte_eth_dev_stop: err=%s, port=%u\n",
+			       rte_strerror(-ret), portid);
+	}
+}
+
+static int
+testsuite_setup(void)
+{
+	uint16_t nb_rxd;
+	uint16_t nb_txd;
+	uint16_t nb_ports;
+	int socketid, ret;
+	uint16_t nb_rx_queue = 1, nb_tx_queue = 1;
+	uint16_t portid = lcore_cfg.port;
+
+	printf("Start inline IPsec test.\n");
+
+	nb_ports = rte_eth_dev_count_avail();
+	if (nb_ports < NB_ETHPORTS_USED) {
+		printf("At least %u port(s) needed for the test\n",
+		       NB_ETHPORTS_USED);
+		return -1;
+	}
+
+	init_lcore();
+
+	init_mempools(NB_MBUF);
+
+	socketid = lcore_cfg.socketid;
+	if (tx_pkts_burst == NULL) {
+		tx_pkts_burst = (struct rte_mbuf **)
+			rte_calloc_socket("tx_buff",
+					  MAX_TRAFFIC_BURST * nb_ports,
+					  sizeof(void *),
+					  RTE_CACHE_LINE_SIZE, socketid);
+		if (!tx_pkts_burst)
+			return -1;
+	}
+
+	printf("Generate %d packets @socket %d\n",
+	       MAX_TRAFFIC_BURST * nb_ports, socketid);
+
+	nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
+	nb_txd = RTE_TEST_TX_DESC_DEFAULT;
+
+	/* port configure */
+	ret = rte_eth_dev_configure(portid, nb_rx_queue,
+				    nb_tx_queue, &port_conf);
+	if (ret < 0) {
+		printf("Cannot configure device: err=%d, port=%d\n",
+			 ret, portid);
+		return ret;
+	}
+	ret = rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
+	if (ret < 0) {
+		printf("Cannot get mac address: err=%d, port=%d\n",
+			 ret, portid);
+		return ret;
+	}
+	printf("Port %u ", portid);
+	print_ethaddr("Address:", &ports_eth_addr[portid]);
+	printf("\n");
+
+	/* tx queue setup */
+	ret = rte_eth_tx_queue_setup(portid, 0, nb_txd,
+				     socketid, &tx_conf);
+	if (ret < 0) {
+		printf("rte_eth_tx_queue_setup: err=%d, port=%d\n",
+				ret, portid);
+		return ret;
+	}
+	/* rx queue setup */
+	ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd,
+					socketid, &rx_conf,
+					mbufpool[socketid]);
+	if (ret < 0) {
+		printf("rte_eth_rx_queue_setup: err=%d, port=%d\n",
+				ret, portid);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void
+testsuite_teardown(void)
+{
+	int ret;
+	uint16_t portid = lcore_cfg.port;
+	uint16_t socketid = lcore_cfg.socketid;
+
+	/* port tear down */
+	RTE_ETH_FOREACH_DEV(portid) {
+		if (socketid != rte_eth_dev_socket_id(portid))
+			continue;
+
+		ret = rte_eth_dev_stop(portid);
+		if (ret != 0)
+			printf("rte_eth_dev_stop: err=%s, port=%u\n",
+			       rte_strerror(-ret), portid);
+	}
+}
+
+static int
+test_ipsec_ipv4_encap_nofrag(void) {
+	struct reassembly_vector ipv4_nofrag_case = {
+				.sa_data = &conf_aes_128_gcm,
+				.full_pkt = &pkt_ipv4_gcm128_cipher,
+				.frags[0] = &pkt_ipv4_plain,
+	};
+	return test_ipsec(&ipv4_nofrag_case,
+			RTE_SECURITY_IPSEC_SA_DIR_EGRESS,
+			RTE_SECURITY_IPSEC_TUNNEL_IPV4);
+}
+
+static int
+test_ipsec_ipv4_decap_nofrag(void) {
+	struct reassembly_vector ipv4_nofrag_case = {
+				.sa_data = &conf_aes_128_gcm,
+				.full_pkt = &pkt_ipv4_plain,
+				.frags[0] = &pkt_ipv4_gcm128_cipher,
+	};
+	return test_ipsec(&ipv4_nofrag_case,
+			RTE_SECURITY_IPSEC_SA_DIR_INGRESS,
+			RTE_SECURITY_IPSEC_TUNNEL_IPV4);
+}
+
+static struct unit_test_suite inline_ipsec_testsuite  = {
+	.suite_name = "Inline IPsec Ethernet Device Unit Test Suite",
+	.setup = testsuite_setup,
+	.teardown = testsuite_teardown,
+	.unit_test_cases = {
+		TEST_CASE_ST(ut_setup_inline_ipsec,
+				ut_teardown_inline_ipsec,
+				test_ipsec_ipv4_encap_nofrag),
+		TEST_CASE_ST(ut_setup_inline_ipsec,
+				ut_teardown_inline_ipsec,
+				test_ipsec_ipv4_decap_nofrag),
+
+		TEST_CASES_END() /**< NULL terminate unit test array */
+	}
+};
+
+static int
+test_inline_ipsec(void)
+{
+	return unit_test_suite_runner(&inline_ipsec_testsuite);
+}
+
+REGISTER_TEST_COMMAND(inline_ipsec_autotest, test_inline_ipsec);
diff --git a/app/test/test_inline_ipsec_reassembly_vectors.h b/app/test/test_inline_ipsec_reassembly_vectors.h
new file mode 100644
index 0000000000..68066a0957
--- /dev/null
+++ b/app/test/test_inline_ipsec_reassembly_vectors.h
@@ -0,0 +1,198 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+#ifndef _TEST_INLINE_IPSEC_REASSEMBLY_VECTORS_H_
+#define _TEST_INLINE_IPSEC_REASSEMBLY_VECTORS_H_
+
+#define MAX_FRAG_LEN		 1500
+#define MAX_FRAGS		 6
+#define MAX_PKT_LEN		 (MAX_FRAG_LEN * MAX_FRAGS)
+struct ipsec_session_data {
+	struct {
+		uint8_t data[32];
+	} key;
+	struct {
+		uint8_t data[4];
+		unsigned int len;
+	} salt;
+	struct {
+		uint8_t data[16];
+	} iv;
+	struct rte_security_ipsec_xform ipsec_xform;
+	bool aead;
+	union {
+		struct {
+			struct rte_crypto_sym_xform cipher;
+			struct rte_crypto_sym_xform auth;
+		} chain;
+		struct rte_crypto_sym_xform aead;
+	} xform;
+};
+
+struct ipsec_test_packet {
+	uint32_t len;
+	uint32_t l2_offset;
+	uint32_t l3_offset;
+	uint32_t l4_offset;
+	uint8_t data[MAX_PKT_LEN];
+};
+
+struct reassembly_vector {
+	struct ipsec_session_data *sa_data;
+	struct ipsec_test_packet *full_pkt;
+	struct ipsec_test_packet *frags[MAX_FRAGS];
+};
+
+struct ipsec_test_packet pkt_ipv4_plain = {
+	.len = 76,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 34,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x08, 0x00,
+
+		/* IP */
+		0x45, 0x00, 0x00, 0x3e, 0x69, 0x8f, 0x00, 0x00,
+		0x80, 0x11, 0x4d, 0xcc, 0xc0, 0xa8, 0x01, 0x02,
+		0xc0, 0xa8, 0x01, 0x01,
+
+		/* UDP */
+		0x0a, 0x98, 0x00, 0x35, 0x00, 0x2a, 0x23, 0x43,
+		0xb2, 0xd0, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x03, 0x73, 0x69, 0x70,
+		0x09, 0x63, 0x79, 0x62, 0x65, 0x72, 0x63, 0x69,
+		0x74, 0x79, 0x02, 0x64, 0x6b, 0x00, 0x00, 0x01,
+		0x00, 0x01,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv4_gcm128_cipher = {
+	.len = 130,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 34,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x08, 0x00,
+
+		/* IP - outer header */
+		0x45, 0x00, 0x00, 0x74, 0x69, 0x8f, 0x00, 0x00,
+		0x80, 0x32, 0x4d, 0x75, 0xc0, 0xa8, 0x01, 0x02,
+		0xc0, 0xa8, 0x01, 0x01,
+
+		/* ESP */
+		0x00, 0x00, 0xa5, 0xf8, 0x00, 0x00, 0x00, 0x01,
+
+		/* IV */
+		0xfa, 0xce, 0xdb, 0xad, 0xde, 0xca, 0xf8, 0x88,
+
+		/* Data */
+		0xde, 0xb2, 0x2c, 0xd9, 0xb0, 0x7c, 0x72, 0xc1,
+		0x6e, 0x3a, 0x65, 0xbe, 0xeb, 0x8d, 0xf3, 0x04,
+		0xa5, 0xa5, 0x89, 0x7d, 0x33, 0xae, 0x53, 0x0f,
+		0x1b, 0xa7, 0x6d, 0x5d, 0x11, 0x4d, 0x2a, 0x5c,
+		0x3d, 0xe8, 0x18, 0x27, 0xc1, 0x0e, 0x9a, 0x4f,
+		0x51, 0x33, 0x0d, 0x0e, 0xec, 0x41, 0x66, 0x42,
+		0xcf, 0xbb, 0x85, 0xa5, 0xb4, 0x7e, 0x48, 0xa4,
+		0xec, 0x3b, 0x9b, 0xa9, 0x5d, 0x91, 0x8b, 0xd4,
+		0x29, 0xc7, 0x37, 0x57, 0x9f, 0xf1, 0x9e, 0x58,
+		0xcf, 0xfc, 0x60, 0x7a, 0x3b, 0xce, 0x89, 0x94,
+	},
+};
+
+static inline void
+test_vector_payload_populate(struct ipsec_test_packet *pkt,
+		bool first_frag)
+{
+	uint32_t i = pkt->l4_offset;
+
+	/* For non-fragmented packets and first frag, skip 8 bytes from
+	 * l4_offset for UDP header */
+
+	if (first_frag)
+		i += 8;
+
+	for (; i < pkt->len; i++)
+		pkt->data[i] = 0x58;
+}
+
+static inline unsigned int
+reass_test_vectors_init(struct reassembly_vector *vector)
+{
+	unsigned int i = 0;
+
+	if (vector->frags[0] != NULL && vector->frags[1] == NULL)
+		return 1;
+
+	test_vector_payload_populate(vector->full_pkt, true);
+	for (; i < MAX_FRAGS && vector->frags[i] != NULL; i++)
+		test_vector_payload_populate(vector->frags[i],
+				(i == 0) ? true : false);
+	return i;
+}
+
+struct ipsec_session_data conf_aes_128_gcm = {
+	.key = {
+		.data = {
+			0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
+			0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08
+		},
+	},
+
+	.salt = {
+		.data = {
+			0xca, 0xfe, 0xba, 0xbe
+		},
+		.len = 4,
+	},
+
+	.iv = {
+		.data = {
+			0xfa, 0xce, 0xdb, 0xad, 0xde, 0xca, 0xf8, 0x88
+		},
+	},
+
+	.ipsec_xform = {
+		.spi = 0xa5f8,
+		.salt = 0xbebafeca,
+		.options.esn = 0,
+		.options.udp_encap = 0,
+		.options.copy_dscp = 0,
+		.options.copy_flabel = 0,
+		.options.copy_df = 0,
+		.options.dec_ttl = 0,
+		.options.ecn = 0,
+		.options.stats = 0,
+		.options.tunnel_hdr_verify = 0,
+		.options.ip_csum_enable = 0,
+		.options.l4_csum_enable = 0,
+		.options.reass_en = 1,
+		.direction = RTE_SECURITY_IPSEC_SA_DIR_EGRESS,
+		.proto = RTE_SECURITY_IPSEC_SA_PROTO_ESP,
+		.mode = RTE_SECURITY_IPSEC_SA_MODE_TUNNEL,
+		.tunnel.type = RTE_SECURITY_IPSEC_TUNNEL_IPV4,
+		.replay_win_sz = 0,
+	},
+
+	.aead = true,
+
+	.xform = {
+		.aead = {
+			.next = NULL,
+			.type = RTE_CRYPTO_SYM_XFORM_AEAD,
+			.aead = {
+				.op = RTE_CRYPTO_AEAD_OP_ENCRYPT,
+				.algo = RTE_CRYPTO_AEAD_AES_GCM,
+				.key.length = 16,
+				.iv.length = 12,
+				.iv.offset = 0,
+				.digest_length = 16,
+				.aad_length = 12,
+			},
+		},
+	},
+};
+#endif
-- 
2.25.1


^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH 6/8] app/test: add IP reassembly case with no frags
  2022-01-03 15:08 ` [PATCH 0/8] ethdev: introduce IP " Akhil Goyal
                     ` (4 preceding siblings ...)
  2022-01-03 15:08   ` [PATCH 5/8] app/test: add unit cases for inline IPsec offload Akhil Goyal
@ 2022-01-03 15:08   ` Akhil Goyal
  2022-01-03 15:08   ` [PATCH 7/8] app/test: add IP reassembly cases with multiple fragments Akhil Goyal
                     ` (3 subsequent siblings)
  9 siblings, 0 replies; 53+ messages in thread
From: Akhil Goyal @ 2022-01-03 15:08 UTC (permalink / raw)
  To: dev
  Cc: anoobj, radu.nicolau, declan.doherty, hemant.agrawal, matan,
	konstantin.ananyev, thomas, ferruh.yigit, andrew.rybchenko,
	olivier.matz, rosen.xu, Akhil Goyal

The test_inline_ipsec test suite is extended to test IP reassembly of inbound
fragmented packets. The fragmented packet is sent on an interface which
encrypts it; the packet is then looped back on the same interface, which
decrypts it and attempts IP reassembly of the decrypted fragments.
In this patch, a case is added for packets without fragmentation to
verify the complete path. Other cases are added in subsequent patches.

Signed-off-by: Akhil Goyal <gakhil@marvell.com>
---
 app/test/test_inline_ipsec.c | 154 +++++++++++++++++++++++++++++++++++
 1 file changed, 154 insertions(+)

diff --git a/app/test/test_inline_ipsec.c b/app/test/test_inline_ipsec.c
index 54b56ba9e8..f704725c0f 100644
--- a/app/test/test_inline_ipsec.c
+++ b/app/test/test_inline_ipsec.c
@@ -460,6 +460,145 @@ create_default_flow(uint16_t port_id)
 
 struct rte_mbuf **tx_pkts_burst;
 
+static int
+compare_pkt_data(struct rte_mbuf *m, uint8_t *ref, unsigned int tot_len)
+{
+	unsigned int len;
+	unsigned int nb_segs = m->nb_segs;
+	unsigned int matched = 0;
+
+	while (m && nb_segs != 0) {
+		len = tot_len;
+		if (len > m->data_len)
+			len = m->data_len;
+		if (len != 0) {
+			if (memcmp(rte_pktmbuf_mtod(m, char *),
+					ref + matched, len)) {
+				printf("\n====Reassembly case failed: Data Mismatch");
+				rte_hexdump(stdout, "Reassembled",
+					rte_pktmbuf_mtod(m, char *),
+					len);
+				rte_hexdump(stdout, "reference",
+					ref + matched,
+					len);
+				return TEST_FAILED;
+			}
+		}
+		tot_len -= len;
+		matched += len;
+		m = m->next;
+		nb_segs--;
+	}
+	return TEST_SUCCESS;
+}
+
+static int
+test_reassembly(struct reassembly_vector *vector,
+		enum rte_security_ipsec_tunnel_type tun_type)
+{
+	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+	unsigned i, portid, nb_rx = 0, nb_tx = 0;
+	struct rte_ipsec_session out_ips = {0};
+	struct rte_ipsec_session in_ips = {0};
+	struct rte_eth_dev_info dev_info = {0};
+	int ret = 0;
+
+	/* Initialize mbuf with test vectors. */
+	nb_tx = reass_test_vectors_init(vector);
+
+	portid = lcore_cfg.port;
+	rte_eth_dev_info_get(portid, &dev_info);
+	if (dev_info.reass_capa.max_frags < nb_tx)
+		return TEST_SKIPPED;
+
+	/*
+	 * Set a finite value in the timeout in case the PMD supports much
+	 * more than is required by this app.
+	 */
+	if (dev_info.reass_capa.reass_timeout > APP_REASS_TIMEOUT) {
+		dev_info.reass_capa.reass_timeout = APP_REASS_TIMEOUT;
+		rte_eth_ip_reassembly_conf_set(portid, &dev_info.reass_capa);
+	}
+
+	init_traffic(mbufpool[lcore_cfg.socketid],
+			tx_pkts_burst, vector->frags, nb_tx);
+
+	/* Create Inline IPsec outbound session. */
+	ret = create_inline_ipsec_session(vector->sa_data, portid, &out_ips,
+			RTE_SECURITY_IPSEC_SA_DIR_EGRESS, tun_type);
+	if (ret)
+		return ret;
+	for (i = 0; i < nb_tx; i++) {
+		if (out_ips.security.ol_flags &
+				RTE_SECURITY_TX_OLOAD_NEED_MDATA)
+			rte_security_set_pkt_metadata(out_ips.security.ctx,
+				out_ips.security.ses, tx_pkts_burst[i], NULL);
+		tx_pkts_burst[i]->ol_flags |= RTE_MBUF_F_TX_SEC_OFFLOAD;
+		tx_pkts_burst[i]->l2_len = RTE_ETHER_HDR_LEN;
+	}
+	/* Create Inline IPsec inbound session. */
+	create_inline_ipsec_session(vector->sa_data, portid, &in_ips,
+			RTE_SECURITY_IPSEC_SA_DIR_INGRESS, tun_type);
+	create_default_flow(portid);
+
+	nb_tx = rte_eth_tx_burst(portid, 0, tx_pkts_burst, nb_tx);
+
+	rte_pause();
+
+	do {
+		nb_rx = rte_eth_rx_burst(portid, 0, pkts_burst, MAX_PKT_BURST);
+		for (i = 0; i < nb_rx; i++) {
+			if ((pkts_burst[i]->ol_flags &
+			    RTE_MBUF_F_RX_IPREASSEMBLY_INCOMPLETE) &&
+			    rte_eth_ip_reass_dynfield_is_registered()) {
+				rte_eth_ip_reass_dynfield_t *dynfield[MAX_PKT_BURST];
+				int j = 0;
+
+				dynfield[j] = rte_eth_ip_reass_dynfield(pkts_burst[i]);
+				while ((dynfield[j]->next_frag->ol_flags &
+				    RTE_MBUF_F_RX_IPREASSEMBLY_INCOMPLETE) &&
+				    dynfield[j]->nb_frags > 0) {
+
+					rte_pktmbuf_dump(stdout,
+						dynfield[j]->next_frag,
+						dynfield[j]->next_frag->data_len);
+					j++;
+					dynfield[j] = rte_eth_ip_reass_dynfield(
+						dynfield[j-1]->next_frag);
+				}
+				/**
+				 * IP reassembly offload is incomplete, and
+				 * fragments are listed in dynfield which
+				 * can be reassembled in SW.
+				 */
+				printf("\nHW IP Reassembly failed,"
+					"\nAttempt SW IP Reassembly,"
+					"\nmbuf is chained with fragments.\n");
+			}
+		}
+	} while (nb_rx == 0);
+
+	/* Clear session data. */
+	rte_security_session_destroy(out_ips.security.ctx,
+				     out_ips.security.ses);
+	rte_security_session_destroy(in_ips.security.ctx,
+				     in_ips.security.ses);
+
+	/* Compare results with known vectors. */
+	if (nb_rx == 1) {
+		if (vector->full_pkt->len == pkts_burst[0]->pkt_len)
+			return compare_pkt_data(pkts_burst[0],
+					vector->full_pkt->data,
+					vector->full_pkt->len);
+		else {
+			rte_pktmbuf_dump(stdout, pkts_burst[0],
+					pkts_burst[0]->pkt_len);
+		}
+	}
+
+	return TEST_FAILED;
+}
+
 static int
 test_ipsec(struct reassembly_vector *vector,
 	   enum rte_security_ipsec_sa_direction dir,
@@ -703,6 +842,18 @@ test_ipsec_ipv4_decap_nofrag(void) {
 			RTE_SECURITY_IPSEC_TUNNEL_IPV4);
 }
 
+static int
+test_reassembly_ipv4_nofrag(void) {
+	struct reassembly_vector ipv4_nofrag_case = {
+				.sa_data = &conf_aes_128_gcm,
+				.full_pkt = &pkt_ipv4_plain,
+				.frags[0] = &pkt_ipv4_plain,
+	};
+	return test_reassembly(&ipv4_nofrag_case,
+			RTE_SECURITY_IPSEC_TUNNEL_IPV4);
+}
+
+
 static struct unit_test_suite inline_ipsec_testsuite  = {
 	.suite_name = "Inline IPsec Ethernet Device Unit Test Suite",
 	.setup = testsuite_setup,
@@ -714,6 +865,9 @@ static struct unit_test_suite inline_ipsec_testsuite  = {
 		TEST_CASE_ST(ut_setup_inline_ipsec,
 				ut_teardown_inline_ipsec,
 				test_ipsec_ipv4_decap_nofrag),
+		TEST_CASE_ST(ut_setup_inline_ipsec,
+				ut_teardown_inline_ipsec,
+				test_reassembly_ipv4_nofrag),
 
 		TEST_CASES_END() /**< NULL terminate unit test array */
 	}
-- 
2.25.1


^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH 7/8] app/test: add IP reassembly cases with multiple fragments
  2022-01-03 15:08 ` [PATCH 0/8] ethdev: introduce IP " Akhil Goyal
                     ` (5 preceding siblings ...)
  2022-01-03 15:08   ` [PATCH 6/8] app/test: add IP reassembly case with no frags Akhil Goyal
@ 2022-01-03 15:08   ` Akhil Goyal
  2022-01-03 15:08   ` [PATCH 8/8] app/test: add IP reassembly negative cases Akhil Goyal
                     ` (2 subsequent siblings)
  9 siblings, 0 replies; 53+ messages in thread
From: Akhil Goyal @ 2022-01-03 15:08 UTC (permalink / raw)
  To: dev
  Cc: anoobj, radu.nicolau, declan.doherty, hemant.agrawal, matan,
	konstantin.ananyev, thomas, ferruh.yigit, andrew.rybchenko,
	olivier.matz, rosen.xu, Akhil Goyal

More cases are added to the test_inline_ipsec test suite to verify packets
with multiple IP(v4/v6) fragments. These fragments are encrypted and then
decrypted by inline IPsec processing, and an attempt is then made to
reassemble the fragments. The reassembled packet content is matched
against the known test vectors.

Signed-off-by: Akhil Goyal <gakhil@marvell.com>
---
 app/test/test_inline_ipsec.c                  | 101 +++
 .../test_inline_ipsec_reassembly_vectors.h    | 592 ++++++++++++++++++
 2 files changed, 693 insertions(+)

diff --git a/app/test/test_inline_ipsec.c b/app/test/test_inline_ipsec.c
index f704725c0f..3f3731760d 100644
--- a/app/test/test_inline_ipsec.c
+++ b/app/test/test_inline_ipsec.c
@@ -853,6 +853,89 @@ test_reassembly_ipv4_nofrag(void) {
 			RTE_SECURITY_IPSEC_TUNNEL_IPV4);
 }
 
+static int
+test_reassembly_ipv4_2frag(void) {
+	struct reassembly_vector ipv4_2frag_case = {
+				.sa_data = &conf_aes_128_gcm,
+				.full_pkt = &pkt_ipv4_udp_p1,
+				.frags[0] = &pkt_ipv4_udp_p1_f1,
+				.frags[1] = &pkt_ipv4_udp_p1_f2,
+
+	};
+	return test_reassembly(&ipv4_2frag_case,
+			RTE_SECURITY_IPSEC_TUNNEL_IPV4);
+}
+
+static int
+test_reassembly_ipv6_2frag(void) {
+	struct reassembly_vector ipv6_2frag_case = {
+				.sa_data = &conf_aes_128_gcm,
+				.full_pkt = &pkt_ipv6_udp_p1,
+				.frags[0] = &pkt_ipv6_udp_p1_f1,
+				.frags[1] = &pkt_ipv6_udp_p1_f2,
+	};
+	return test_reassembly(&ipv6_2frag_case,
+			RTE_SECURITY_IPSEC_TUNNEL_IPV6);
+}
+
+static int
+test_reassembly_ipv4_4frag(void) {
+	struct reassembly_vector ipv4_4frag_case = {
+				.sa_data = &conf_aes_128_gcm,
+				.full_pkt = &pkt_ipv4_udp_p2,
+				.frags[0] = &pkt_ipv4_udp_p2_f1,
+				.frags[1] = &pkt_ipv4_udp_p2_f2,
+				.frags[2] = &pkt_ipv4_udp_p2_f3,
+				.frags[3] = &pkt_ipv4_udp_p2_f4,
+	};
+	return test_reassembly(&ipv4_4frag_case,
+			RTE_SECURITY_IPSEC_TUNNEL_IPV4);
+}
+
+static int
+test_reassembly_ipv6_4frag(void) {
+	struct reassembly_vector ipv6_4frag_case = {
+				.sa_data = &conf_aes_128_gcm,
+				.full_pkt = &pkt_ipv6_udp_p2,
+				.frags[0] = &pkt_ipv6_udp_p2_f1,
+				.frags[1] = &pkt_ipv6_udp_p2_f2,
+				.frags[2] = &pkt_ipv6_udp_p2_f3,
+				.frags[3] = &pkt_ipv6_udp_p2_f4,
+	};
+	return test_reassembly(&ipv6_4frag_case,
+			RTE_SECURITY_IPSEC_TUNNEL_IPV6);
+}
+
+static int
+test_reassembly_ipv4_5frag(void) {
+	struct reassembly_vector ipv4_5frag_case = {
+				.sa_data = &conf_aes_128_gcm,
+				.full_pkt = &pkt_ipv4_udp_p3,
+				.frags[0] = &pkt_ipv4_udp_p3_f1,
+				.frags[1] = &pkt_ipv4_udp_p3_f2,
+				.frags[2] = &pkt_ipv4_udp_p3_f3,
+				.frags[3] = &pkt_ipv4_udp_p3_f4,
+				.frags[4] = &pkt_ipv4_udp_p3_f5,
+	};
+	return test_reassembly(&ipv4_5frag_case,
+			RTE_SECURITY_IPSEC_TUNNEL_IPV4);
+}
+
+static int
+test_reassembly_ipv6_5frag(void) {
+	struct reassembly_vector ipv6_5frag_case = {
+				.sa_data = &conf_aes_128_gcm,
+				.full_pkt = &pkt_ipv6_udp_p3,
+				.frags[0] = &pkt_ipv6_udp_p3_f1,
+				.frags[1] = &pkt_ipv6_udp_p3_f2,
+				.frags[2] = &pkt_ipv6_udp_p3_f3,
+				.frags[3] = &pkt_ipv6_udp_p3_f4,
+				.frags[4] = &pkt_ipv6_udp_p3_f5,
+	};
+	return test_reassembly(&ipv6_5frag_case,
+			RTE_SECURITY_IPSEC_TUNNEL_IPV6);
+}
+
 
 static struct unit_test_suite inline_ipsec_testsuite  = {
 	.suite_name = "Inline IPsec Ethernet Device Unit Test Suite",
@@ -868,6 +951,24 @@ static struct unit_test_suite inline_ipsec_testsuite  = {
 		TEST_CASE_ST(ut_setup_inline_ipsec,
 				ut_teardown_inline_ipsec,
 				test_reassembly_ipv4_nofrag),
+		TEST_CASE_ST(ut_setup_inline_ipsec,
+				ut_teardown_inline_ipsec,
+				test_reassembly_ipv4_2frag),
+		TEST_CASE_ST(ut_setup_inline_ipsec,
+				ut_teardown_inline_ipsec,
+				test_reassembly_ipv6_2frag),
+		TEST_CASE_ST(ut_setup_inline_ipsec,
+				ut_teardown_inline_ipsec,
+				test_reassembly_ipv4_4frag),
+		TEST_CASE_ST(ut_setup_inline_ipsec,
+				ut_teardown_inline_ipsec,
+				test_reassembly_ipv6_4frag),
+		TEST_CASE_ST(ut_setup_inline_ipsec,
+				ut_teardown_inline_ipsec,
+				test_reassembly_ipv4_5frag),
+		TEST_CASE_ST(ut_setup_inline_ipsec,
+				ut_teardown_inline_ipsec,
+				test_reassembly_ipv6_5frag),
 
 		TEST_CASES_END() /**< NULL terminate unit test array */
 	}
diff --git a/app/test/test_inline_ipsec_reassembly_vectors.h b/app/test/test_inline_ipsec_reassembly_vectors.h
index 68066a0957..04cc3367c1 100644
--- a/app/test/test_inline_ipsec_reassembly_vectors.h
+++ b/app/test/test_inline_ipsec_reassembly_vectors.h
@@ -4,6 +4,47 @@
 #ifndef _TEST_INLINE_IPSEC_REASSEMBLY_VECTORS_H_
 #define _TEST_INLINE_IPSEC_REASSEMBLY_VECTORS_H_
 
+/* The source file includes below test vectors */
+/* IPv6:
+ *
+ *	1) pkt_ipv6_udp_p1
+ *		pkt_ipv6_udp_p1_f1
+ *		pkt_ipv6_udp_p1_f2
+ *
+ *	2) pkt_ipv6_udp_p2
+ *		pkt_ipv6_udp_p2_f1
+ *		pkt_ipv6_udp_p2_f2
+ *		pkt_ipv6_udp_p2_f3
+ *		pkt_ipv6_udp_p2_f4
+ *
+ *	3) pkt_ipv6_udp_p3
+ *		pkt_ipv6_udp_p3_f1
+ *		pkt_ipv6_udp_p3_f2
+ *		pkt_ipv6_udp_p3_f3
+ *		pkt_ipv6_udp_p3_f4
+ *		pkt_ipv6_udp_p3_f5
+ */
+
+/* IPv4:
+ *
+ *	1) pkt_ipv4_udp_p1
+ *		pkt_ipv4_udp_p1_f1
+ *		pkt_ipv4_udp_p1_f2
+ *
+ *	2) pkt_ipv4_udp_p2
+ *		pkt_ipv4_udp_p2_f1
+ *		pkt_ipv4_udp_p2_f2
+ *		pkt_ipv4_udp_p2_f3
+ *		pkt_ipv4_udp_p2_f4
+ *
+ *	3) pkt_ipv4_udp_p3
+ *		pkt_ipv4_udp_p3_f1
+ *		pkt_ipv4_udp_p3_f2
+ *		pkt_ipv4_udp_p3_f3
+ *		pkt_ipv4_udp_p3_f4
+ *		pkt_ipv4_udp_p3_f5
+ */
+
 #define MAX_FRAG_LEN		 1500
 #define MAX_FRAGS		 6
 #define MAX_PKT_LEN		 (MAX_FRAG_LEN * MAX_FRAGS)
@@ -43,6 +84,557 @@ struct reassembly_vector {
 	struct ipsec_test_packet *frags[MAX_FRAGS];
 };
 
+struct ipsec_test_packet pkt_ipv6_udp_p1 = {
+	.len = 1514,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 54,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x86, 0xdd,
+
+		/* IP */
+		0x60, 0x00, 0x00, 0x00, 0x05, 0xb4, 0x2C, 0x40,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x02,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x02, 0x00, 0x00, 0x02,
+
+		/* UDP */
+		0x08, 0x00, 0x27, 0x10, 0x05, 0xb4, 0x2b, 0xe8,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv6_udp_p1_f1 = {
+	.len = 1398,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 62,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x86, 0xdd,
+
+		/* IP */
+		0x60, 0x00, 0x00, 0x00, 0x05, 0x40, 0x2c, 0x40,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x02,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x02, 0x00, 0x00, 0x02,
+		0x11, 0x00, 0x00, 0x01, 0x5c, 0x92, 0xac, 0xf1,
+
+		/* UDP */
+		0x08, 0x00, 0x27, 0x10, 0x05, 0xb4, 0x2b, 0xe8,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv6_udp_p1_f2 = {
+	.len = 186,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 62,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x86, 0xdd,
+
+		/* IP */
+		0x60, 0x00, 0x00, 0x00, 0x00, 0x84, 0x2c, 0x40,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x02,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x02, 0x00, 0x00, 0x02,
+		0x11, 0x00, 0x05, 0x38, 0x5c, 0x92, 0xac, 0xf1,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv6_udp_p2 = {
+	.len = 4496,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 54,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x86, 0xdd,
+
+		/* IP */
+		0x60, 0x00, 0x00, 0x00, 0x11, 0x5a, 0x2c, 0x40,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x02,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x02, 0x00, 0x00, 0x02,
+
+		/* UDP */
+		0x08, 0x00, 0x27, 0x10, 0x11, 0x5a, 0x8a, 0x11,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv6_udp_p2_f1 = {
+	.len = 1398,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 62,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x86, 0xdd,
+
+		/* IP */
+		0x60, 0x00, 0x00, 0x00, 0x05, 0x40, 0x2c, 0x40,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x02,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x02, 0x00, 0x00, 0x02,
+		0x11, 0x00, 0x00, 0x01, 0x64, 0x6c, 0x68, 0x9f,
+
+		/* UDP */
+		0x08, 0x00, 0x27, 0x10, 0x11, 0x5a, 0x8a, 0x11,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv6_udp_p2_f2 = {
+	.len = 1398,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 62,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x86, 0xdd,
+
+		/* IP */
+		0x60, 0x00, 0x00, 0x00, 0x05, 0x40, 0x2c, 0x40,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x02,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x02, 0x00, 0x00, 0x02,
+		0x11, 0x00, 0x05, 0x39, 0x64, 0x6c, 0x68, 0x9f,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv6_udp_p2_f3 = {
+	.len = 1398,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 62,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x86, 0xdd,
+
+		/* IP */
+		0x60, 0x00, 0x00, 0x00, 0x05, 0x40, 0x2c, 0x40,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x02,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x02, 0x00, 0x00, 0x02,
+		0x11, 0x00, 0x0a, 0x71, 0x64, 0x6c, 0x68, 0x9f,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv6_udp_p2_f4 = {
+	.len = 496,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 62,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x86, 0xdd,
+
+		/* IP */
+		0x60, 0x00, 0x00, 0x00, 0x01, 0xba, 0x2c, 0x40,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x02,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x02, 0x00, 0x00, 0x02,
+		0x11, 0x00, 0x0f, 0xa8, 0x64, 0x6c, 0x68, 0x9f,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv6_udp_p3 = {
+	.len = 5796,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 62,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x86, 0xdd,
+
+		/* IP */
+		0x60, 0x00, 0x00, 0x00, 0x16, 0x6e, 0x2c, 0x40,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x02,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x02, 0x00, 0x00, 0x02,
+
+		/* UDP */
+		0x08, 0x00, 0x27, 0x10, 0x16, 0x6e, 0x2f, 0x99,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv6_udp_p3_f1 = {
+	.len = 1398,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 62,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x86, 0xdd,
+
+		/* IP */
+		0x60, 0x00, 0x00, 0x00, 0x05, 0x40, 0x2c, 0x40,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x02,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x02, 0x00, 0x00, 0x02,
+		0x11, 0x00, 0x00, 0x01, 0x65, 0xcf, 0x5a, 0xae,
+
+		/* UDP */
+		0x80, 0x00, 0x27, 0x10, 0x16, 0x6e, 0x2f, 0x99,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv6_udp_p3_f2 = {
+	.len = 1398,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 62,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x86, 0xdd,
+
+		/* IP */
+		0x60, 0x00, 0x00, 0x00, 0x05, 0x40, 0x2c, 0x40,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x02,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x02, 0x00, 0x00, 0x02,
+		0x11, 0x00, 0x05, 0x39, 0x65, 0xcf, 0x5a, 0xae,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv6_udp_p3_f3 = {
+	.len = 1398,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 62,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x86, 0xdd,
+
+		/* IP */
+		0x60, 0x00, 0x00, 0x00, 0x05, 0x40, 0x2c, 0x40,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x02,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x02, 0x00, 0x00, 0x02,
+		0x11, 0x00, 0x0a, 0x71, 0x65, 0xcf, 0x5a, 0xae,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv6_udp_p3_f4 = {
+	.len = 1398,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 62,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x86, 0xdd,
+
+		/* IP */
+		0x60, 0x00, 0x00, 0x00, 0x05, 0x40, 0x2c, 0x40,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x02,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x02, 0x00, 0x00, 0x02,
+		0x11, 0x00, 0x0f, 0xa9, 0x65, 0xcf, 0x5a, 0xae,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv6_udp_p3_f5 = {
+	.len = 460,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 62,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x86, 0xdd,
+
+		/* IP */
+		0x60, 0x00, 0x00, 0x00, 0x01, 0x96, 0x2c, 0x40,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x02,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x02, 0x00, 0x00, 0x02,
+		0x11, 0x00, 0x14, 0xe0, 0x65, 0xcf, 0x5a, 0xae,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv4_udp_p1 = {
+	.len = 1514,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 34,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x08, 0x00,
+
+		/* IP */
+		0x45, 0x00, 0x05, 0xdc, 0x00, 0x01, 0x00, 0x00,
+		0x40, 0x11, 0x66, 0x0d, 0x0d, 0x00, 0x00, 0x02,
+		0x02, 0x00, 0x00, 0x02,
+
+		/* UDP */
+		0x08, 0x00, 0x27, 0x10, 0x05, 0xc8, 0xb8, 0x4c,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv4_udp_p1_f1 = {
+	.len = 1434,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 34,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x08, 0x00,
+
+		/* IP */
+		0x45, 0x00, 0x05, 0x8c, 0x00, 0x01, 0x20, 0x00,
+		0x40, 0x11, 0x46, 0x5d, 0x0d, 0x00, 0x00, 0x02,
+		0x02, 0x00, 0x00, 0x02,
+
+		/* UDP */
+		0x08, 0x00, 0x27, 0x10, 0x05, 0xc8, 0xb8, 0x4c,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv4_udp_p1_f2 = {
+	.len = 114,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 34,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x08, 0x00,
+
+		/* IP */
+		0x45, 0x00, 0x00, 0x64, 0x00, 0x01, 0x00, 0xaf,
+		0x40, 0x11, 0x6a, 0xd6, 0x0d, 0x00, 0x00, 0x02,
+		0x02, 0x00, 0x00, 0x02,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv4_udp_p2 = {
+	.len = 4496,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 34,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x08, 0x00,
+
+		/* IP */
+		0x45, 0x00, 0x11, 0x82, 0x00, 0x02, 0x00, 0x00,
+		0x40, 0x11, 0x5a, 0x66, 0x0d, 0x00, 0x00, 0x02,
+		0x02, 0x00, 0x00, 0x02,
+
+		/* UDP */
+		0x08, 0x00, 0x27, 0x10, 0x11, 0x6e, 0x16, 0x76,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv4_udp_p2_f1 = {
+	.len = 1434,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 34,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x08, 0x00,
+
+		/* IP */
+		0x45, 0x00, 0x05, 0x8c, 0x00, 0x02, 0x20, 0x00,
+		0x40, 0x11, 0x46, 0x5c, 0x0d, 0x00, 0x00, 0x02,
+		0x02, 0x00, 0x00, 0x02,
+
+		/* UDP */
+		0x08, 0x00, 0x27, 0x10, 0x11, 0x6e, 0x16, 0x76,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv4_udp_p2_f2 = {
+	.len = 1434,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 34,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x08, 0x00,
+
+		/* IP */
+		0x45, 0x00, 0x05, 0x8c, 0x00, 0x02, 0x20, 0xaf,
+		0x40, 0x11, 0x45, 0xad, 0x0d, 0x00, 0x00, 0x02,
+		0x02, 0x00, 0x00, 0x02,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv4_udp_p2_f3 = {
+	.len = 1434,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 34,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x08, 0x00,
+
+		/* IP */
+		0x45, 0x00, 0x05, 0x8c, 0x00, 0x02, 0x21, 0x5e,
+		0x40, 0x11, 0x44, 0xfe, 0x0d, 0x00, 0x00, 0x02,
+		0x02, 0x00, 0x00, 0x02,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv4_udp_p2_f4 = {
+	.len = 296,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 34,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x08, 0x00,
+
+		/* IP */
+		0x45, 0x00, 0x01, 0x1a, 0x00, 0x02, 0x02, 0x0d,
+		0x40, 0x11, 0x68, 0xc1, 0x0d, 0x00, 0x00, 0x02,
+		0x02, 0x00, 0x00, 0x02,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv4_udp_p3 = {
+	.len = 5796,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 34,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x08, 0x00,
+
+		/* IP */
+		0x45, 0x00, 0x16, 0x96, 0x00, 0x03, 0x00, 0x00,
+		0x40, 0x11, 0x55, 0x51, 0x0d, 0x00, 0x00, 0x02,
+		0x02, 0x00, 0x00, 0x02,
+
+		/* UDP */
+		0x08, 0x00, 0x27, 0x10, 0x16, 0x82, 0xbb, 0xfd,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv4_udp_p3_f1 = {
+	.len = 1434,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 34,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x08, 0x00,
+
+		/* IP */
+		0x45, 0x00, 0x05, 0x8c, 0x00, 0x03, 0x20, 0x00,
+		0x40, 0x11, 0x46, 0x5b, 0x0d, 0x00, 0x00, 0x02,
+		0x02, 0x00, 0x00, 0x02,
+
+		/* UDP */
+		0x80, 0x00, 0x27, 0x10, 0x16, 0x82, 0xbb, 0xfd,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv4_udp_p3_f2 = {
+	.len = 1434,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 34,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x08, 0x00,
+
+		/* IP */
+		0x45, 0x00, 0x05, 0x8c, 0x00, 0x03, 0x20, 0xaf,
+		0x40, 0x11, 0x45, 0xac, 0x0d, 0x00, 0x00, 0x02,
+		0x02, 0x00, 0x00, 0x02,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv4_udp_p3_f3 = {
+	.len = 1434,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 34,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x08, 0x00,
+
+		/* IP */
+		0x45, 0x00, 0x05, 0x8c, 0x00, 0x03, 0x21, 0x5e,
+		0x40, 0x11, 0x44, 0xfd, 0x0d, 0x00, 0x00, 0x02,
+		0x02, 0x00, 0x00, 0x02,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv4_udp_p3_f4 = {
+	.len = 1434,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 34,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x08, 0x00,
+
+		/* IP */
+		0x45, 0x00, 0x05, 0x8c, 0x00, 0x03, 0x22, 0x0d,
+		0x40, 0x11, 0x44, 0x4e, 0x0d, 0x00, 0x00, 0x02,
+		0x02, 0x00, 0x00, 0x02,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv4_udp_p3_f5 = {
+	.len = 196,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 34,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x08, 0x00,
+
+		/* IP */
+		0x45, 0x00, 0x00, 0xb6, 0x00, 0x03, 0x02, 0xbc,
+		0x40, 0x11, 0x68, 0x75, 0x0d, 0x00, 0x00, 0x02,
+		0x02, 0x00, 0x00, 0x02,
+	},
+};
+
 struct ipsec_test_packet pkt_ipv4_plain = {
 	.len = 76,
 	.l2_offset = 0,
-- 
2.25.1


^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH 8/8] app/test: add IP reassembly negative cases
  2022-01-03 15:08 ` [PATCH 0/8] ethdev: introduce IP " Akhil Goyal
                     ` (6 preceding siblings ...)
  2022-01-03 15:08   ` [PATCH 7/8] app/test: add IP reassembly cases with multiple fragments Akhil Goyal
@ 2022-01-03 15:08   ` Akhil Goyal
  2022-01-06  9:51   ` [PATCH 0/8] ethdev: introduce IP reassembly offload David Marchand
  2022-01-20 16:26   ` [PATCH v2 0/4] " Akhil Goyal
  9 siblings, 0 replies; 53+ messages in thread
From: Akhil Goyal @ 2022-01-03 15:08 UTC (permalink / raw)
  To: dev
  Cc: anoobj, radu.nicolau, declan.doherty, hemant.agrawal, matan,
	konstantin.ananyev, thomas, ferruh.yigit, andrew.rybchenko,
	olivier.matz, rosen.xu, Akhil Goyal

The test_inline_ipsec test suite is extended with cases where IP reassembly
is incomplete and software will need to reassemble the fragments later.
The failure cases added are:
- not all fragments are received.
- the same fragment is received more than once.
- fragments arrive out of order.

Signed-off-by: Akhil Goyal <gakhil@marvell.com>
---
 app/test/test_inline_ipsec.c | 53 ++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/app/test/test_inline_ipsec.c b/app/test/test_inline_ipsec.c
index 3f3731760d..0d74e23359 100644
--- a/app/test/test_inline_ipsec.c
+++ b/app/test/test_inline_ipsec.c
@@ -936,6 +936,50 @@ test_reassembly_ipv6_5frag(void) {
 			RTE_SECURITY_IPSEC_TUNNEL_IPV6);
 }
 
+static int
+test_reassembly_incomplete(void) {
+	/* Negative test case, not sending all fragments. */
+	struct reassembly_vector ipv4_incomplete_case = {
+				.sa_data = &conf_aes_128_gcm,
+				.full_pkt = &pkt_ipv4_udp_p2,
+				.frags[0] = &pkt_ipv4_udp_p2_f1,
+				.frags[1] = &pkt_ipv4_udp_p2_f2,
+				.frags[2] = NULL,
+				.frags[3] = NULL,
+	};
+	return test_reassembly(&ipv4_incomplete_case,
+			RTE_SECURITY_IPSEC_TUNNEL_IPV4);
+}
+
+static int
+test_reassembly_overlap(void) {
+	/* Negative test case, sending 1 fragment twice. */
+	struct reassembly_vector ipv4_overlap_case = {
+				.sa_data = &conf_aes_128_gcm,
+				.full_pkt = &pkt_ipv4_udp_p2,
+				.frags[0] = &pkt_ipv4_udp_p2_f1,
+				.frags[1] = &pkt_ipv4_udp_p2_f2,
+				.frags[2] = &pkt_ipv4_udp_p2_f2, /* overlap */
+				.frags[3] = &pkt_ipv4_udp_p2_f3,
+	};
+	return test_reassembly(&ipv4_overlap_case,
+			RTE_SECURITY_IPSEC_TUNNEL_IPV4);
+}
+
+static int
+test_reassembly_out_of_order(void) {
+	/* Negative test case, fragments sent out of order. */
+	struct reassembly_vector ipv4_ooo_case = {
+				.sa_data = &conf_aes_128_gcm,
+				.full_pkt = &pkt_ipv4_udp_p2,
+				.frags[0] = &pkt_ipv4_udp_p2_f4,
+				.frags[1] = &pkt_ipv4_udp_p2_f3,
+				.frags[2] = &pkt_ipv4_udp_p2_f1,
+				.frags[3] = &pkt_ipv4_udp_p2_f2,
+	};
+	return test_reassembly(&ipv4_ooo_case,
+			RTE_SECURITY_IPSEC_TUNNEL_IPV4);
+}
 
 static struct unit_test_suite inline_ipsec_testsuite  = {
 	.suite_name = "Inline IPsec Ethernet Device Unit Test Suite",
@@ -969,6 +1013,15 @@ static struct unit_test_suite inline_ipsec_testsuite  = {
 		TEST_CASE_ST(ut_setup_inline_ipsec,
 				ut_teardown_inline_ipsec,
 				test_reassembly_ipv6_5frag),
+		TEST_CASE_ST(ut_setup_inline_ipsec,
+				ut_teardown_inline_ipsec,
+				test_reassembly_incomplete),
+		TEST_CASE_ST(ut_setup_inline_ipsec,
+				ut_teardown_inline_ipsec,
+				test_reassembly_overlap),
+		TEST_CASE_ST(ut_setup_inline_ipsec,
+				ut_teardown_inline_ipsec,
+				test_reassembly_out_of_order),
 
 		TEST_CASES_END() /**< NULL terminate unit test array */
 	}
-- 
2.25.1


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [PATCH 0/8] ethdev: introduce IP reassembly offload
  2022-01-03 15:08 ` [PATCH 0/8] ethdev: introduce IP " Akhil Goyal
                     ` (7 preceding siblings ...)
  2022-01-03 15:08   ` [PATCH 8/8] app/test: add IP reassembly negative cases Akhil Goyal
@ 2022-01-06  9:51   ` David Marchand
  2022-01-06  9:54     ` [EXT] " Akhil Goyal
  2022-01-20 16:26   ` [PATCH v2 0/4] " Akhil Goyal
  9 siblings, 1 reply; 53+ messages in thread
From: David Marchand @ 2022-01-06  9:51 UTC (permalink / raw)
  To: Akhil Goyal
  Cc: dev, Anoob Joseph, Radu Nicolau, Declan Doherty, Hemant Agrawal,
	Matan Azrad, Ananyev, Konstantin, Thomas Monjalon, Yigit, Ferruh,
	Andrew Rybchenko, Olivier Matz, Rosen Xu

On Mon, Jan 3, 2022 at 4:08 PM Akhil Goyal <gakhil@marvell.com> wrote:
>
> As discussed in the RFC[1] sent in 21.11, a new offload is
> introduced in ethdev for IP reassembly.
>
> This patchset add the RX offload and an application to test it.
> Currently, the offload is tested along with inline IPsec processing.
> It can also be updated as a standalone offload without IPsec, if there
> are some hardware available to test it.
> The patchset is tested on cnxk platform. The driver implementation is
> added as a separate patchset.
>
> [1]: http://patches.dpdk.org/project/dpdk/patch/20210823100259.1619886-1-gakhil@marvell.com/
>
>
> Akhil Goyal (8):
>   ethdev: introduce IP reassembly offload
>   ethdev: add dev op for IP reassembly configuration
>   ethdev: add mbuf dynfield for incomplete IP reassembly
>   security: add IPsec option for IP reassembly
>   app/test: add unit cases for inline IPsec offload
>   app/test: add IP reassembly case with no frags
>   app/test: add IP reassembly cases with multiple fragments
>   app/test: add IP reassembly negative cases
>
>  app/test/meson.build                          |    1 +
>  app/test/test_inline_ipsec.c                  | 1036 +++++++++++++++++
>  .../test_inline_ipsec_reassembly_vectors.h    |  790 +++++++++++++

I see no update in MAINTAINERS for those new files.
So I think they end up in the "main" repo scope.

You can either update MAINTAINERS (changing the app/test/test_ipsec*
pattern to app/test/test_*ipsec*) or rename the files as
app/test/test_ipsec_inline.c, for example.
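
For illustration, the MAINTAINERS change could be as small as the following
one-line pattern update (a sketch only; the exact section and surrounding
entries may differ):

	-F: app/test/test_ipsec*
	+F: app/test/test_*ipsec*

so that test_inline_ipsec.c and test_inline_ipsec_reassembly_vectors.h fall
under the existing maintainer entry.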

-- 
David Marchand


^ permalink raw reply	[flat|nested] 53+ messages in thread

* RE: [EXT] Re: [PATCH 0/8] ethdev: introduce IP reassembly offload
  2022-01-06  9:51   ` [PATCH 0/8] ethdev: introduce IP reassembly offload David Marchand
@ 2022-01-06  9:54     ` Akhil Goyal
  0 siblings, 0 replies; 53+ messages in thread
From: Akhil Goyal @ 2022-01-06  9:54 UTC (permalink / raw)
  To: David Marchand
  Cc: dev, Anoob Joseph, Radu Nicolau, Declan Doherty, Hemant Agrawal,
	Matan Azrad, Ananyev, Konstantin, Thomas Monjalon, Yigit, Ferruh,
	Andrew Rybchenko, Olivier Matz, Rosen Xu

> > Akhil Goyal (8):
> >   ethdev: introduce IP reassembly offload
> >   ethdev: add dev op for IP reassembly configuration
> >   ethdev: add mbuf dynfield for incomplete IP reassembly
> >   security: add IPsec option for IP reassembly
> >   app/test: add unit cases for inline IPsec offload
> >   app/test: add IP reassembly case with no frags
> >   app/test: add IP reassembly cases with multiple fragments
> >   app/test: add IP reassembly negative cases
> >
> >  app/test/meson.build                          |    1 +
> >  app/test/test_inline_ipsec.c                  | 1036 +++++++++++++++++
> >  .../test_inline_ipsec_reassembly_vectors.h    |  790 +++++++++++++
> 
> I see no update in MAINTAINERS for those new files.
> So I think they end up in the "main" repo scope.
> 
> You can either update MAINTAINERS (changing the app/test/test_ipsec*
> pattern to app/test/test_*ipsec*) or rename the files as
> app/test/test_ipsec_inline.c, for example.
> 
Thanks for the update, David.
There are a few other issues in the patchset; I will post a new version in the next few days
with MAINTAINERS updated.

^ permalink raw reply	[flat|nested] 53+ messages in thread

* RE: [PATCH 1/8] ethdev: introduce IP reassembly offload
  2022-01-03 15:08   ` [PATCH 1/8] " Akhil Goyal
@ 2022-01-11 16:03     ` Ananyev, Konstantin
  2022-01-22  7:38     ` Andrew Rybchenko
  1 sibling, 0 replies; 53+ messages in thread
From: Ananyev, Konstantin @ 2022-01-11 16:03 UTC (permalink / raw)
  To: Akhil Goyal, dev
  Cc: anoobj, Nicolau, Radu, Doherty, Declan, hemant.agrawal, matan,
	thomas, Yigit, Ferruh, andrew.rybchenko, olivier.matz, Xu, Rosen


> IP Reassembly is a costly operation if it is done in software.
> The operation becomes even costlier if IP fragments are encrypted.
> However, if it is offloaded to HW, it can considerably save application cycles.
> 
> Hence, a new offload RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY is introduced in
> ethdev for devices which can attempt reassembly of packets in hardware.
> rte_eth_dev_info is updated with the reassembly capabilities which a device
> can support.
> 
> The resulting reassembled packet would be a typical segmented mbuf in
> case of success.
> 
> And if reassembly of fragments fails or is incomplete (if fragments do
> not arrive before the reass_timeout), the mbuf ol_flags can be updated.
> This is updated in a subsequent patch.
> 
> Signed-off-by: Akhil Goyal <gakhil@marvell.com>
> ---
>  doc/guides/nics/features.rst | 12 ++++++++++++
>  lib/ethdev/rte_ethdev.c      |  1 +
>  lib/ethdev/rte_ethdev.h      | 32 +++++++++++++++++++++++++++++++-
>  3 files changed, 44 insertions(+), 1 deletion(-)
> 
> diff --git a/doc/guides/nics/features.rst b/doc/guides/nics/features.rst
> index 27be2d2576..1dfdee9602 100644
> --- a/doc/guides/nics/features.rst
> +++ b/doc/guides/nics/features.rst
> @@ -602,6 +602,18 @@ Supports inner packet L4 checksum.
>    ``tx_offload_capa,tx_queue_offload_capa:RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM``.
> 
> 
> +.. _nic_features_ip_reassembly:
> +
> +IP reassembly
> +-------------
> +
> +Supports IP reassembly in hardware.
> +
> +* **[uses]     rte_eth_rxconf,rte_eth_rxmode**: ``offloads:RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY``.
> +* **[provides] mbuf**: ``mbuf.ol_flags:RTE_MBUF_F_RX_IP_REASSEMBLY_INCOMPLETE``.
> +* **[provides] rte_eth_dev_info**: ``reass_capa``.
> +
> +
>  .. _nic_features_shared_rx_queue:
> 
>  Shared Rx queue
> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
> index a1d475a292..d9a03f12f9 100644
> --- a/lib/ethdev/rte_ethdev.c
> +++ b/lib/ethdev/rte_ethdev.c
> @@ -126,6 +126,7 @@ static const struct {
>  	RTE_RX_OFFLOAD_BIT2STR(OUTER_UDP_CKSUM),
>  	RTE_RX_OFFLOAD_BIT2STR(RSS_HASH),
>  	RTE_RX_OFFLOAD_BIT2STR(BUFFER_SPLIT),
> +	RTE_RX_OFFLOAD_BIT2STR(IP_REASSEMBLY),
>  };
> 
>  #undef RTE_RX_OFFLOAD_BIT2STR
> diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
> index fa299c8ad7..11427b2e4d 100644
> --- a/lib/ethdev/rte_ethdev.h
> +++ b/lib/ethdev/rte_ethdev.h
> @@ -1586,6 +1586,7 @@ struct rte_eth_conf {
>  #define RTE_ETH_RX_OFFLOAD_RSS_HASH         RTE_BIT64(19)
>  #define DEV_RX_OFFLOAD_RSS_HASH             RTE_ETH_RX_OFFLOAD_RSS_HASH
>  #define RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT     RTE_BIT64(20)
> +#define RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY    RTE_BIT64(21)
> 
>  #define RTE_ETH_RX_OFFLOAD_CHECKSUM (RTE_ETH_RX_OFFLOAD_IPV4_CKSUM | \
>  				 RTE_ETH_RX_OFFLOAD_UDP_CKSUM | \
> @@ -1781,6 +1782,33 @@ enum rte_eth_representor_type {
>  	RTE_ETH_REPRESENTOR_PF,   /**< representor of Physical Function. */
>  };
> 
> +/* Flag to offload IP reassembly for IPv4 packets. */
> +#define RTE_ETH_DEV_REASSEMBLY_F_IPV4 (RTE_BIT32(0))
> +/* Flag to offload IP reassembly for IPv6 packets. */
> +#define RTE_ETH_DEV_REASSEMBLY_F_IPV6 (RTE_BIT32(1))
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this structure may change without prior notice.
> + *
> + * A structure used to set IP reassembly configuration.
> + *
> + * If RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY flag is set in offloads field,
> + * the PMD will attempt IP reassembly for the received packets as per
> + * properties defined in this structure:
> + *
> + */
> +struct rte_eth_ip_reass_params {
> +	/** Maximum time in ms which PMD can wait for other fragments. */
> +	uint32_t reass_timeout;
> +	/** Maximum number of fragments that can be reassembled. */
> +	uint16_t max_frags;
> +	/**
> +	 * Flags to enable reassembly of packet types -
> +	 * RTE_ETH_DEV_REASSEMBLY_F_xxx.
> +	 */
> +	uint16_t flags;
> +};
> +
>  /**
>   * A structure used to retrieve the contextual information of
>   * an Ethernet device, such as the controlling driver of the
> @@ -1841,8 +1869,10 @@ struct rte_eth_dev_info {
>  	 * embedded managed interconnect/switch.
>  	 */
>  	struct rte_eth_switch_info switch_info;
> +	/** IP reassembly offload capabilities that a device can support. */
> +	struct rte_eth_ip_reass_params reass_capa;
> 
> -	uint64_t reserved_64s[2]; /**< Reserved for future fields */
> +	uint64_t reserved_64s[1]; /**< Reserved for future fields */
>  	void *reserved_ptrs[2];   /**< Reserved for future fields */
>  };
> 
> --

Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>

> 2.25.1


^ permalink raw reply	[flat|nested] 53+ messages in thread

* RE: [PATCH 2/8] ethdev: add dev op for IP reassembly configuration
  2022-01-03 15:08   ` [PATCH 2/8] ethdev: add dev op for IP reassembly configuration Akhil Goyal
@ 2022-01-11 16:09     ` Ananyev, Konstantin
  2022-01-11 18:54       ` Akhil Goyal
  0 siblings, 1 reply; 53+ messages in thread
From: Ananyev, Konstantin @ 2022-01-11 16:09 UTC (permalink / raw)
  To: Akhil Goyal, dev
  Cc: anoobj, Nicolau, Radu, Doherty, Declan, hemant.agrawal, matan,
	thomas, Yigit, Ferruh, andrew.rybchenko, olivier.matz, Xu, Rosen



> A new ethernet device op is added to give application control over
> the IP reassembly configuration. This operation is an optional
> call from the application, default values are set by PMD and
> exposed via rte_eth_dev_info.
> Application should always first retrieve the capabilities from
> rte_eth_dev_info and then set the fields accordingly.
> 
> Signed-off-by: Akhil Goyal <gakhil@marvell.com>
> ---
>  lib/ethdev/ethdev_driver.h | 19 +++++++++++++++++++
>  lib/ethdev/rte_ethdev.c    | 30 ++++++++++++++++++++++++++++++
>  lib/ethdev/rte_ethdev.h    | 28 ++++++++++++++++++++++++++++
>  lib/ethdev/version.map     |  3 +++
>  4 files changed, 80 insertions(+)
> 
> diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h
> index d95605a355..0ed53c14f3 100644
> --- a/lib/ethdev/ethdev_driver.h
> +++ b/lib/ethdev/ethdev_driver.h
> @@ -990,6 +990,22 @@ typedef int (*eth_representor_info_get_t)(struct rte_eth_dev *dev,
>  typedef int (*eth_rx_metadata_negotiate_t)(struct rte_eth_dev *dev,
>  				       uint64_t *features);
> 
> +/**
> + * @internal
> + * Set configuration parameters for enabling IP reassembly offload in hardware.
> + *
> + * @param dev
> + *   Port (ethdev) handle
> + *
> + * @param[in] conf
> + *   Configuration parameters for IP reassembly.
> + *
> + * @return
> + *   Negative errno value on error, zero otherwise
> + */
> +typedef int (*eth_ip_reassembly_conf_set_t)(struct rte_eth_dev *dev,
> +				       struct rte_eth_ip_reass_params *conf);
> +
>  /**
>   * @internal A structure containing the functions exported by an Ethernet driver.
>   */
> @@ -1186,6 +1202,9 @@ struct eth_dev_ops {
>  	 * kinds of metadata to the PMD
>  	 */
>  	eth_rx_metadata_negotiate_t rx_metadata_negotiate;
> +
> +	/** Set IP reassembly configuration */
> +	eth_ip_reassembly_conf_set_t ip_reassembly_conf_set;
>  };
> 
>  /**
> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
> index d9a03f12f9..ecc6c1fe37 100644
> --- a/lib/ethdev/rte_ethdev.c
> +++ b/lib/ethdev/rte_ethdev.c
> @@ -6473,6 +6473,36 @@ rte_eth_rx_metadata_negotiate(uint16_t port_id, uint64_t *features)
>  		       (*dev->dev_ops->rx_metadata_negotiate)(dev, features));
>  }
> 
> +int
> +rte_eth_ip_reassembly_conf_set(uint16_t port_id,
> +			       struct rte_eth_ip_reass_params *conf)
> +{
> +	struct rte_eth_dev *dev;
> +
> +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
> +	dev = &rte_eth_devices[port_id];

Should we check here that device is properly configured, but not started yet?
Another question - if we have reassembly_conf_set() would it make sense to
have also reassembly_conf_get?
So user can retrieve current ip_reassembly config values? 

> +
> +	if ((dev->data->dev_conf.rxmode.offloads &
> +			RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY) == 0) {
> +		RTE_ETHDEV_LOG(ERR,
> +			"The port (ID=%"PRIu16") is not configured for IP reassembly\n",
> +			port_id);
> +		return -EINVAL;
> +	}
> +
> +
> +	if (conf == NULL) {
> +		RTE_ETHDEV_LOG(ERR,
> +				"Invalid IP reassembly configuration (NULL)\n");
> +		return -EINVAL;
> +	}
> +
> +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->ip_reassembly_conf_set,
> +				-ENOTSUP);
> +	return eth_err(port_id,
> +		       (*dev->dev_ops->ip_reassembly_conf_set)(dev, conf));
> +}
> +
>  RTE_LOG_REGISTER_DEFAULT(rte_eth_dev_logtype, INFO);
> 
>  RTE_INIT(ethdev_init_telemetry)

^ permalink raw reply	[flat|nested] 53+ messages in thread

* RE: [PATCH 3/8] ethdev: add mbuf dynfield for incomplete IP reassembly
  2022-01-03 15:08   ` [PATCH 3/8] ethdev: add mbuf dynfield for incomplete IP reassembly Akhil Goyal
@ 2022-01-11 17:04     ` Ananyev, Konstantin
  2022-01-11 18:44       ` Akhil Goyal
  0 siblings, 1 reply; 53+ messages in thread
From: Ananyev, Konstantin @ 2022-01-11 17:04 UTC (permalink / raw)
  To: Akhil Goyal, dev
  Cc: anoobj, Nicolau, Radu, Doherty, Declan, hemant.agrawal, matan,
	thomas, Yigit, Ferruh, andrew.rybchenko, olivier.matz, Xu, Rosen


> Hardware IP reassembly may be incomplete for multiple reasons like
> reassembly timeout reached, duplicate fragments, etc.
> To save application cycles to process these packets again, a new
> mbuf ol_flag (RTE_MBUF_F_RX_IPREASSEMBLY_INCOMPLETE) is added to
> show that the mbuf received is not reassembled properly.

If we use a dynfield for data, why not use a dynflag for RTE_MBUF_F_RX_IPREASSEMBLY_INCOMPLETE?
That way we can avoid introducing hardcoded (always defined) flags for that case. 

> 
> Now if this flag is set, application can retrieve corresponding chain of
> mbufs using mbuf dynfield set by the PMD. Now, it will be up to
> application to either drop those fragments or wait for more time.
> 
> Signed-off-by: Akhil Goyal <gakhil@marvell.com>
> ---
>  lib/ethdev/ethdev_driver.h |  8 ++++++
>  lib/ethdev/rte_ethdev.c    | 16 +++++++++++
>  lib/ethdev/rte_ethdev.h    | 57 ++++++++++++++++++++++++++++++++++++++
>  lib/ethdev/version.map     |  2 ++
>  lib/mbuf/rte_mbuf_core.h   |  3 +-
>  5 files changed, 85 insertions(+), 1 deletion(-)
> 
> diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h
> index 0ed53c14f3..9a0bab9a61 100644
> --- a/lib/ethdev/ethdev_driver.h
> +++ b/lib/ethdev/ethdev_driver.h
> @@ -1671,6 +1671,14 @@ int
>  rte_eth_hairpin_queue_peer_unbind(uint16_t cur_port, uint16_t cur_queue,
>  				  uint32_t direction);
> 
> +/**
> + * @internal
> + * Register mbuf dynamic field for IP reassembly incomplete case.
> + */
> +__rte_internal
> +int
> +rte_eth_ip_reass_dynfield_register(void);
> +
> 
>  /*
>   * Legacy ethdev API used internally by drivers.
> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
> index ecc6c1fe37..d53ce4eaca 100644
> --- a/lib/ethdev/rte_ethdev.c
> +++ b/lib/ethdev/rte_ethdev.c
> @@ -6503,6 +6503,22 @@ rte_eth_ip_reassembly_conf_set(uint16_t port_id,
>  		       (*dev->dev_ops->ip_reassembly_conf_set)(dev, conf));
>  }
> 
> +#define RTE_ETH_IP_REASS_DYNFIELD_NAME "rte_eth_ip_reass_dynfield"
> +int rte_eth_ip_reass_dynfield_offset = -1;
> +
> +int
> +rte_eth_ip_reass_dynfield_register(void)
> +{
> +	static const struct rte_mbuf_dynfield dynfield_desc = {
> +		.name = RTE_ETH_IP_REASS_DYNFIELD_NAME,
> +		.size = sizeof(rte_eth_ip_reass_dynfield_t),
> +		.align = __alignof__(rte_eth_ip_reass_dynfield_t),
> +	};
> +	rte_eth_ip_reass_dynfield_offset =
> +		rte_mbuf_dynfield_register(&dynfield_desc);
> +	return rte_eth_ip_reass_dynfield_offset;
> +}
> +
>  RTE_LOG_REGISTER_DEFAULT(rte_eth_dev_logtype, INFO);
> 
>  RTE_INIT(ethdev_init_telemetry)
> diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
> index 891f9a6e06..c4024d2265 100644
> --- a/lib/ethdev/rte_ethdev.h
> +++ b/lib/ethdev/rte_ethdev.h
> @@ -5245,6 +5245,63 @@ __rte_experimental
>  int rte_eth_ip_reassembly_conf_set(uint16_t port_id,
>  				   struct rte_eth_ip_reass_params *conf);
> 
> +/**
> + * In case of IP reassembly offload failure, ol_flags in mbuf will be set
> + * with RTE_MBUF_F_RX_IPREASSEMBLY_INCOMPLETE and packets will be returned
> + * without alteration. The application can retrieve the attached fragments
> + * using mbuf dynamic field.
> + */	
> +typedef struct {
> +	/**
> +	 * Next fragment packet. Application should fetch dynamic field of
> +	 * each fragment until a NULL is received and nb_frags is 0.
> +	 */
> +	struct rte_mbuf *next_frag;
> +	/** Time spent(in ms) by HW in waiting for further fragments. */
> +	uint16_t time_spent;
> +	/** Number of more fragments attached in mbuf dynamic fields. */
> +	uint16_t nb_frags;
> +} rte_eth_ip_reass_dynfield_t;


Looks like a bit of overkill to me:
We do already have 'next' and 'nb_frags' fields inside mbuf,
why can't they be used here? Why are separate ones necessary?  

> +
> +extern int rte_eth_ip_reass_dynfield_offset;
> +


^ permalink raw reply	[flat|nested] 53+ messages in thread

* RE: [PATCH 3/8] ethdev: add mbuf dynfield for incomplete IP reassembly
  2022-01-11 17:04     ` Ananyev, Konstantin
@ 2022-01-11 18:44       ` Akhil Goyal
  2022-01-12 10:30         ` Ananyev, Konstantin
  2022-01-13 13:18         ` Akhil Goyal
  0 siblings, 2 replies; 53+ messages in thread
From: Akhil Goyal @ 2022-01-11 18:44 UTC (permalink / raw)
  To: Ananyev, Konstantin, dev
  Cc: Anoob Joseph, Nicolau, Radu, Doherty, Declan, hemant.agrawal,
	matan, thomas, Yigit, Ferruh, andrew.rybchenko, olivier.matz, Xu,
	Rosen

> 
> > Hardware IP reassembly may be incomplete for multiple reasons like
> > reassembly timeout reached, duplicate fragments, etc.
> > To save application cycles to process these packets again, a new
> > mbuf ol_flag (RTE_MBUF_F_RX_IPREASSEMBLY_INCOMPLETE) is added to
> > show that the mbuf received is not reassembled properly.
> 
> If we use a dynfield for data, why not use a dynflag for
> RTE_MBUF_F_RX_IPREASSEMBLY_INCOMPLETE?
> That way we can avoid introducing hardcoded (always defined) flags for that
> case.

I have not looked into using dynflag. Will explore if it can be used.


> >
> > +/**
> > + * In case of IP reassembly offload failure, ol_flags in mbuf will be set
> > + * with RTE_MBUF_F_RX_IPREASSEMBLY_INCOMPLETE and packets will be
> returned
> > + * without alteration. The application can retrieve the attached fragments
> > + * using mbuf dynamic field.
> > + */
> > +typedef struct {
> > +	/**
> > +	 * Next fragment packet. Application should fetch dynamic field of
> > +	 * each fragment until a NULL is received and nb_frags is 0.
> > +	 */
> > +	struct rte_mbuf *next_frag;
> > +	/** Time spent(in ms) by HW in waiting for further fragments. */
> > +	uint16_t time_spent;
> > +	/** Number of more fragments attached in mbuf dynamic fields. */
> > +	uint16_t nb_frags;
> > +} rte_eth_ip_reass_dynfield_t;
> 
> 
> Looks like a bit of overkill to me:
> We do already have 'next' and 'nb_frags' fields inside mbuf,
> why can't they be used here? Why are separate ones necessary?
> 
The next and nb_frags fields in mbuf are for segmented buffers, not IP fragments.
But here we will have separate mbufs in each dynfield, denoting each of the
fragments, which may themselves have further segmented buffers.
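
For illustration, a minimal sketch (not part of the patch) of how an application
could walk such a chain, assuming the rte_eth_ip_reass_dynfield_t layout proposed
here and a dynfield offset obtained from rte_mbuf_dynfield_lookup(); the helper
name is made up:

#include <rte_ethdev.h>
#include <rte_mbuf.h>
#include <rte_mbuf_dyn.h>

/* Hypothetical helper: free an incomplete reassembly chain.
 * 'dyn_offset' is the offset returned by rte_mbuf_dynfield_lookup()
 * for the dynfield registered by the PMD.
 */
static void
free_incomplete_chain(struct rte_mbuf *m, int dyn_offset)
{
	while (m != NULL) {
		rte_eth_ip_reass_dynfield_t *df = RTE_MBUF_DYNFIELD(m,
			dyn_offset, rte_eth_ip_reass_dynfield_t *);
		struct rte_mbuf *next = df->next_frag;

		/* each fragment may itself be a multi-segment mbuf */
		rte_pktmbuf_free(m);
		m = next;
	}
}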

^ permalink raw reply	[flat|nested] 53+ messages in thread

* RE: [PATCH 2/8] ethdev: add dev op for IP reassembly configuration
  2022-01-11 16:09     ` Ananyev, Konstantin
@ 2022-01-11 18:54       ` Akhil Goyal
  2022-01-12 10:22         ` Ananyev, Konstantin
  0 siblings, 1 reply; 53+ messages in thread
From: Akhil Goyal @ 2022-01-11 18:54 UTC (permalink / raw)
  To: Ananyev, Konstantin, dev
  Cc: Anoob Joseph, Nicolau, Radu, Doherty, Declan, hemant.agrawal,
	matan, thomas, Yigit, Ferruh, andrew.rybchenko, olivier.matz, Xu,
	Rosen

> > diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
> > index d9a03f12f9..ecc6c1fe37 100644
> > --- a/lib/ethdev/rte_ethdev.c
> > +++ b/lib/ethdev/rte_ethdev.c
> > @@ -6473,6 +6473,36 @@ rte_eth_rx_metadata_negotiate(uint16_t port_id,
> uint64_t *features)
> >  		       (*dev->dev_ops->rx_metadata_negotiate)(dev, features));
> >  }
> >
> > +int
> > +rte_eth_ip_reassembly_conf_set(uint16_t port_id,
> > +			       struct rte_eth_ip_reass_params *conf)
> > +{
> > +	struct rte_eth_dev *dev;
> > +
> > +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
> > +	dev = &rte_eth_devices[port_id];
> 
> Should we check here that device is properly configured, but not started yet?
Ok will add checks for dev->data->dev_configured and dev->data->dev_started

> Another question - if we have reassembly_conf_set() would it make sense to
> have also reassembly_conf_get?
> So user can retrieve current ip_reassembly config values?
> 
The set/supported values can be retrieved using rte_eth_dev_info :: reass_capa


^ permalink raw reply	[flat|nested] 53+ messages in thread

* RE: [PATCH 2/8] ethdev: add dev op for IP reassembly configuration
  2022-01-11 18:54       ` Akhil Goyal
@ 2022-01-12 10:22         ` Ananyev, Konstantin
  2022-01-12 10:32           ` Akhil Goyal
  0 siblings, 1 reply; 53+ messages in thread
From: Ananyev, Konstantin @ 2022-01-12 10:22 UTC (permalink / raw)
  To: Akhil Goyal, dev
  Cc: Anoob Joseph, Nicolau, Radu, Doherty, Declan, hemant.agrawal,
	matan, thomas, Yigit, Ferruh, andrew.rybchenko, olivier.matz, Xu,
	Rosen



> > > diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
> > > index d9a03f12f9..ecc6c1fe37 100644
> > > --- a/lib/ethdev/rte_ethdev.c
> > > +++ b/lib/ethdev/rte_ethdev.c
> > > @@ -6473,6 +6473,36 @@ rte_eth_rx_metadata_negotiate(uint16_t port_id,
> > uint64_t *features)
> > >  		       (*dev->dev_ops->rx_metadata_negotiate)(dev, features));
> > >  }
> > >
> > > +int
> > > +rte_eth_ip_reassembly_conf_set(uint16_t port_id,
> > > +			       struct rte_eth_ip_reass_params *conf)
> > > +{
> > > +	struct rte_eth_dev *dev;
> > > +
> > > +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
> > > +	dev = &rte_eth_devices[port_id];
> >
> > Should we check here that device is properly configured, but not started yet?
> Ok will add checks for dev->data->dev_configured and dev->data->dev_started
> 
> > Another question - if we have reassembly_conf_set() would it make sense to
> > have also reassembly_conf_get?
> > So user can retrieve current ip_reassembly config values?
> >
> The set/supported values can be retrieved using rte_eth_dev_info :: reass_capa

Hmm, I thought rte_eth_dev_info :: reass_capa reports
max supported values, not currently set values.
Did I misunderstand something? 



^ permalink raw reply	[flat|nested] 53+ messages in thread

* RE: [PATCH 3/8] ethdev: add mbuf dynfield for incomplete IP reassembly
  2022-01-11 18:44       ` Akhil Goyal
@ 2022-01-12 10:30         ` Ananyev, Konstantin
  2022-01-12 10:59           ` Akhil Goyal
  2022-01-13 13:18         ` Akhil Goyal
  1 sibling, 1 reply; 53+ messages in thread
From: Ananyev, Konstantin @ 2022-01-12 10:30 UTC (permalink / raw)
  To: Akhil Goyal, dev
  Cc: Anoob Joseph, Nicolau, Radu, Doherty, Declan, hemant.agrawal,
	matan, thomas, Yigit, Ferruh, andrew.rybchenko, olivier.matz, Xu,
	Rosen


> >
> > > Hardware IP reassembly may be incomplete for multiple reasons like
> > > reassembly timeout reached, duplicate fragments, etc.
> > > To save application cycles to process these packets again, a new
> > > mbuf ol_flag (RTE_MBUF_F_RX_IPREASSEMBLY_INCOMPLETE) is added to
> > > show that the mbuf received is not reassembled properly.
> >
> > If we use a dynfield for data, why not use a dynflag for
> > RTE_MBUF_F_RX_IPREASSEMBLY_INCOMPLETE?
> > That way we can avoid introducing hardcoded (always defined) flags for that
> > case.
> 
> I have not looked into using dynflag. Will explore if it can be used.
> 
> 
> > >
> > > +/**
> > > + * In case of IP reassembly offload failure, ol_flags in mbuf will be set
> > > + * with RTE_MBUF_F_RX_IPREASSEMBLY_INCOMPLETE and packets will be
> > returned
> > > + * without alteration. The application can retrieve the attached fragments
> > > + * using mbuf dynamic field.
> > > + */
> > > +typedef struct {
> > > +	/**
> > > +	 * Next fragment packet. Application should fetch dynamic field of
> > > +	 * each fragment until a NULL is received and nb_frags is 0.
> > > +	 */
> > > +	struct rte_mbuf *next_frag;
> > > +	/** Time spent(in ms) by HW in waiting for further fragments. */
> > > +	uint16_t time_spent;
> > > +	/** Number of more fragments attached in mbuf dynamic fields. */
> > > +	uint16_t nb_frags;
> > > +} rte_eth_ip_reass_dynfield_t;
> >
> >
> > Looks like a bit of overkill to me:
> > We do already have 'next' and 'nb_frags' fields inside mbuf,
> > why can't they be used here? Why are separate ones necessary?
> >
> The next and nb_frags in mbuf is for segmented buffers and not IP fragments.
> But here we will have separate mbufs in each dynfield denoting each of the
> fragments which may have further segmented buffers.

Makes sense, thanks for explanation.
Though in that case just 'struct rte_mbuf *next_frag' might be enough
(user will walk through the list till mbuf->next_frag != NULL)?
The reason I am asking: current sizeof(rte_eth_ip_reass_dynfield_t) is 16B,
which is quite a lot for mbuf, especially considering that it has to be contiguous 16B.
Making it smaller (8B) or even splitting into 2 fields (8+4) will give it more chances
to coexist with other dynfields. 


^ permalink raw reply	[flat|nested] 53+ messages in thread

* RE: [PATCH 2/8] ethdev: add dev op for IP reassembly configuration
  2022-01-12 10:22         ` Ananyev, Konstantin
@ 2022-01-12 10:32           ` Akhil Goyal
  2022-01-12 10:48             ` Ananyev, Konstantin
  0 siblings, 1 reply; 53+ messages in thread
From: Akhil Goyal @ 2022-01-12 10:32 UTC (permalink / raw)
  To: Ananyev, Konstantin, dev
  Cc: Anoob Joseph, Nicolau, Radu, Doherty, Declan, hemant.agrawal,
	matan, thomas, Yigit, Ferruh, andrew.rybchenko, olivier.matz, Xu,
	Rosen

> > > Another question - if we have reassembly_conf_set() would it make sense to
> > > have also reassembly_conf_get?
> > > So user can retrieve current ip_reassembly config values?
> > >
> > The set/supported values can be retrieved using rte_eth_dev_info ::
> reass_capa
> 
> Hmm, I thought rte_eth_dev_info :: reass_capa reports
> max supported values, not currently set values.
> Did I misunderstand something?
> 
Reassembly configuration is expected to be a one-time setting and is not expected
to change multiple times in the application.
You are correct that rte_eth_dev_info :: reass_capa reports the max values supported
by the PMD.
But if somebody uses the _set API, dev_info values will be overwritten.
However, a get API can be added if we have some use case.
IMO, we can add it later if it is required.


^ permalink raw reply	[flat|nested] 53+ messages in thread

* RE: [PATCH 2/8] ethdev: add dev op for IP reassembly configuration
  2022-01-12 10:32           ` Akhil Goyal
@ 2022-01-12 10:48             ` Ananyev, Konstantin
  2022-01-12 11:06               ` Akhil Goyal
  0 siblings, 1 reply; 53+ messages in thread
From: Ananyev, Konstantin @ 2022-01-12 10:48 UTC (permalink / raw)
  To: Akhil Goyal, dev
  Cc: Anoob Joseph, Nicolau, Radu, Doherty, Declan, hemant.agrawal,
	matan, thomas, Yigit, Ferruh, andrew.rybchenko, olivier.matz, Xu,
	Rosen



> > > > Another question - if we have reassembly_conf_set() would it make sense to
> > > > have also reassembly_conf_get?
> > > > So user can retrieve current ip_reassembly config values?
> > > >
> > > The set/supported values can be retrieved using rte_eth_dev_info ::
> > reass_capa
> >
> > Hmm, I thought rte_eth_dev_info :: reass_capa reports
> > max supported values, not currently set values.
> > Did I misunderstand something?
> >
> Reassembly configuration is expected to be a one-time setting and is not expected
> to change multiple times in the application.
> You are correct that rte_eth_dev_info :: reass_capa reports max supported values
> by the PMD.
> But if somebody uses the _set API, dev_info values will be overwritten.
> However, a get API can be added, if we have some use case.
> IMO, we can add it later if it will be required.

Basically you forbid the user from reconfiguring this feature
during the application's lifetime? 
That sounds like a really strange approach to me and
will probably affect its usability in a negative way. 
I wonder why it has to be that restrictive?
Also, with the model you suggest, what would happen after the user does:
dev_stop(); dev_configure();?
Would rte_eth_dev_info :: reass_capa be reset to initial values,
or would the user's values be preserved, or ...?



^ permalink raw reply	[flat|nested] 53+ messages in thread

* RE: [PATCH 3/8] ethdev: add mbuf dynfield for incomplete IP reassembly
  2022-01-12 10:30         ` Ananyev, Konstantin
@ 2022-01-12 10:59           ` Akhil Goyal
  2022-01-13 22:29             ` Ananyev, Konstantin
  0 siblings, 1 reply; 53+ messages in thread
From: Akhil Goyal @ 2022-01-12 10:59 UTC (permalink / raw)
  To: Ananyev, Konstantin, dev
  Cc: Anoob Joseph, Nicolau, Radu, Doherty, Declan, hemant.agrawal,
	matan, thomas, Yigit, Ferruh, andrew.rybchenko, olivier.matz, Xu,
	Rosen

> > > >
> > > > +/**
> > > > + * In case of IP reassembly offload failure, ol_flags in mbuf will be set
> > > > + * with RTE_MBUF_F_RX_IPREASSEMBLY_INCOMPLETE and packets will
> be
> > > returned
> > > > + * without alteration. The application can retrieve the attached fragments
> > > > + * using mbuf dynamic field.
> > > > + */
> > > > +typedef struct {
> > > > +	/**
> > > > +	 * Next fragment packet. Application should fetch dynamic field of
> > > > +	 * each fragment until a NULL is received and nb_frags is 0.
> > > > +	 */
> > > > +	struct rte_mbuf *next_frag;
> > > > +	/** Time spent(in ms) by HW in waiting for further fragments. */
> > > > +	uint16_t time_spent;
> > > > +	/** Number of more fragments attached in mbuf dynamic fields. */
> > > > +	uint16_t nb_frags;
> > > > +} rte_eth_ip_reass_dynfield_t;
> > >
> > >
> > > Looks like a bit of overkill to me:
> > > We do already have 'next' and 'nb_frags' fields inside mbuf,
> > > why can't they be used here? Why are separate ones necessary?
> > >
> > The next and nb_frags in mbuf is for segmented buffers and not IP fragments.
> > But here we will have separate mbufs in each dynfield denoting each of the
> > fragments which may have further segmented buffers.
> 
> Makes sense, thanks for explanation.
> Though in that case just 'struct rte_mbuf *next_frag' might be enough
> (user will walk through the list till mbuf->next_frag != NULL)?
> The reason I am asking: current sizeof(rte_eth_ip_reass_dynfield_t) is 16B,
> which is quite a lot for mbuf, especially considering that it has to be continuous
> 16B.
> Making it smaller (8B) or even splitting into 2 fields (8+4) will give it more
> chances
> to coexist with other dynfields.

Even if we drop nb_frags, we will be left with uint16_t time_spent.
Are you suggesting using a separate dynfield altogether for the 2 bytes of time_spent?


^ permalink raw reply	[flat|nested] 53+ messages in thread

* RE: [PATCH 2/8] ethdev: add dev op for IP reassembly configuration
  2022-01-12 10:48             ` Ananyev, Konstantin
@ 2022-01-12 11:06               ` Akhil Goyal
  2022-01-13 13:31                 ` Akhil Goyal
  0 siblings, 1 reply; 53+ messages in thread
From: Akhil Goyal @ 2022-01-12 11:06 UTC (permalink / raw)
  To: Ananyev, Konstantin, dev
  Cc: Anoob Joseph, Nicolau, Radu, Doherty, Declan, hemant.agrawal,
	matan, thomas, Yigit, Ferruh, andrew.rybchenko, olivier.matz, Xu,
	Rosen

> > > > > Another question - if we have reassembly_conf_set() would it make sense
> to
> > > > > have also reassembly_conf_get?
> > > > > So user can retrieve current ip_reassembly config values?
> > > > >
> > > > The set/supported values can be retrieved using rte_eth_dev_info ::
> > > reass_capa
> > >
> > > Hmm, I thought rte_eth_dev_info :: reass_capa reports
> > > max supported values, not currently set values.
> > > Did I misunderstand something?
> > >
> > Reassembly configuration is expected to be a one-time setting and is not
> expected
> > to change multiple times in the application.
> > You are correct that rte_eth_dev_info :: reass_capa reports max supported
> values
> > by the PMD.
> > But if somebody uses the _set API, dev_info values will be overwritten.
> > However, a get API can be added, if we have some use case.
> > IMO, we can add it later if it will be required.
> 
> Basically you forbid user to reconfigure this feature
> during application life-time?
> That sounds like a really strange approach to me and
> Probably will affect its usability in a negative way.
> Wonder why it has to be that restrictive?
> Also with the model you suggest, what would happen after user will do:
> dev_stop(); dev_configure();?
> Would rte_eth_dev_info :: reass_capa be reset to initial values,
> or user values will be preserved, or ...?
> 
I am not restricting the user from reconfiguring the feature.
When dev_configure() is called again after dev_stop(), it will reset the previously
set values to the max ones.
However, if you insist, the get API can be added. No strong opinion on that.

^ permalink raw reply	[flat|nested] 53+ messages in thread

* RE: [PATCH 3/8] ethdev: add mbuf dynfield for incomplete IP reassembly
  2022-01-11 18:44       ` Akhil Goyal
  2022-01-12 10:30         ` Ananyev, Konstantin
@ 2022-01-13 13:18         ` Akhil Goyal
  2022-01-13 14:36           ` Ananyev, Konstantin
  1 sibling, 1 reply; 53+ messages in thread
From: Akhil Goyal @ 2022-01-13 13:18 UTC (permalink / raw)
  To: Ananyev, Konstantin, dev
  Cc: Anoob Joseph, Nicolau, Radu, Doherty, Declan, hemant.agrawal,
	matan, thomas, Yigit, Ferruh, andrew.rybchenko, olivier.matz, Xu,
	Rosen

Hi Konstantin,
> > > Hardware IP reassembly may be incomplete for multiple reasons like
> > > reassembly timeout reached, duplicate fragments, etc.
> > > To save application cycles to process these packets again, a new
> > > mbuf ol_flag (RTE_MBUF_F_RX_IPREASSEMBLY_INCOMPLETE) is added to
> > > show that the mbuf received is not reassembled properly.
> >
> > If we use a dynfield for data, why not use a dynflag for
> > RTE_MBUF_F_RX_IPREASSEMBLY_INCOMPLETE?
> > That way we can avoid introducing hardcoded (always defined) flags for that
> > case.
> 
> I have not looked into using dynflag. Will explore if it can be used.
The intent of adding this feature is to reduce application cycles for IP reassembly.
But if we use a dynflag, it will take a lot of cycles to check whether the dynflag is set or not.
As I understand it, it first needs to be looked up in a linked list and then checked.
And this will be checked for each packet even if there is no reassembly involved.

^ permalink raw reply	[flat|nested] 53+ messages in thread

* RE: [PATCH 2/8] ethdev: add dev op for IP reassembly configuration
  2022-01-12 11:06               ` Akhil Goyal
@ 2022-01-13 13:31                 ` Akhil Goyal
  2022-01-13 14:41                   ` Ananyev, Konstantin
  0 siblings, 1 reply; 53+ messages in thread
From: Akhil Goyal @ 2022-01-13 13:31 UTC (permalink / raw)
  To: Ananyev, Konstantin, dev
  Cc: Anoob Joseph, Nicolau, Radu, Doherty, Declan, hemant.agrawal,
	matan, thomas, Yigit, Ferruh, andrew.rybchenko, olivier.matz, Xu,
	Rosen

Hi Konstantin,

> > > > > > Another question - if we have reassembly_conf_set() would it make
> sense
> > to
> > > > > > have also reassembly_conf_get?
> > > > > > So user can retrieve current ip_reassembly config values?
> > > > > >
> > > > > The set/supported values can be retrieved using rte_eth_dev_info ::
> > > > reass_capa
> > > >
> > > > Hmm, I thought rte_eth_dev_info :: reass_capa reports
> > > > max supported values, not currently set values.
> > > > Did I misunderstand something?
> > > >
> > > Reassembly configuration is expected to be a one-time setting and is not
> > expected
> > > to change multiple times in the application.
> > > You are correct that rte_eth_dev_info :: reass_capa reports max supported
> > values
> > > by the PMD.
> > > But if somebody uses the _set API, dev_info values will be overwritten.
> > > However, a get API can be added, if we have some use case.
> > > IMO, we can add it later if it will be required.
> >
> > Basically you forbid user to reconfigure this feature
> > during application life-time?
> > That sounds like a really strange approach to me and
> > Probably will affect its usability in a negative way.
> > Wonder why it has to be that restrictive?
> > Also with the model you suggest, what would happen after user will do:
> > dev_stop(); dev_configure();?
> > Would rte_eth_dev_info :: reass_capa be reset to initial values,
> > or user values will be preserved, or ...?
> >
> I am not restricting the user to not reconfigure the feature.
> When dev_configure() is called again after dev_stop(), it will reset the previously
> set values to max ones.
> However, if you insist the get API can be added. No strong opinion on that.

On second thought, setting dev_info :: reass_capa to the max values and not changing it
in reassembly_conf_set() will make more sense.
The most common case would be to get the max values and, if they are not good
enough for the application, set lower values using the new API.
I do not see a use case for getting the currently set values. However, it may be used for debugging
some driver issue related to these values. But I believe that can be managed internally
in the PMD. Do you suspect any other use case for a get API?
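
For illustration, a rough sketch of that flow using the API and field names proposed
in this series (assumes RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY is already enabled at
configure time; the 500 ms cap and the helper name are just examples):

#include <rte_ethdev.h>

static int
setup_ip_reassembly(uint16_t port_id)
{
	struct rte_eth_dev_info info;
	struct rte_eth_ip_reass_params conf;
	int ret;

	ret = rte_eth_dev_info_get(port_id, &info);
	if (ret != 0)
		return ret;

	/* start from the PMD maximums reported in reass_capa */
	conf = info.reass_capa;
	if (conf.reass_timeout > 500)
		conf.reass_timeout = 500;	/* application policy, in ms */
	/* must be a subset of the flags reported in reass_capa */
	conf.flags = RTE_ETH_DEV_REASSEMBLY_F_IPV4;

	/* must be called before rte_eth_dev_start() */
	return rte_eth_ip_reassembly_conf_set(port_id, &conf);
}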


^ permalink raw reply	[flat|nested] 53+ messages in thread

* RE: [PATCH 3/8] ethdev: add mbuf dynfield for incomplete IP reassembly
  2022-01-13 13:18         ` Akhil Goyal
@ 2022-01-13 14:36           ` Ananyev, Konstantin
  2022-01-13 15:04             ` Akhil Goyal
  0 siblings, 1 reply; 53+ messages in thread
From: Ananyev, Konstantin @ 2022-01-13 14:36 UTC (permalink / raw)
  To: Akhil Goyal, dev
  Cc: Anoob Joseph, Nicolau, Radu, Doherty, Declan, hemant.agrawal,
	matan, thomas, Yigit, Ferruh, andrew.rybchenko, olivier.matz, Xu,
	Rosen

Hi Akhil,

> Hi Konstantin,
> > > > Hardware IP reassembly may be incomplete for multiple reasons like
> > > > reassembly timeout reached, duplicate fragments, etc.
> > > > To save application cycles to process these packets again, a new
> > > > mbuf ol_flag (RTE_MBUF_F_RX_IPREASSEMBLY_INCOMPLETE) is added to
> > > > show that the mbuf received is not reassembled properly.
> > >
> > > If we use a dynfield for data, why not use a dynflag for
> > > RTE_MBUF_F_RX_IPREASSEMBLY_INCOMPLETE?
> > > That way we can avoid introducing hardcoded (always defined) flags for that
> > > case.
> >
> > I have not looked into using dynflag. Will explore if it can be used.
> The intent of adding this feature is to reduce application cycles for IP reassembly.
> But if we use dynflag, it will take a lot of cycles to check if dyn flag is set or not.
> As I understand, it first need to be looked up in a linked list and then checked.
> And this will be checked for each packet even if there is no reassembly involved.

No, I don't think that is a correct understanding.
For a dyn-flag it is the same approach as for a dyn-field.
At init time it selects the bit which will be used and returns its value to the user.
Then the user will set/check that bit at runtime.
So there are no linked-list walks at runtime.
All you are missing compared to hard-coded values is compiler optimizations.  
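
For illustration, a minimal sketch of that init-time step (the flag name is only
illustrative); after this, the per-packet check is just a test of m->ol_flags
against the cached bit:

#include <rte_bitops.h>
#include <rte_mbuf_dyn.h>

static uint64_t ip_reass_incomplete_mask;	/* filled once at init */

static int
register_reass_incomplete_flag(void)
{
	static const struct rte_mbuf_dynflag desc = {
		.name = "rte_eth_ip_reass_incomplete_dynflag",
	};
	int bit = rte_mbuf_dynflag_register(&desc);

	if (bit < 0)
		return -1;
	ip_reass_incomplete_mask = RTE_BIT64(bit);
	return 0;
}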



^ permalink raw reply	[flat|nested] 53+ messages in thread

* RE: [PATCH 2/8] ethdev: add dev op for IP reassembly configuration
  2022-01-13 13:31                 ` Akhil Goyal
@ 2022-01-13 14:41                   ` Ananyev, Konstantin
  0 siblings, 0 replies; 53+ messages in thread
From: Ananyev, Konstantin @ 2022-01-13 14:41 UTC (permalink / raw)
  To: Akhil Goyal, dev
  Cc: Anoob Joseph, Nicolau, Radu, Doherty, Declan, hemant.agrawal,
	matan, thomas, Yigit, Ferruh, andrew.rybchenko, olivier.matz, Xu,
	Rosen



> > > > > > > Another question - if we have reassembly_conf_set() would it make
> > sense
> > > to
> > > > > > > have also reassembly_conf_get?
> > > > > > > So user can retrieve current ip_reassembly config values?
> > > > > > >
> > > > > > The set/supported values can be retrieved using rte_eth_dev_info ::
> > > > > reass_capa
> > > > >
> > > > > Hmm, I thought rte_eth_dev_info :: reass_capa reports
> > > > > max supported values, not currently set values.
> > > > > Did I misunderstand something?
> > > > >
> > > > Reassembly configuration is expected to be a one-time setting and is not
> > > expected
> > > > to change multiple times in the application.
> > > > You are correct that rte_eth_dev_info :: reass_capa reports max supported
> > > values
> > > > by the PMD.
> > > > But if somebody uses the _set API, dev_info values will be overwritten.
> > > > However, a get API can be added, if we have some use case.
> > > > IMO, we can add it later if it will be required.
> > >
> > > Basically you forbid user to reconfigure this feature
> > > during application life-time?
> > > That sounds like a really strange approach to me and
> > > Probably will affect its usability in a negative way.
> > > Wonder why it has to be that restrictive?
> > > Also with the model you suggest, what would happen after user will do:
> > > dev_stop(); dev_configure();?
> > > Would rte_eth_dev_info :: reass_capa be reset to initial values,
> > > or user values will be preserved, or ...?
> > >
> > I am not restricting the user to not reconfigure the feature.
> > When dev_configure() is called again after dev_stop(), it will reset the previously
> > set values to max ones.
> > However, if you insist the get API can be added. No strong opinion on that.
> 
> On another thought, setting dev_info :: reass_capa to a max value and not changing it
> in reassembly_conf_set() will make more sense.

Yes, agree.

> The most common case, would be to get the max values and if they are not good
> Enough for the application, set lesser values using the new API.
> I do not see a use case to get the current values set. However, it may be used for debugging
> some driver issue related to these values. But, I believe that can be managed internally
> in the PMD. Do you suspect any other use case for get API?

I think it would be really useful for both the user and the ethdev layer to have the ability to get
the values that are currently in place.  


^ permalink raw reply	[flat|nested] 53+ messages in thread

* RE: [PATCH 3/8] ethdev: add mbuf dynfield for incomplete IP reassembly
  2022-01-13 14:36           ` Ananyev, Konstantin
@ 2022-01-13 15:04             ` Akhil Goyal
  0 siblings, 0 replies; 53+ messages in thread
From: Akhil Goyal @ 2022-01-13 15:04 UTC (permalink / raw)
  To: Ananyev, Konstantin, dev
  Cc: Anoob Joseph, Nicolau, Radu, Doherty, Declan, hemant.agrawal,
	matan, thomas, Yigit, Ferruh, andrew.rybchenko, olivier.matz, Xu,
	Rosen

> Hi Akhil,
> 
> > Hi Konstantin,
> > > > > Hardware IP reassembly may be incomplete for multiple reasons like
> > > > > reassembly timeout reached, duplicate fragments, etc.
> > > > > To save application cycles to process these packets again, a new
> > > > > mbuf ol_flag (RTE_MBUF_F_RX_IPREASSEMBLY_INCOMPLETE) is added
> to
> > > > > show that the mbuf received is not reassembled properly.
> > > >
> > > > If we use a dynfield for data, why not use a dynflag for
> > > > RTE_MBUF_F_RX_IPREASSEMBLY_INCOMPLETE?
> > > > That way we can avoid introducing hardcoded (always defined) flags for
> that
> > > > case.
> > >
> > > I have not looked into using dynflag. Will explore if it can be used.
> > The intent of adding this feature is to reduce application cycles for IP
> reassembly.
> > But if we use dynflag, it will take a lot of cycles to check if dyn flag is set or
> not.
> > As I understand, it first need to be looked up in a linked list and then checked.
> > And this will be checked for each packet even if there is no reassembly
> involved.
> 
> No, I don't think that is a correct understanding.
> For a dyn-flag it is the same approach as for a dyn-field.
> At init time it selects the bit which will be used and returns its value to the user.
> Then the user will set/check that bit at runtime.
> So there are no linked-list walks at runtime.
> All you are missing compared to hard-coded values is compiler optimizations.
> 
Ok, got it. rte_mbuf_dynflag_lookup() needs to happen only for the first mbuf.
I was checking is_timestamp_enabled() in test-pmd. I didn't see that the dynflag was
a static variable.
I thought the lookup was happening for each packet.
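
For illustration, a sketch of that pattern (flag name illustrative), with the
lookup result cached in statics so only the first call pays for
rte_mbuf_dynflag_lookup():

#include <stdbool.h>
#include <rte_bitops.h>
#include <rte_branch_prediction.h>
#include <rte_mbuf.h>
#include <rte_mbuf_dyn.h>

static inline bool
ip_reass_incomplete(const struct rte_mbuf *m)
{
	static int bit = -2;		/* -2: not looked up yet */
	static uint64_t mask;

	if (unlikely(bit == -2)) {
		bit = rte_mbuf_dynflag_lookup(
			"rte_eth_ip_reass_incomplete_dynflag", NULL);
		if (bit >= 0)
			mask = RTE_BIT64(bit);
	}
	return bit >= 0 && (m->ol_flags & mask) != 0;
}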


^ permalink raw reply	[flat|nested] 53+ messages in thread

* RE: [PATCH 3/8] ethdev: add mbuf dynfield for incomplete IP reassembly
  2022-01-12 10:59           ` Akhil Goyal
@ 2022-01-13 22:29             ` Ananyev, Konstantin
  0 siblings, 0 replies; 53+ messages in thread
From: Ananyev, Konstantin @ 2022-01-13 22:29 UTC (permalink / raw)
  To: Akhil Goyal, dev
  Cc: Anoob Joseph, Nicolau, Radu, Doherty, Declan, hemant.agrawal,
	matan, thomas, Yigit, Ferruh, andrew.rybchenko, olivier.matz, Xu,
	Rosen



> > > > >
> > > > > +/**
> > > > > + * In case of IP reassembly offload failure, ol_flags in mbuf will be set
> > > > > + * with RTE_MBUF_F_RX_IPREASSEMBLY_INCOMPLETE and packets will
> > be
> > > > returned
> > > > > + * without alteration. The application can retrieve the attached fragments
> > > > > + * using mbuf dynamic field.
> > > > > + */
> > > > > +typedef struct {
> > > > > +	/**
> > > > > +	 * Next fragment packet. Application should fetch dynamic field of
> > > > > +	 * each fragment until a NULL is received and nb_frags is 0.
> > > > > +	 */
> > > > > +	struct rte_mbuf *next_frag;
> > > > > +	/** Time spent(in ms) by HW in waiting for further fragments. */
> > > > > +	uint16_t time_spent;
> > > > > +	/** Number of more fragments attached in mbuf dynamic fields. */
> > > > > +	uint16_t nb_frags;
> > > > > +} rte_eth_ip_reass_dynfield_t;
> > > >
> > > >
> > > > Looks like a bit of overkill to me:
> > > > We do already have 'next' and 'nb_frags' fields inside mbuf,
> > > > why can't they be used here? Why a separate ones are necessary?
> > > >
> > > The next and nb_frags in mbuf is for segmented buffers and not IP fragments.
> > > But here we will have separate mbufs in each dynfield denoting each of the
> > > fragments which may have further segmented buffers.
> >
> > Makes sense, thanks for explanation.
> > Though in that case just 'struct rte_mbuf *next_frag' might be enough
> > (user will walk through the list till mbuf->next_frag != NULL)?
> > The reason I am asking: current sizeof(rte_eth_ip_reass_dynfield_t) is 16B,
> > which is quite a lot for mbuf, especially considering that it has to be continuous
> > 16B.
> > Making it smaller (8B) or even splitting into 2 fields (8+4) will give it more
> > chances
> > to coexist with other dynfields.
> 
> Even if we drop nb_frags, we will be left with uint16_t time_spent.
> Are you suggesting to use separate dynfield altogether for 2 bytes of time_spent?

Yes, that was my thought - split it into two separate fields, if possible.
 
 



^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH v2 0/4] ethdev: introduce IP reassembly offload
  2022-01-03 15:08 ` [PATCH 0/8] ethdev: introduce IP " Akhil Goyal
                     ` (8 preceding siblings ...)
  2022-01-06  9:51   ` [PATCH 0/8] ethdev: introduce IP reassembly offload David Marchand
@ 2022-01-20 16:26   ` Akhil Goyal
  2022-01-20 16:26     ` [PATCH v2 1/4] " Akhil Goyal
                       ` (3 more replies)
  9 siblings, 4 replies; 53+ messages in thread
From: Akhil Goyal @ 2022-01-20 16:26 UTC (permalink / raw)
  To: dev
  Cc: anoobj, radu.nicolau, declan.doherty, hemant.agrawal, matan,
	konstantin.ananyev, thomas, ferruh.yigit, andrew.rybchenko,
	olivier.matz, rosen.xu, jerinj, Akhil Goyal

As discussed in the RFC[1] sent in 21.11, a new offload is
introduced in ethdev for IP reassembly.

This patchset adds the IP reassembly RX offload.
Currently, the offload is tested along with inline IPsec processing.
It can also be updated as a standalone offload without IPsec, if there
is some hardware available to test it.
The patchset is tested on the cnxk platform. The driver implementation
and a test app are added as separate patchsets.

[1]: http://patches.dpdk.org/project/dpdk/patch/20210823100259.1619886-1-gakhil@marvell.com/

changes in v2:
- added abi ignore exceptions for modifications in reserved fields.
  Added a crude way to suppress the rte_security and rte_ipsec ABI issue.
  Please suggest a better way.
- incorporated Konstantin's comment for extra checks in new API
  introduced.
- converted static mbuf ol_flag to mbuf dynflag (Konstantin)
- added a get API for reassembly configuration (Konstantin)
- Fixed checkpatch issues.
- Dynfield is NOT split into 2 parts as it would cause an extra fetch in
  case of IP reassembly failure.
- Application patches are split into a separate series.

Akhil Goyal (4):
  ethdev: introduce IP reassembly offload
  ethdev: add dev op to set/get IP reassembly configuration
  ethdev: add mbuf dynfield for incomplete IP reassembly
  security: add IPsec option for IP reassembly

 devtools/libabigail.abignore |  19 ++++++
 doc/guides/nics/features.rst |  11 ++++
 lib/ethdev/ethdev_driver.h   |  45 ++++++++++++++
 lib/ethdev/rte_ethdev.c      | 110 +++++++++++++++++++++++++++++++++++
 lib/ethdev/rte_ethdev.h      | 104 ++++++++++++++++++++++++++++++++-
 lib/ethdev/version.map       |   5 ++
 lib/security/rte_security.h  |  12 +++-
 7 files changed, 304 insertions(+), 2 deletions(-)

-- 
2.25.1


^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH v2 1/4] ethdev: introduce IP reassembly offload
  2022-01-20 16:26   ` [PATCH v2 0/4] " Akhil Goyal
@ 2022-01-20 16:26     ` Akhil Goyal
  2022-01-20 16:45       ` Stephen Hemminger
  2022-01-20 16:26     ` [PATCH v2 2/4] ethdev: add dev op to set/get IP reassembly configuration Akhil Goyal
                       ` (2 subsequent siblings)
  3 siblings, 1 reply; 53+ messages in thread
From: Akhil Goyal @ 2022-01-20 16:26 UTC (permalink / raw)
  To: dev
  Cc: anoobj, radu.nicolau, declan.doherty, hemant.agrawal, matan,
	konstantin.ananyev, thomas, ferruh.yigit, andrew.rybchenko,
	olivier.matz, rosen.xu, jerinj, Akhil Goyal

IP Reassembly is a costly operation if it is done in software.
The operation becomes even costlier if the IP fragments are encrypted.
However, if it is offloaded to HW, it can considerably save application
cycles.

Hence, a new offload RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY is introduced in
ethdev for devices which can attempt reassembly of packets in hardware.
rte_eth_dev_info is updated with the reassembly capabilities which a device
can support.

The resulting reassembled packet would be a typical segmented mbuf in
case of success.

And if reassembly of the fragments fails or is incomplete (if fragments do
not arrive before the reass_timeout), the mbuf ol_flags can be updated.
This is updated in a subsequent patch.

Signed-off-by: Akhil Goyal <gakhil@marvell.com>
Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
---
 devtools/libabigail.abignore |  5 +++++
 doc/guides/nics/features.rst | 11 +++++++++++
 lib/ethdev/rte_ethdev.c      |  1 +
 lib/ethdev/rte_ethdev.h      | 32 +++++++++++++++++++++++++++++++-
 4 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/devtools/libabigail.abignore b/devtools/libabigail.abignore
index 4b676f317d..90f449c43a 100644
--- a/devtools/libabigail.abignore
+++ b/devtools/libabigail.abignore
@@ -11,3 +11,8 @@
 ; Ignore generated PMD information strings
 [suppress_variable]
         name_regexp = _pmd_info$
+
+; Ignore fields inserted in place of reserved_64s of rte_eth_dev_info
+[suppress_type]
+	name = rte_eth_dev_info
+	has_data_member_inserted_between = {offset_of(reserved_64s), end}
diff --git a/doc/guides/nics/features.rst b/doc/guides/nics/features.rst
index 27be2d2576..b45bce4a78 100644
--- a/doc/guides/nics/features.rst
+++ b/doc/guides/nics/features.rst
@@ -602,6 +602,17 @@ Supports inner packet L4 checksum.
   ``tx_offload_capa,tx_queue_offload_capa:RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM``.
 
 
+.. _nic_features_ip_reassembly:
+
+IP reassembly
+-------------
+
+Supports IP reassembly in hardware.
+
+* **[uses]     rte_eth_rxconf,rte_eth_rxmode**: ``offloads:RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY``.
+* **[provides] rte_eth_dev_info**: ``reass_capa``.
+
+
 .. _nic_features_shared_rx_queue:
 
 Shared Rx queue
diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index a1d475a292..d9a03f12f9 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -126,6 +126,7 @@ static const struct {
 	RTE_RX_OFFLOAD_BIT2STR(OUTER_UDP_CKSUM),
 	RTE_RX_OFFLOAD_BIT2STR(RSS_HASH),
 	RTE_RX_OFFLOAD_BIT2STR(BUFFER_SPLIT),
+	RTE_RX_OFFLOAD_BIT2STR(IP_REASSEMBLY),
 };
 
 #undef RTE_RX_OFFLOAD_BIT2STR
diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index fa299c8ad7..11427b2e4d 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -1586,6 +1586,7 @@ struct rte_eth_conf {
 #define RTE_ETH_RX_OFFLOAD_RSS_HASH         RTE_BIT64(19)
 #define DEV_RX_OFFLOAD_RSS_HASH             RTE_ETH_RX_OFFLOAD_RSS_HASH
 #define RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT     RTE_BIT64(20)
+#define RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY    RTE_BIT64(21)
 
 #define RTE_ETH_RX_OFFLOAD_CHECKSUM (RTE_ETH_RX_OFFLOAD_IPV4_CKSUM | \
 				 RTE_ETH_RX_OFFLOAD_UDP_CKSUM | \
@@ -1781,6 +1782,33 @@ enum rte_eth_representor_type {
 	RTE_ETH_REPRESENTOR_PF,   /**< representor of Physical Function. */
 };
 
+/* Flag to offload IP reassembly for IPv4 packets. */
+#define RTE_ETH_DEV_REASSEMBLY_F_IPV4 (RTE_BIT32(0))
+/* Flag to offload IP reassembly for IPv6 packets. */
+#define RTE_ETH_DEV_REASSEMBLY_F_IPV6 (RTE_BIT32(1))
+/**
+ * @warning
+ * @b EXPERIMENTAL: this structure may change without prior notice.
+ *
+ * A structure used to set IP reassembly configuration.
+ *
+ * If RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY flag is set in offloads field,
+ * the PMD will attempt IP reassembly for the received packets as per
+ * properties defined in this structure:
+ *
+ */
+struct rte_eth_ip_reass_params {
+	/** Maximum time in ms which PMD can wait for other fragments. */
+	uint32_t reass_timeout;
+	/** Maximum number of fragments that can be reassembled. */
+	uint16_t max_frags;
+	/**
+	 * Flags to enable reassembly of packet types -
+	 * RTE_ETH_DEV_REASSEMBLY_F_xxx.
+	 */
+	uint16_t flags;
+};
+
 /**
  * A structure used to retrieve the contextual information of
  * an Ethernet device, such as the controlling driver of the
@@ -1841,8 +1869,10 @@ struct rte_eth_dev_info {
 	 * embedded managed interconnect/switch.
 	 */
 	struct rte_eth_switch_info switch_info;
+	/** IP reassembly offload capabilities that a device can support. */
+	struct rte_eth_ip_reass_params reass_capa;
 
-	uint64_t reserved_64s[2]; /**< Reserved for future fields */
+	uint64_t reserved_64s[1]; /**< Reserved for future fields */
 	void *reserved_ptrs[2];   /**< Reserved for future fields */
 };
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH v2 2/4] ethdev: add dev op to set/get IP reassembly configuration
  2022-01-20 16:26   ` [PATCH v2 0/4] " Akhil Goyal
  2022-01-20 16:26     ` [PATCH v2 1/4] " Akhil Goyal
@ 2022-01-20 16:26     ` Akhil Goyal
  2022-01-22  8:17       ` Andrew Rybchenko
  2022-01-20 16:26     ` [PATCH v2 3/4] ethdev: add mbuf dynfield for incomplete IP reassembly Akhil Goyal
  2022-01-20 16:26     ` [PATCH v2 4/4] security: add IPsec option for " Akhil Goyal
  3 siblings, 1 reply; 53+ messages in thread
From: Akhil Goyal @ 2022-01-20 16:26 UTC (permalink / raw)
  To: dev
  Cc: anoobj, radu.nicolau, declan.doherty, hemant.agrawal, matan,
	konstantin.ananyev, thomas, ferruh.yigit, andrew.rybchenko,
	olivier.matz, rosen.xu, jerinj, Akhil Goyal

A new ethernet device op is added to give the application control over
the IP reassembly configuration. This operation is an optional
call from the application; default values are set by the PMD and
exposed via rte_eth_dev_info.
The application should always first retrieve the capabilities from
rte_eth_dev_info and then set the fields accordingly.
The user can get the currently set values using the get API.

Signed-off-by: Akhil Goyal <gakhil@marvell.com>
---
 lib/ethdev/ethdev_driver.h | 37 +++++++++++++++++
 lib/ethdev/rte_ethdev.c    | 81 ++++++++++++++++++++++++++++++++++++++
 lib/ethdev/rte_ethdev.h    | 51 ++++++++++++++++++++++++
 lib/ethdev/version.map     |  4 ++
 4 files changed, 173 insertions(+)

diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h
index d95605a355..a310001648 100644
--- a/lib/ethdev/ethdev_driver.h
+++ b/lib/ethdev/ethdev_driver.h
@@ -990,6 +990,38 @@ typedef int (*eth_representor_info_get_t)(struct rte_eth_dev *dev,
 typedef int (*eth_rx_metadata_negotiate_t)(struct rte_eth_dev *dev,
 				       uint64_t *features);
 
+/**
+ * @internal
+ * Get IP reassembly offload configuration parameters set in PMD.
+ *
+ * @param dev
+ *   Port (ethdev) handle
+ *
+ * @param[out] conf
+ *   Configuration parameters for IP reassembly.
+ *
+ * @return
+ *   Negative errno value on error, zero otherwise
+ */
+typedef int (*eth_ip_reassembly_conf_get_t)(struct rte_eth_dev *dev,
+				       struct rte_eth_ip_reass_params *conf);
+
+/**
+ * @internal
+ * Set configuration parameters for enabling IP reassembly offload in hardware.
+ *
+ * @param dev
+ *   Port (ethdev) handle
+ *
+ * @param[in] conf
+ *   Configuration parameters for IP reassembly.
+ *
+ * @return
+ *   Negative errno value on error, zero otherwise
+ */
+typedef int (*eth_ip_reassembly_conf_set_t)(struct rte_eth_dev *dev,
+				       struct rte_eth_ip_reass_params *conf);
+
 /**
  * @internal A structure containing the functions exported by an Ethernet driver.
  */
@@ -1186,6 +1218,11 @@ struct eth_dev_ops {
 	 * kinds of metadata to the PMD
 	 */
 	eth_rx_metadata_negotiate_t rx_metadata_negotiate;
+
+	/** Get IP reassembly configuration */
+	eth_ip_reassembly_conf_get_t ip_reassembly_conf_get;
+	/** Set IP reassembly configuration */
+	eth_ip_reassembly_conf_set_t ip_reassembly_conf_set;
 };
 
 /**
diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index d9a03f12f9..4bd31034a6 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -6473,6 +6473,87 @@ rte_eth_rx_metadata_negotiate(uint16_t port_id, uint64_t *features)
 		       (*dev->dev_ops->rx_metadata_negotiate)(dev, features));
 }
 
+int
+rte_eth_ip_reassembly_conf_set(uint16_t port_id,
+			       struct rte_eth_ip_reass_params *conf)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+	dev = &rte_eth_devices[port_id];
+
+	if (dev->data->dev_configured == 0) {
+		RTE_ETHDEV_LOG(ERR,
+			"Device with port_id=%"PRIu16" is not configured.\n",
+			port_id);
+		return -EINVAL;
+	}
+
+	if (dev->data->dev_started != 0) {
+		RTE_ETHDEV_LOG(ERR,
+			"Device with port_id=%"PRIu16" started,\n"
+			"cannot configure IP reassembly params.\n",
+			port_id);
+		return -EINVAL;
+	}
+
+	if ((dev->data->dev_conf.rxmode.offloads &
+			RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY) == 0) {
+		RTE_ETHDEV_LOG(ERR,
+			"The port (ID=%"PRIu16") is not configured for IP reassembly\n",
+			port_id);
+		return -EINVAL;
+	}
+
+
+	if (conf == NULL) {
+		RTE_ETHDEV_LOG(ERR,
+				"Invalid IP reassembly configuration (NULL)\n");
+		return -EINVAL;
+	}
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->ip_reassembly_conf_set,
+				-ENOTSUP);
+	return eth_err(port_id,
+		       (*dev->dev_ops->ip_reassembly_conf_set)(dev, conf));
+}
+
+int
+rte_eth_ip_reassembly_conf_get(uint16_t port_id,
+			       struct rte_eth_ip_reass_params *conf)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+	dev = &rte_eth_devices[port_id];
+
+	if (conf == NULL) {
+		RTE_ETHDEV_LOG(ERR, "Cannot get reassembly info to NULL");
+		return -EINVAL;
+	}
+
+	if (dev->data->dev_configured == 0) {
+		RTE_ETHDEV_LOG(ERR,
+			"Device with port_id=%"PRIu16" is not configured.\n",
+			port_id);
+		return -EINVAL;
+	}
+
+	if ((dev->data->dev_conf.rxmode.offloads &
+			RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY) == 0) {
+		RTE_ETHDEV_LOG(ERR,
+			"The port (ID=%"PRIu16") is not configured for IP reassembly\n",
+			port_id);
+		return -EINVAL;
+	}
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->ip_reassembly_conf_get,
+				-ENOTSUP);
+	memset(conf, 0, sizeof(struct rte_eth_ip_reass_params));
+	return eth_err(port_id,
+		       (*dev->dev_ops->ip_reassembly_conf_get)(dev, conf));
+}
+
 RTE_LOG_REGISTER_DEFAULT(rte_eth_dev_logtype, INFO);
 
 RTE_INIT(ethdev_init_telemetry)
diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index 11427b2e4d..53af158bcb 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -5218,6 +5218,57 @@ int rte_eth_representor_info_get(uint16_t port_id,
 __rte_experimental
 int rte_eth_rx_metadata_negotiate(uint16_t port_id, uint64_t *features);
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Get IP reassembly configuration parameters currently set in PMD,
+ * if device rx offload flag (RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY) is
+ * enabled and the PMD supports IP reassembly offload.
+ *
+ * @param port_id
+ *   The port identifier of the device.
+ * @param conf
+ *   A pointer to rte_eth_ip_reass_params structure.
+ * @return
+ *   - (-ENOTSUP) if offload configuration is not supported by device.
+ *   - (-EINVAL) if offload is not enabled in rte_eth_conf.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EIO) if device is removed.
+ *   - (0) on success.
+ */
+__rte_experimental
+int rte_eth_ip_reassembly_conf_get(uint16_t port_id,
+				   struct rte_eth_ip_reass_params *conf);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Set IP reassembly configuration parameters if device rx offload
+ * flag (RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY) is enabled and the PMD
+ * supports IP reassembly offload. User should first check the
+ * reass_capa in rte_eth_dev_info before setting the configuration.
+ * The values of configuration parameters must not exceed the device
+ * capabilities. The use of this API is optional and if called, it
+ * should be called before rte_eth_dev_start().
+ *
+ * @param port_id
+ *   The port identifier of the device.
+ * @param conf
+ *   A pointer to rte_eth_ip_reass_params structure.
+ * @return
+ *   - (-ENOTSUP) if offload configuration is not supported by device.
+ *   - (-EINVAL) if offload is not enabled in rte_eth_conf.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EIO) if device is removed.
+ *   - (0) on success.
+ */
+__rte_experimental
+int rte_eth_ip_reassembly_conf_set(uint16_t port_id,
+				   struct rte_eth_ip_reass_params *conf);
+
+
 #include <rte_ethdev_core.h>
 
 /**
diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
index c2fb0669a4..ad829dd47e 100644
--- a/lib/ethdev/version.map
+++ b/lib/ethdev/version.map
@@ -256,6 +256,10 @@ EXPERIMENTAL {
 	rte_flow_flex_item_create;
 	rte_flow_flex_item_release;
 	rte_flow_pick_transfer_proxy;
+
+	#added in 22.03
+	rte_eth_ip_reassembly_conf_get;
+	rte_eth_ip_reassembly_conf_set;
 };
 
 INTERNAL {
-- 
2.25.1


^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH v2 3/4] ethdev: add mbuf dynfield for incomplete IP reassembly
  2022-01-20 16:26   ` [PATCH v2 0/4] " Akhil Goyal
  2022-01-20 16:26     ` [PATCH v2 1/4] " Akhil Goyal
  2022-01-20 16:26     ` [PATCH v2 2/4] ethdev: add dev op to set/get IP reassembly configuration Akhil Goyal
@ 2022-01-20 16:26     ` Akhil Goyal
  2022-01-20 16:26     ` [PATCH v2 4/4] security: add IPsec option for " Akhil Goyal
  3 siblings, 0 replies; 53+ messages in thread
From: Akhil Goyal @ 2022-01-20 16:26 UTC (permalink / raw)
  To: dev
  Cc: anoobj, radu.nicolau, declan.doherty, hemant.agrawal, matan,
	konstantin.ananyev, thomas, ferruh.yigit, andrew.rybchenko,
	olivier.matz, rosen.xu, jerinj, Akhil Goyal

Hardware IP reassembly may be incomplete for multiple reasons, such as
the reassembly timeout being reached, duplicate fragments, etc.
To save the application cycles of processing these packets again, a new
mbuf dynflag is added to show that the received mbuf is not
reassembled properly.

Now if this dynflag is set, the application can retrieve the corresponding
chain of mbufs using the mbuf dynfield set by the PMD. It will then be
up to the application to either drop those fragments or wait for more time.

Signed-off-by: Akhil Goyal <gakhil@marvell.com>
---
 lib/ethdev/ethdev_driver.h |  8 ++++++++
 lib/ethdev/rte_ethdev.c    | 28 ++++++++++++++++++++++++++++
 lib/ethdev/rte_ethdev.h    | 21 +++++++++++++++++++++
 lib/ethdev/version.map     |  1 +
 4 files changed, 58 insertions(+)

diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h
index a310001648..7499a4fbf5 100644
--- a/lib/ethdev/ethdev_driver.h
+++ b/lib/ethdev/ethdev_driver.h
@@ -1689,6 +1689,14 @@ int
 rte_eth_hairpin_queue_peer_unbind(uint16_t cur_port, uint16_t cur_queue,
 				  uint32_t direction);
 
+/**
+ * @internal
+ * Register mbuf dynamic field and flag for IP reassembly incomplete case.
+ */
+__rte_internal
+int
+rte_eth_ip_reass_dynfield_register(int *field_offset, int *flag);
+
 
 /*
  * Legacy ethdev API used internally by drivers.
diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index 4bd31034a6..f6a155dceb 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -6554,6 +6554,34 @@ rte_eth_ip_reassembly_conf_get(uint16_t port_id,
 		       (*dev->dev_ops->ip_reassembly_conf_get)(dev, conf));
 }
 
+int
+rte_eth_ip_reass_dynfield_register(int *field_offset, int *flag_offset)
+{
+	static const struct rte_mbuf_dynfield field_desc = {
+		.name = RTE_ETH_IP_REASS_DYNFIELD_NAME,
+		.size = sizeof(rte_eth_ip_reass_dynfield_t),
+		.align = __alignof__(rte_eth_ip_reass_dynfield_t),
+	};
+	static const struct rte_mbuf_dynflag ip_reass_dynflag = {
+		.name = RTE_ETH_IP_REASS_INCOMPLETE_DYNFLAG_NAME,
+	};
+	int offset;
+
+	offset = rte_mbuf_dynfield_register(&field_desc);
+	if (offset < 0)
+		return -1;
+	if (field_offset != NULL)
+		*field_offset = offset;
+
+	offset = rte_mbuf_dynflag_register(&ip_reass_dynflag);
+	if (offset < 0)
+		return -1;
+	if (flag_offset != NULL)
+		*flag_offset = offset;
+
+	return 0;
+}
+
 RTE_LOG_REGISTER_DEFAULT(rte_eth_dev_logtype, INFO);
 
 RTE_INIT(ethdev_init_telemetry)
diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index 53af158bcb..a6b43bcf2c 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -5268,6 +5268,27 @@ __rte_experimental
 int rte_eth_ip_reassembly_conf_set(uint16_t port_id,
 				   struct rte_eth_ip_reass_params *conf);
 
+#define RTE_ETH_IP_REASS_DYNFIELD_NAME "rte_eth_ip_reass_dynfield"
+#define RTE_ETH_IP_REASS_INCOMPLETE_DYNFLAG_NAME "rte_eth_ip_reass_incomplete_dynflag"
+
+/**
+ * In case of IP reassembly offload failure, ol_flags in mbuf will be set
+ * with RTE_MBUF_F_RX_IPREASSEMBLY_INCOMPLETE and packets will be returned
+ * without alteration. The application can retrieve the attached fragments
+ * using mbuf dynamic field.
+ */
+typedef struct {
+	/**
+	 * Next fragment packet. The application should fetch the dynamic
+	 * field of each fragment until next_frag is NULL and nb_frags is 0.
+	 */
+	struct rte_mbuf *next_frag;
+	/** Time spent (in ms) by HW waiting for further fragments. */
+	uint16_t time_spent;
+	/** Number of more fragments attached in mbuf dynamic fields. */
+	uint16_t nb_frags;
+} rte_eth_ip_reass_dynfield_t;
+
 
 #include <rte_ethdev_core.h>
 
diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
index ad829dd47e..8b7578471a 100644
--- a/lib/ethdev/version.map
+++ b/lib/ethdev/version.map
@@ -283,6 +283,7 @@ INTERNAL {
 	rte_eth_hairpin_queue_peer_bind;
 	rte_eth_hairpin_queue_peer_unbind;
 	rte_eth_hairpin_queue_peer_update;
+	rte_eth_ip_reass_dynfield_register;
 	rte_eth_representor_id_get;
 	rte_eth_switch_domain_alloc;
 	rte_eth_switch_domain_free;
-- 
2.25.1


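For drivers, a hedged sketch of how the new internal helper might be used at
configure time; rx_offloads and the static variables are placeholders and no
specific PMD is implied:

	static int ip_reass_field_offset = -1;
	static uint64_t ip_reass_incomplete_flag;

	/* Illustrative only: register the dynfield/dynflag once when the
	 * application requests the Rx offload.
	 */
	if (rx_offloads & RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY) {
		int flag_bit;

		if (rte_eth_ip_reass_dynfield_register(&ip_reass_field_offset,
						       &flag_bit) < 0)
			return -rte_errno;
		ip_reass_incomplete_flag = RTE_BIT64(flag_bit);
	}

On the Rx path, the PMD would then set ip_reass_incomplete_flag in ol_flags
and fill the dynfield chain for packets whose reassembly did not complete.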
^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH v2 4/4] security: add IPsec option for IP reassembly
  2022-01-20 16:26   ` [PATCH v2 0/4] " Akhil Goyal
                       ` (2 preceding siblings ...)
  2022-01-20 16:26     ` [PATCH v2 3/4] ethdev: add mbuf dynfield for incomplete IP reassembly Akhil Goyal
@ 2022-01-20 16:26     ` Akhil Goyal
  3 siblings, 0 replies; 53+ messages in thread
From: Akhil Goyal @ 2022-01-20 16:26 UTC (permalink / raw)
  To: dev
  Cc: anoobj, radu.nicolau, declan.doherty, hemant.agrawal, matan,
	konstantin.ananyev, thomas, ferruh.yigit, andrew.rybchenko,
	olivier.matz, rosen.xu, jerinj, Akhil Goyal

A new option is added to the IPsec SA options to enable and attempt
reassembly of inbound fragmented packets, as in the sketch below.
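
A minimal sketch of opting an SA in (enum values as used elsewhere in this
series; the rest of the xform setup is application-specific and omitted):

	struct rte_security_ipsec_xform ipsec_xform = {
		.direction = RTE_SECURITY_IPSEC_SA_DIR_INGRESS,
		.proto = RTE_SECURITY_IPSEC_SA_PROTO_ESP,
		.mode = RTE_SECURITY_IPSEC_SA_MODE_TUNNEL,
		/* Ask the PMD to attempt reassembly of decrypted fragments;
		 * effective only if RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY is
		 * enabled on the inline Ethernet device.
		 */
		.options.reass_en = 1,
	};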

Signed-off-by: Akhil Goyal <gakhil@marvell.com>
---
 devtools/libabigail.abignore | 14 ++++++++++++++
 lib/security/rte_security.h  | 12 +++++++++++-
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/devtools/libabigail.abignore b/devtools/libabigail.abignore
index 90f449c43a..c6e304282f 100644
--- a/devtools/libabigail.abignore
+++ b/devtools/libabigail.abignore
@@ -16,3 +16,17 @@
 [suppress_type]
 	name = rte_eth_dev_info
 	has_data_member_inserted_between = {offset_of(reserved_64s), end}
+
+; Ignore fields inserted in place of reserved_opts of rte_security_ipsec_sa_options
+[suppress_type]
+       name = rte_ipsec_sa_prm
+       name = rte_security_ipsec_sa_options
+       has_data_member_inserted_between = {offset_of(reserved_opts), end}
+
+[suppress_type]
+       name = rte_security_capability
+       has_data_member_inserted_between = {offset_of(reserved_opts), (offset_of(reserved_opts) + 18)}
+
+[suppress_type]
+       name = rte_security_session_conf
+       has_data_member_inserted_between = {offset_of(reserved_opts), (offset_of(reserved_opts) + 18)}
diff --git a/lib/security/rte_security.h b/lib/security/rte_security.h
index 1228b6c8b1..168b837a82 100644
--- a/lib/security/rte_security.h
+++ b/lib/security/rte_security.h
@@ -264,6 +264,16 @@ struct rte_security_ipsec_sa_options {
 	 */
 	uint32_t l4_csum_enable : 1;
 
+	/** Enable reassembly on incoming packets.
+	 *
+	 * * 1: Enable driver to try reassembly of encrypted IP packets for
+	 *      this SA, if supported by the driver. This feature will work
+	 *      only if rx_offload RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY is set in
+	 *      inline Ethernet device.
+	 * * 0: Disable reassembly of packets (default).
+	 */
+	uint32_t reass_en : 1;
+
 	/** Reserved bit fields for future extension
 	 *
 	 * User should ensure reserved_opts is cleared as it may change in
@@ -271,7 +281,7 @@ struct rte_security_ipsec_sa_options {
 	 *
 	 * Note: Reduce number of bits in reserved_opts for every new option.
 	 */
-	uint32_t reserved_opts : 18;
+	uint32_t reserved_opts : 17;
 };
 
 /** IPSec security association direction */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [PATCH v2 1/4] ethdev: introduce IP reassembly offload
  2022-01-20 16:26     ` [PATCH v2 1/4] " Akhil Goyal
@ 2022-01-20 16:45       ` Stephen Hemminger
  2022-01-20 17:11         ` [EXT] " Akhil Goyal
  0 siblings, 1 reply; 53+ messages in thread
From: Stephen Hemminger @ 2022-01-20 16:45 UTC (permalink / raw)
  To: Akhil Goyal
  Cc: dev, anoobj, radu.nicolau, declan.doherty, hemant.agrawal, matan,
	konstantin.ananyev, thomas, ferruh.yigit, andrew.rybchenko,
	olivier.matz, rosen.xu, jerinj

On Thu, 20 Jan 2022 21:56:24 +0530
Akhil Goyal <gakhil@marvell.com> wrote:

> +/**
> + * @warning
> + * @b EXPERIMENTAL: this structure may change without prior notice.
> + *
> + * A structure used to set IP reassembly configuration.
> + *
> + * If RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY flag is set in offloads field,
> + * the PMD will attempt IP reassembly for the received packets as per
> + * properties defined in this structure:
> + *
> + */
> +struct rte_eth_ip_reass_params {
> +	/** Maximum time in ms which PMD can wait for other fragments. */
> +	uint32_t reass_timeout;
> +	/** Maximum number of fragments that can be reassembled. */
> +	uint16_t max_frags;
> +	/**
> +	 * Flags to enable reassembly of packet types -
> +	 * RTE_ETH_DEV_REASSEMBLY_F_xxx.
> +	 */
> +	uint16_t flags;
> +};
> +

Actually, this is not experimental. You are embedding this in dev_info
and dev_info is not experimental; therefore the reassembly parameters
can never change without breaking ABI of dev_info.

^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH v2 0/4] app/test: add inline IPsec and reassembly cases
  2022-01-03 15:08   ` [PATCH 5/8] app/test: add unit cases for inline IPsec offload Akhil Goyal
@ 2022-01-20 16:48     ` Akhil Goyal
  2022-01-20 16:48       ` [PATCH v2 1/4] app/test: add unit cases for inline IPsec offload Akhil Goyal
                         ` (3 more replies)
  0 siblings, 4 replies; 53+ messages in thread
From: Akhil Goyal @ 2022-01-20 16:48 UTC (permalink / raw)
  To: dev
  Cc: anoobj, radu.nicolau, declan.doherty, hemant.agrawal, matan,
	konstantin.ananyev, thomas, ferruh.yigit, andrew.rybchenko,
	olivier.matz, rosen.xu, jerinj, Akhil Goyal

IP reassembly RX offload is introduced in [1].
This patchset is added to test the IP reassembly RX offload and
to test other inline IPsec test cases which need to be verified
before testing IP reassembly in inline inbound cases.
In this app, plain IP packets (with/without IP fragments) are sent
on one interface for outbound processing and then the packets are
received back on the same interface using loopback mode.
On reception, the packets go through inline inbound IPsec processing
and, if they are fragmented, they are reassembled before being
delivered to the driver/app.

v1 of this patchset was sent along with the ethdev changes in [2].
v2 is split so that it can be reviewed separately.

changes in v2:
- added IPsec burst mode case
- updated as per the latest ethdev changes in [1].

[1] http://patches.dpdk.org/project/dpdk/list/?series=21283
[2] http://patches.dpdk.org/project/dpdk/list/?series=21052


Akhil Goyal (4):
  app/test: add unit cases for inline IPsec offload
  app/test: add IP reassembly case with no frags
  app/test: add IP reassembly cases with multiple fragments
  app/test: add IP reassembly negative cases

 MAINTAINERS                                   |    2 +-
 app/test/meson.build                          |    1 +
 app/test/test_security_inline_proto.c         | 1299 +++++++++++++++++
 app/test/test_security_inline_proto_vectors.h |  778 ++++++++++
 4 files changed, 2079 insertions(+), 1 deletion(-)
 create mode 100644 app/test/test_security_inline_proto.c
 create mode 100644 app/test/test_security_inline_proto_vectors.h

-- 
2.25.1


^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH v2 1/4] app/test: add unit cases for inline IPsec offload
  2022-01-20 16:48     ` [PATCH v2 0/4] app/test: add inline IPsec and reassembly cases Akhil Goyal
@ 2022-01-20 16:48       ` Akhil Goyal
  2022-01-20 16:48       ` [PATCH v2 2/4] app/test: add IP reassembly case with no frags Akhil Goyal
                         ` (2 subsequent siblings)
  3 siblings, 0 replies; 53+ messages in thread
From: Akhil Goyal @ 2022-01-20 16:48 UTC (permalink / raw)
  To: dev
  Cc: anoobj, radu.nicolau, declan.doherty, hemant.agrawal, matan,
	konstantin.ananyev, thomas, ferruh.yigit, andrew.rybchenko,
	olivier.matz, rosen.xu, jerinj, Akhil Goyal, Nithin Dabilpuram

A new test suite is added in the test app to test inline IPsec protocol
offload. In this patch, a couple of predefined plain and cipher test
vectors are used to verify the IPsec functionality without the need of
external traffic generators. The sent packet is looped back onto the
same interface, where it is received and matched against the expected output.
The test suite can be updated further with other functional test cases.
The testsuite can be run using:
RTE> inline_ipsec_autotest

Signed-off-by: Akhil Goyal <gakhil@marvell.com>
Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com>
---
 MAINTAINERS                                   |   2 +-
 app/test/meson.build                          |   1 +
 app/test/test_security_inline_proto.c         | 758 ++++++++++++++++++
 app/test/test_security_inline_proto_vectors.h | 185 +++++
 4 files changed, 945 insertions(+), 1 deletion(-)
 create mode 100644 app/test/test_security_inline_proto.c
 create mode 100644 app/test/test_security_inline_proto_vectors.h

diff --git a/MAINTAINERS b/MAINTAINERS
index f46cec0c55..832bff3609 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -439,7 +439,7 @@ M: Declan Doherty <declan.doherty@intel.com>
 T: git://dpdk.org/next/dpdk-next-crypto
 F: lib/security/
 F: doc/guides/prog_guide/rte_security.rst
-F: app/test/test_security.c
+F: app/test/test_security*
 
 Compression API - EXPERIMENTAL
 M: Fan Zhang <roy.fan.zhang@intel.com>
diff --git a/app/test/meson.build b/app/test/meson.build
index 344a609a4d..2161afa7be 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -131,6 +131,7 @@ test_sources = files(
         'test_rwlock.c',
         'test_sched.c',
         'test_security.c',
+        'test_security_inline_proto.c',
         'test_service_cores.c',
         'test_spinlock.c',
         'test_stack.c',
diff --git a/app/test/test_security_inline_proto.c b/app/test/test_security_inline_proto.c
new file mode 100644
index 0000000000..4738792cb8
--- /dev/null
+++ b/app/test/test_security_inline_proto.c
@@ -0,0 +1,758 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+
+#include <stdio.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <unistd.h>
+#include <rte_cycles.h>
+#include <rte_ethdev.h>
+#include <rte_security.h>
+#include <rte_ipsec.h>
+#include <rte_byteorder.h>
+#include <rte_atomic.h>
+#include <rte_malloc.h>
+#include "test_security_inline_proto_vectors.h"
+#include "test.h"
+
+#define NB_ETHPORTS_USED                (1)
+#define NB_SOCKETS                      (2)
+#define MEMPOOL_CACHE_SIZE 32
+#define MAX_PKT_BURST                   (32)
+#define RTE_TEST_RX_DESC_DEFAULT        (1024)
+#define RTE_TEST_TX_DESC_DEFAULT        (1024)
+#define RTE_PORT_ALL            (~(uint16_t)0x0)
+
+/*
+ * RX and TX Prefetch, Host, and Write-back threshold values should be
+ * carefully set for optimal performance. Consult the network
+ * controller's datasheet and supporting DPDK documentation for guidance
+ * on how these parameters should be set.
+ */
+#define RX_PTHRESH 8 /**< Default values of RX prefetch threshold reg. */
+#define RX_HTHRESH 8 /**< Default values of RX host threshold reg. */
+#define RX_WTHRESH 0 /**< Default values of RX write-back threshold reg. */
+
+#define TX_PTHRESH 32 /**< Default values of TX prefetch threshold reg. */
+#define TX_HTHRESH 0  /**< Default values of TX host threshold reg. */
+#define TX_WTHRESH 0  /**< Default values of TX write-back threshold reg. */
+
+#define MAX_TRAFFIC_BURST              2048
+
+#define NB_MBUF 1024
+
+#define APP_REASS_TIMEOUT		10
+
+static struct rte_mempool *mbufpool[NB_SOCKETS];
+static struct rte_mempool *sess_pool[NB_SOCKETS];
+static struct rte_mempool *sess_priv_pool[NB_SOCKETS];
+/* ethernet addresses of ports */
+static struct rte_ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
+
+static struct rte_eth_conf port_conf = {
+	.rxmode = {
+		.mq_mode = RTE_ETH_MQ_RX_NONE,
+		.split_hdr_size = 0,
+		.offloads = RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY |
+			    RTE_ETH_RX_OFFLOAD_CHECKSUM |
+			    RTE_ETH_RX_OFFLOAD_SECURITY,
+	},
+	.txmode = {
+		.mq_mode = RTE_ETH_MQ_TX_NONE,
+		.offloads = RTE_ETH_TX_OFFLOAD_SECURITY |
+			    RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE,
+	},
+	.lpbk_mode = 1,  /* enable loopback */
+};
+
+static struct rte_eth_rxconf rx_conf = {
+	.rx_thresh = {
+		.pthresh = RX_PTHRESH,
+		.hthresh = RX_HTHRESH,
+		.wthresh = RX_WTHRESH,
+	},
+	.rx_free_thresh = 32,
+};
+
+static struct rte_eth_txconf tx_conf = {
+	.tx_thresh = {
+		.pthresh = TX_PTHRESH,
+		.hthresh = TX_HTHRESH,
+		.wthresh = TX_WTHRESH,
+	},
+	.tx_free_thresh = 32, /* Use PMD default values */
+	.tx_rs_thresh = 32, /* Use PMD default values */
+};
+
+enum {
+	LCORE_INVALID = 0,
+	LCORE_AVAIL,
+	LCORE_USED,
+};
+
+struct lcore_cfg {
+	uint8_t status;
+	uint8_t socketid;
+	uint16_t nb_ports;
+	uint16_t port;
+} __rte_cache_aligned;
+
+struct lcore_cfg lcore_cfg;
+
+static uint64_t link_mbps;
+
+static struct rte_flow *default_flow[RTE_MAX_ETHPORTS];
+
+/* Create Inline IPsec session */
+static int
+create_inline_ipsec_session(struct ipsec_session_data *sa,
+		uint16_t portid, struct rte_ipsec_session *ips,
+		enum rte_security_ipsec_sa_direction dir,
+		enum rte_security_ipsec_tunnel_type tun_type)
+{
+	int32_t ret = 0;
+	struct rte_security_ctx *sec_ctx;
+	uint32_t src_v4 = rte_cpu_to_be_32(RTE_IPV4(192, 168, 1, 2));
+	uint32_t dst_v4 = rte_cpu_to_be_32(RTE_IPV4(192, 168, 1, 1));
+	uint16_t src_v6[8] = {0x2607, 0xf8b0, 0x400c, 0x0c03, 0x0000, 0x0000,
+				0x0000, 0x001a};
+	uint16_t dst_v6[8] = {0x2001, 0x0470, 0xe5bf, 0xdead, 0x4957, 0x2174,
+				0xe82c, 0x4887};
+	struct rte_security_session_conf sess_conf = {
+		.action_type = RTE_SECURITY_ACTION_TYPE_INLINE_PROTOCOL,
+		.protocol = RTE_SECURITY_PROTOCOL_IPSEC,
+		.ipsec = sa->ipsec_xform,
+		.crypto_xform = &sa->xform.aead,
+		.userdata = NULL,
+	};
+	sess_conf.ipsec.direction = dir;
+
+	const struct rte_security_capability *sec_cap;
+
+	sec_ctx = (struct rte_security_ctx *)
+			rte_eth_dev_get_sec_ctx(portid);
+
+	if (sec_ctx == NULL) {
+		printf("Ethernet device doesn't support security features.\n");
+		return TEST_SKIPPED;
+	}
+
+	sess_conf.crypto_xform->aead.key.data = sa->key.data;
+
+	/* Save SA as userdata for the security session. When
+	 * the packet is received, this userdata will be
+	 * retrieved using the metadata from the packet.
+	 *
+	 * The PMD is expected to set similar metadata for other
+	 * operations, like rte_eth_event, which are tied to
+	 * security session. In such cases, the userdata could
+	 * be obtained to uniquely identify the security
+	 * parameters denoted.
+	 */
+
+	sess_conf.userdata = (void *) sa;
+	sess_conf.ipsec.tunnel.type = tun_type;
+	if (tun_type == RTE_SECURITY_IPSEC_TUNNEL_IPV4) {
+		memcpy(&sess_conf.ipsec.tunnel.ipv4.src_ip, &src_v4,
+				sizeof(src_v4));
+		memcpy(&sess_conf.ipsec.tunnel.ipv4.dst_ip, &dst_v4,
+				sizeof(dst_v4));
+	} else {
+		memcpy(&sess_conf.ipsec.tunnel.ipv6.src_addr, &src_v6,
+				sizeof(src_v6));
+		memcpy(&sess_conf.ipsec.tunnel.ipv6.dst_addr, &dst_v6,
+				sizeof(dst_v6));
+	}
+	ips->security.ses = rte_security_session_create(sec_ctx,
+				&sess_conf, sess_pool[lcore_cfg.socketid],
+				sess_priv_pool[lcore_cfg.socketid]);
+	if (ips->security.ses == NULL) {
+		printf("SEC Session init failed: err: %d\n", ret);
+		return TEST_FAILED;
+	}
+
+	sec_cap = rte_security_capabilities_get(sec_ctx);
+	if (sec_cap == NULL) {
+		printf("No capabilities registered\n");
+		return TEST_SKIPPED;
+	}
+
+	/* Iterate until a matching IPsec capability is found */
+	while (sec_cap->action !=
+			RTE_SECURITY_ACTION_TYPE_NONE) {
+		if (sec_cap->action == sess_conf.action_type &&
+		    sec_cap->protocol ==
+			RTE_SECURITY_PROTOCOL_IPSEC &&
+		    sec_cap->ipsec.mode ==
+			sess_conf.ipsec.mode &&
+		    sec_cap->ipsec.direction == dir)
+			break;
+		sec_cap++;
+	}
+
+	if (sec_cap->action == RTE_SECURITY_ACTION_TYPE_NONE) {
+		printf("No suitable security capability found\n");
+		return TEST_SKIPPED;
+	}
+
+	ips->security.ol_flags = sec_cap->ol_flags;
+	ips->security.ctx = sec_ctx;
+
+	return 0;
+}
+
+/* Check the link status of all ports in up to 3s, and print them finally */
+static void
+check_all_ports_link_status(uint16_t port_num, uint32_t port_mask)
+{
+#define CHECK_INTERVAL 100 /* 100ms */
+#define MAX_CHECK_TIME 30 /* 3s (30 * 100ms) in total */
+	uint16_t portid;
+	uint8_t count, all_ports_up, print_flag = 0;
+	struct rte_eth_link link;
+	int ret;
+	char link_status[RTE_ETH_LINK_MAX_STR_LEN];
+
+	printf("Checking link statuses...\n");
+	fflush(stdout);
+	for (count = 0; count <= MAX_CHECK_TIME; count++) {
+		all_ports_up = 1;
+		for (portid = 0; portid < port_num; portid++) {
+			if ((port_mask & (1 << portid)) == 0)
+				continue;
+			memset(&link, 0, sizeof(link));
+			ret = rte_eth_link_get_nowait(portid, &link);
+			if (ret < 0) {
+				all_ports_up = 0;
+				if (print_flag == 1)
+					printf("Port %u link get failed: %s\n",
+						portid, rte_strerror(-ret));
+				continue;
+			}
+
+			/* print link status if flag set */
+			if (print_flag == 1) {
+				if (link.link_status && link_mbps == 0)
+					link_mbps = link.link_speed;
+
+				rte_eth_link_to_str(link_status,
+					sizeof(link_status), &link);
+				printf("Port %d %s\n", portid, link_status);
+				continue;
+			}
+			/* clear all_ports_up flag if any link down */
+			if (link.link_status == RTE_ETH_LINK_DOWN) {
+				all_ports_up = 0;
+				break;
+			}
+		}
+		/* after finally printing all link status, get out */
+		if (print_flag == 1)
+			break;
+
+		if (all_ports_up == 0) {
+			fflush(stdout);
+			rte_delay_ms(CHECK_INTERVAL);
+		}
+
+		/* set the print_flag if all ports up or timeout */
+		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1))
+			print_flag = 1;
+	}
+}
+
+static void
+print_ethaddr(const char *name, const struct rte_ether_addr *eth_addr)
+{
+	char buf[RTE_ETHER_ADDR_FMT_SIZE];
+	rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE, eth_addr);
+	printf("%s%s", name, buf);
+}
+
+static void
+copy_buf_to_pkt_segs(void *buf, unsigned int len,
+		     struct rte_mbuf *pkt, unsigned int offset)
+{
+	struct rte_mbuf *seg;
+	void *seg_buf;
+	unsigned int copy_len;
+
+	seg = pkt;
+	while (offset >= seg->data_len) {
+		offset -= seg->data_len;
+		seg = seg->next;
+	}
+	copy_len = seg->data_len - offset;
+	seg_buf = rte_pktmbuf_mtod_offset(seg, char *, offset);
+	while (len > copy_len) {
+		rte_memcpy(seg_buf, buf, (size_t) copy_len);
+		len -= copy_len;
+		buf = ((char *) buf + copy_len);
+		seg = seg->next;
+		seg_buf = rte_pktmbuf_mtod(seg, void *);
+	}
+	rte_memcpy(seg_buf, buf, (size_t) len);
+}
+
+static inline void
+copy_buf_to_pkt(void *buf, unsigned int len,
+		struct rte_mbuf *pkt, unsigned int offset)
+{
+	if (offset + len <= pkt->data_len) {
+		rte_memcpy(rte_pktmbuf_mtod_offset(pkt, char *, offset), buf,
+			   (size_t) len);
+		return;
+	}
+	copy_buf_to_pkt_segs(buf, len, pkt, offset);
+}
+
+static inline int
+init_traffic(struct rte_mempool *mp,
+	     struct rte_mbuf **pkts_burst,
+	     struct ipsec_test_packet *vectors[],
+	     uint32_t nb_pkts)
+{
+	struct rte_mbuf *pkt;
+	uint32_t i;
+
+	for (i = 0; i < nb_pkts; i++) {
+		pkt = rte_pktmbuf_alloc(mp);
+		if (pkt == NULL)
+			return TEST_FAILED;
+
+		pkt->data_len = vectors[i]->len;
+		pkt->pkt_len = vectors[i]->len;
+		copy_buf_to_pkt(vectors[i]->data, vectors[i]->len,
+				pkt, vectors[i]->l2_offset);
+
+		pkts_burst[i] = pkt;
+	}
+	return i;
+}
+
+static int
+init_lcore(void)
+{
+	unsigned int lcore_id;
+
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		lcore_cfg.socketid =
+			rte_lcore_to_socket_id(lcore_id);
+		if (rte_lcore_is_enabled(lcore_id) == 0) {
+			lcore_cfg.status = LCORE_INVALID;
+			continue;
+		} else {
+			lcore_cfg.status = LCORE_AVAIL;
+			break;
+		}
+	}
+	return 0;
+}
+
+static int
+init_mempools(unsigned int nb_mbuf)
+{
+	struct rte_security_ctx *sec_ctx;
+	int socketid;
+	unsigned int lcore_id;
+	uint16_t nb_sess = 512;
+	uint32_t sess_sz;
+	char s[64];
+
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		if (rte_lcore_is_enabled(lcore_id) == 0)
+			continue;
+
+		socketid = rte_lcore_to_socket_id(lcore_id);
+		if (socketid >= NB_SOCKETS)
+			printf("Socket %d of lcore %u is out of range %d\n",
+				socketid, lcore_id, NB_SOCKETS);
+
+		if (mbufpool[socketid] == NULL) {
+			snprintf(s, sizeof(s), "mbuf_pool_%d", socketid);
+			mbufpool[socketid] = rte_pktmbuf_pool_create(s, nb_mbuf,
+					MEMPOOL_CACHE_SIZE, 0,
+					RTE_MBUF_DEFAULT_BUF_SIZE, socketid);
+			if (mbufpool[socketid] == NULL)
+				printf("Cannot init mbuf pool on socket %d\n",
+					socketid);
+			printf("Allocated mbuf pool on socket %d\n", socketid);
+		}
+
+		sec_ctx = rte_eth_dev_get_sec_ctx(lcore_cfg.port);
+		if (sec_ctx == NULL)
+			continue;
+
+		sess_sz = rte_security_session_get_size(sec_ctx);
+		if (sess_pool[socketid] == NULL) {
+			snprintf(s, sizeof(s), "sess_pool_%d", socketid);
+			sess_pool[socketid] =
+				rte_mempool_create(s, nb_sess,
+					sess_sz,
+					MEMPOOL_CACHE_SIZE, 0,
+					NULL, NULL, NULL, NULL,
+					socketid, 0);
+			if (sess_pool[socketid] == NULL) {
+				printf("Cannot init sess pool on socket %d\n",
+					socketid);
+				return TEST_FAILED;
+			}
+			printf("Allocated sess pool on socket %d\n", socketid);
+		}
+		if (sess_priv_pool[socketid] == NULL) {
+			snprintf(s, sizeof(s), "sess_priv_pool_%d", socketid);
+			sess_priv_pool[socketid] =
+				rte_mempool_create(s, nb_sess,
+					sess_sz,
+					MEMPOOL_CACHE_SIZE, 0,
+					NULL, NULL, NULL, NULL,
+					socketid, 0);
+			if (sess_priv_pool[socketid] == NULL) {
+				printf("Cannot init sess_priv pool on socket %d\n",
+					socketid);
+				return TEST_FAILED;
+			}
+			printf("Allocated sess_priv pool on socket %d\n",
+					socketid);
+		}
+	}
+	return 0;
+}
+
+static void
+create_default_flow(uint16_t port_id)
+{
+	struct rte_flow_action action[2];
+	struct rte_flow_item pattern[2];
+	struct rte_flow_attr attr = {0};
+	struct rte_flow_error err;
+	struct rte_flow *flow;
+	int ret;
+
+	/* Add the default rte_flow to enable SECURITY for all ESP packets */
+
+	pattern[0].type = RTE_FLOW_ITEM_TYPE_ESP;
+	pattern[0].spec = NULL;
+	pattern[0].mask = NULL;
+	pattern[0].last = NULL;
+	pattern[1].type = RTE_FLOW_ITEM_TYPE_END;
+
+	action[0].type = RTE_FLOW_ACTION_TYPE_SECURITY;
+	action[0].conf = NULL;
+	action[1].type = RTE_FLOW_ACTION_TYPE_END;
+	action[1].conf = NULL;
+
+	attr.ingress = 1;
+
+	ret = rte_flow_validate(port_id, &attr, pattern, action, &err);
+	if (ret)
+		return;
+
+	flow = rte_flow_create(port_id, &attr, pattern, action, &err);
+	if (flow == NULL) {
+		printf("\nDefault flow rule create failed\n");
+		return;
+	}
+
+	default_flow[port_id] = flow;
+}
+
+static void
+destroy_default_flow(uint16_t port_id)
+{
+	struct rte_flow_error err;
+	int ret;
+	if (!default_flow[port_id])
+		return;
+	ret = rte_flow_destroy(port_id, default_flow[port_id], &err);
+	if (ret) {
+		printf("\nDefault flow rule destroy failed\n");
+		return;
+	}
+	default_flow[port_id] = NULL;
+}
+
+struct rte_mbuf **tx_pkts_burst;
+struct rte_mbuf **rx_pkts_burst;
+
+static int
+test_ipsec(struct reassembly_vector *vector,
+	   enum rte_security_ipsec_sa_direction dir,
+	   enum rte_security_ipsec_tunnel_type tun_type)
+{
+	unsigned int i, portid, nb_rx = 0, nb_tx = 1;
+	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+	struct rte_eth_dev_info dev_info = {0};
+	struct rte_ipsec_session ips = {0};
+
+	portid = lcore_cfg.port;
+	rte_eth_dev_info_get(portid, &dev_info);
+	if (dev_info.reass_capa.max_frags < nb_tx)
+		return TEST_SKIPPED;
+
+	init_traffic(mbufpool[lcore_cfg.socketid],
+			tx_pkts_burst, vector->frags, nb_tx);
+
+	/* Create Inline IPsec session. */
+	if (create_inline_ipsec_session(vector->sa_data, portid, &ips, dir,
+					tun_type))
+		return TEST_FAILED;
+	if (dir == RTE_SECURITY_IPSEC_SA_DIR_INGRESS)
+		create_default_flow(portid);
+	else {
+		for (i = 0; i < nb_tx; i++) {
+			if (ips.security.ol_flags &
+					RTE_SECURITY_TX_OLOAD_NEED_MDATA)
+				rte_security_set_pkt_metadata(ips.security.ctx,
+				ips.security.ses, tx_pkts_burst[i], NULL);
+			tx_pkts_burst[i]->ol_flags |= RTE_MBUF_F_TX_SEC_OFFLOAD;
+			tx_pkts_burst[i]->l2_len = 14;
+		}
+	}
+
+	nb_tx = rte_eth_tx_burst(portid, 0, tx_pkts_burst, nb_tx);
+
+	rte_pause();
+
+	int j = 0;
+	do {
+		nb_rx = rte_eth_rx_burst(portid, 0, pkts_burst, MAX_PKT_BURST);
+		rte_delay_ms(100);
+		j++;
+	} while (nb_rx == 0 && j < 5);
+
+	destroy_default_flow(portid);
+
+	/* Destroy session so that other cases can create the session again */
+	rte_security_session_destroy(ips.security.ctx, ips.security.ses);
+
+	/* Compare results with known vectors. */
+	if (nb_rx == 1) {
+		if (memcmp(rte_pktmbuf_mtod(pkts_burst[0], char *),
+					vector->full_pkt->data,
+					(size_t) vector->full_pkt->len)) {
+			printf("\n====Inline IPsec case failed: Data Mismatch");
+			rte_hexdump(stdout, "received",
+				rte_pktmbuf_mtod(pkts_burst[0], char *),
+				vector->full_pkt->len);
+			rte_hexdump(stdout, "reference",
+				vector->full_pkt->data,
+				vector->full_pkt->len);
+			return TEST_FAILED;
+		}
+		return TEST_SUCCESS;
+	} else
+		return TEST_FAILED;
+}
+
+static int
+ut_setup_inline_ipsec(void)
+{
+	uint16_t portid = lcore_cfg.port;
+	int ret;
+
+	/* Start device */
+	ret = rte_eth_dev_start(portid);
+	if (ret < 0) {
+		printf("rte_eth_dev_start: err=%d, port=%d\n",
+			ret, portid);
+		return ret;
+	}
+	/* always enable promiscuous */
+	ret = rte_eth_promiscuous_enable(portid);
+	if (ret != 0) {
+		printf("rte_eth_promiscuous_enable: err=%s, port=%d\n",
+			rte_strerror(-ret), portid);
+		return ret;
+	}
+	lcore_cfg.port = portid;
+	check_all_ports_link_status(1, RTE_PORT_ALL);
+
+	return 0;
+}
+
+static void
+ut_teardown_inline_ipsec(void)
+{
+	uint16_t portid = lcore_cfg.port;
+	int socketid = lcore_cfg.socketid;
+	int ret;
+
+	/* port tear down */
+	RTE_ETH_FOREACH_DEV(portid) {
+		if (socketid != rte_eth_dev_socket_id(portid))
+			continue;
+
+		ret = rte_eth_dev_stop(portid);
+		if (ret != 0)
+			printf("rte_eth_dev_stop: err=%s, port=%u\n",
+			       rte_strerror(-ret), portid);
+	}
+}
+
+static int
+testsuite_setup(void)
+{
+	uint16_t nb_rxd;
+	uint16_t nb_txd;
+	uint16_t nb_ports;
+	int socketid, ret;
+	uint16_t nb_rx_queue = 1, nb_tx_queue = 1;
+	uint16_t portid = lcore_cfg.port;
+	struct rte_eth_dev_info dev_info = {0};
+
+	printf("Start inline IPsec test.\n");
+
+	nb_ports = rte_eth_dev_count_avail();
+	if (nb_ports < NB_ETHPORTS_USED) {
+		printf("At least %u port(s) used for test\n",
+		       NB_ETHPORTS_USED);
+		return -1;
+	}
+
+	init_lcore();
+
+	init_mempools(NB_MBUF);
+
+	socketid = lcore_cfg.socketid;
+	if (tx_pkts_burst == NULL) {
+		tx_pkts_burst = (struct rte_mbuf **)
+			rte_calloc_socket("tx_buff",
+					  MAX_TRAFFIC_BURST * nb_ports,
+					  sizeof(void *),
+					  RTE_CACHE_LINE_SIZE, socketid);
+		if (!tx_pkts_burst)
+			return -1;
+
+		rx_pkts_burst = (struct rte_mbuf **)
+			rte_calloc_socket("rx_buff",
+					  MAX_TRAFFIC_BURST * nb_ports,
+					  sizeof(void *),
+					  RTE_CACHE_LINE_SIZE, socketid);
+		if (!rx_pkts_burst)
+			return -1;
+	}
+
+	printf("Generate %d packets @socket %d\n",
+	       MAX_TRAFFIC_BURST * nb_ports, socketid);
+
+	nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
+	nb_txd = RTE_TEST_TX_DESC_DEFAULT;
+
+	/* port configure */
+	ret = rte_eth_dev_configure(portid, nb_rx_queue,
+				    nb_tx_queue, &port_conf);
+	if (ret < 0) {
+		printf("Cannot configure device: err=%d, port=%d\n",
+			 ret, portid);
+		return ret;
+	}
+	ret = rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
+	if (ret < 0) {
+		printf("Cannot get mac address: err=%d, port=%d\n",
+			 ret, portid);
+		return ret;
+	}
+	printf("Port %u ", portid);
+	print_ethaddr("Address:", &ports_eth_addr[portid]);
+	printf("\n");
+
+	/* tx queue setup */
+	ret = rte_eth_tx_queue_setup(portid, 0, nb_txd,
+				     socketid, &tx_conf);
+	if (ret < 0) {
+		printf("rte_eth_tx_queue_setup: err=%d, port=%d\n",
+				ret, portid);
+		return ret;
+	}
+	/* rx queue setup */
+	ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd,
+					socketid, &rx_conf,
+					mbufpool[socketid]);
+	if (ret < 0) {
+		printf("rte_eth_rx_queue_setup: err=%d, port=%d\n",
+				ret, portid);
+		return ret;
+	}
+
+	rte_eth_dev_info_get(portid, &dev_info);
+
+	if (dev_info.reass_capa.reass_timeout > APP_REASS_TIMEOUT) {
+		dev_info.reass_capa.reass_timeout = APP_REASS_TIMEOUT;
+		rte_eth_ip_reassembly_conf_set(portid, &dev_info.reass_capa);
+	}
+
+	return 0;
+}
+
+static void
+testsuite_teardown(void)
+{
+	int ret;
+	uint16_t portid = lcore_cfg.port;
+	uint16_t socketid = lcore_cfg.socketid;
+
+	/* port tear down */
+	RTE_ETH_FOREACH_DEV(portid) {
+		if (socketid != rte_eth_dev_socket_id(portid))
+			continue;
+
+		ret = rte_eth_dev_reset(portid);
+		if (ret != 0)
+			printf("rte_eth_dev_reset: err=%s, port=%u\n",
+			       rte_strerror(-ret), portid);
+	}
+}
+static int
+test_ipsec_ipv4_encap_nofrag(void)
+{
+	struct reassembly_vector ipv4_nofrag_case = {
+				.sa_data = &conf_aes_128_gcm,
+				.full_pkt = &pkt_ipv4_gcm128_cipher,
+				.frags[0] = &pkt_ipv4_plain,
+				.nb_frags = 1,
+	};
+	return test_ipsec(&ipv4_nofrag_case,
+			RTE_SECURITY_IPSEC_SA_DIR_EGRESS,
+			RTE_SECURITY_IPSEC_TUNNEL_IPV4);
+}
+
+static int
+test_ipsec_ipv4_decap_nofrag(void)
+{
+	struct reassembly_vector ipv4_nofrag_case = {
+				.sa_data = &conf_aes_128_gcm,
+				.full_pkt = &pkt_ipv4_plain,
+				.frags[0] = &pkt_ipv4_gcm128_cipher,
+				.nb_frags = 1,
+	};
+	return test_ipsec(&ipv4_nofrag_case,
+			RTE_SECURITY_IPSEC_SA_DIR_INGRESS,
+			RTE_SECURITY_IPSEC_TUNNEL_IPV4);
+}
+
+static struct unit_test_suite inline_ipsec_testsuite  = {
+	.suite_name = "Inline IPsec Ethernet Device Unit Test Suite",
+	.setup = testsuite_setup,
+	.teardown = testsuite_teardown,
+	.unit_test_cases = {
+		TEST_CASE_ST(ut_setup_inline_ipsec,
+				ut_teardown_inline_ipsec,
+				test_ipsec_ipv4_encap_nofrag),
+		TEST_CASE_ST(ut_setup_inline_ipsec,
+				ut_teardown_inline_ipsec,
+				test_ipsec_ipv4_decap_nofrag),
+
+		TEST_CASES_END() /**< NULL terminate unit test array */
+	}
+};
+
+static int
+test_inline_ipsec(void)
+{
+	return unit_test_suite_runner(&inline_ipsec_testsuite);
+}
+
+REGISTER_TEST_COMMAND(inline_ipsec_autotest, test_inline_ipsec);
diff --git a/app/test/test_security_inline_proto_vectors.h b/app/test/test_security_inline_proto_vectors.h
new file mode 100644
index 0000000000..08e6868b0d
--- /dev/null
+++ b/app/test/test_security_inline_proto_vectors.h
@@ -0,0 +1,185 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+#ifndef _TEST_INLINE_IPSEC_REASSEMBLY_VECTORS_H_
+#define _TEST_INLINE_IPSEC_REASSEMBLY_VECTORS_H_
+
+#define MAX_FRAG_LEN		 1500
+#define MAX_FRAGS		 6
+#define MAX_PKT_LEN		 (MAX_FRAG_LEN * MAX_FRAGS)
+struct ipsec_session_data {
+	struct {
+		uint8_t data[32];
+	} key;
+	struct {
+		uint8_t data[4];
+		unsigned int len;
+	} salt;
+	struct {
+		uint8_t data[16];
+	} iv;
+	struct rte_security_ipsec_xform ipsec_xform;
+	bool aead;
+	union {
+		struct {
+			struct rte_crypto_sym_xform cipher;
+			struct rte_crypto_sym_xform auth;
+		} chain;
+		struct rte_crypto_sym_xform aead;
+	} xform;
+};
+
+struct ipsec_test_packet {
+	uint32_t len;
+	uint32_t l2_offset;
+	uint32_t l3_offset;
+	uint32_t l4_offset;
+	uint8_t data[MAX_PKT_LEN];
+};
+
+struct reassembly_vector {
+	struct ipsec_session_data *sa_data;
+	struct ipsec_test_packet *full_pkt;
+	struct ipsec_test_packet *frags[MAX_FRAGS];
+	uint16_t nb_frags;
+};
+
+struct ipsec_test_packet pkt_ipv4_plain = {
+	.len = 76,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 34,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x08, 0x00,
+
+		/* IP */
+		0x45, 0x00, 0x00, 0x3e, 0x69, 0x8f, 0x00, 0x00,
+		0x80, 0x11, 0x4d, 0xcc, 0xc0, 0xa8, 0x01, 0x02,
+		0xc0, 0xa8, 0x01, 0x01,
+
+		/* UDP */
+		0x0a, 0x98, 0x00, 0x35, 0x00, 0x2a, 0x23, 0x43,
+		0xb2, 0xd0, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x03, 0x73, 0x69, 0x70,
+		0x09, 0x63, 0x79, 0x62, 0x65, 0x72, 0x63, 0x69,
+		0x74, 0x79, 0x02, 0x64, 0x6b, 0x00, 0x00, 0x01,
+		0x00, 0x01,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv4_gcm128_cipher = {
+	.len = 130,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 34,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x08, 0x00,
+
+		/* IP - outer header */
+		0x45, 0x00, 0x00, 0x74, 0x00, 0x01, 0x00, 0x00,
+		0x40, 0x32, 0xf7, 0x03, 0xc0, 0xa8, 0x01, 0x02,
+		0xc0, 0xa8, 0x01, 0x01,
+
+		/* ESP */
+		0x00, 0x00, 0xa5, 0xf8, 0x00, 0x00, 0x00, 0x01,
+
+		/* IV */
+		0xfa, 0xce, 0xdb, 0xad, 0xde, 0xca, 0xf8, 0x88,
+
+		/* Data */
+		0xde, 0xb2, 0x2c, 0xd9, 0xb0, 0x7c, 0x72, 0xc1,
+		0x6e, 0x3a, 0x65, 0xbe, 0xeb, 0x8d, 0xf3, 0x04,
+		0xa5, 0xa5, 0x89, 0x7d, 0x33, 0xae, 0x53, 0x0f,
+		0x1b, 0xa7, 0x6d, 0x5d, 0x11, 0x4d, 0x2a, 0x5c,
+		0x3d, 0xe8, 0x18, 0x27, 0xc1, 0x0e, 0x9a, 0x4f,
+		0x51, 0x33, 0x0d, 0x0e, 0xec, 0x41, 0x66, 0x42,
+		0xcf, 0xbb, 0x85, 0xa5, 0xb4, 0x7e, 0x48, 0xa4,
+		0xec, 0x3b, 0x9b, 0xa9, 0x5d, 0x91, 0x8b, 0xd4,
+		0x29, 0xc7, 0x37, 0x57, 0x9f, 0xf1, 0x9e, 0x58,
+		0xcf, 0xfc, 0x60, 0x7a, 0x3b, 0xce, 0x89, 0x94,
+	},
+};
+
+static inline void
+test_vector_payload_populate(struct ipsec_test_packet *pkt,
+		bool first_frag)
+{
+	uint32_t i = pkt->l4_offset;
+
+	/**
+	 * For non-fragmented packets and first frag, skip 8 bytes from
+	 * l4_offset for UDP header.
+	 */
+	if (first_frag)
+		i += 8;
+
+	for (; i < pkt->len; i++)
+		pkt->data[i] = 0x58;
+}
+
+struct ipsec_session_data conf_aes_128_gcm = {
+	.key = {
+		.data = {
+			0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
+			0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08
+		},
+	},
+
+	.salt = {
+		.data = {
+			0xca, 0xfe, 0xba, 0xbe
+		},
+		.len = 4,
+	},
+
+	.iv = {
+		.data = {
+			0xfa, 0xce, 0xdb, 0xad, 0xde, 0xca, 0xf8, 0x88
+		},
+	},
+
+	.ipsec_xform = {
+		.spi = 0xa5f8,
+		.salt = 0xbebafeca,
+		.options.esn = 0,
+		.options.udp_encap = 0,
+		.options.copy_dscp = 0,
+		.options.copy_flabel = 0,
+		.options.copy_df = 0,
+		.options.dec_ttl = 0,
+		.options.ecn = 0,
+		.options.stats = 0,
+		.options.tunnel_hdr_verify = 0,
+		.options.ip_csum_enable = 0,
+		.options.l4_csum_enable = 0,
+		.options.reass_en = 1,
+		.direction = RTE_SECURITY_IPSEC_SA_DIR_EGRESS,
+		.proto = RTE_SECURITY_IPSEC_SA_PROTO_ESP,
+		.mode = RTE_SECURITY_IPSEC_SA_MODE_TUNNEL,
+		.tunnel.type = RTE_SECURITY_IPSEC_TUNNEL_IPV4,
+		.replay_win_sz = 0,
+	},
+
+	.aead = true,
+
+	.xform = {
+		.aead = {
+			.next = NULL,
+			.type = RTE_CRYPTO_SYM_XFORM_AEAD,
+			.aead = {
+				.op = RTE_CRYPTO_AEAD_OP_ENCRYPT,
+				.algo = RTE_CRYPTO_AEAD_AES_GCM,
+				.key.length = 16,
+				.iv.length = 12,
+				.iv.offset = 0,
+				.digest_length = 16,
+				.aad_length = 12,
+			},
+		},
+	},
+};
+#endif
-- 
2.25.1


^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH v2 2/4] app/test: add IP reassembly case with no frags
  2022-01-20 16:48     ` [PATCH v2 0/4] app/test: add inline IPsec and reassembly cases Akhil Goyal
  2022-01-20 16:48       ` [PATCH v2 1/4] app/test: add unit cases for inline IPsec offload Akhil Goyal
@ 2022-01-20 16:48       ` Akhil Goyal
  2022-01-20 16:48       ` [PATCH v2 3/4] app/test: add IP reassembly cases with multiple fragments Akhil Goyal
  2022-01-20 16:48       ` [PATCH v2 4/4] app/test: add IP reassembly negative cases Akhil Goyal
  3 siblings, 0 replies; 53+ messages in thread
From: Akhil Goyal @ 2022-01-20 16:48 UTC (permalink / raw)
  To: dev
  Cc: anoobj, radu.nicolau, declan.doherty, hemant.agrawal, matan,
	konstantin.ananyev, thomas, ferruh.yigit, andrew.rybchenko,
	olivier.matz, rosen.xu, jerinj, Akhil Goyal, Nithin Dabilpuram

The test_inline_ipsec testsuite is extended to test IP reassembly of inbound
fragmented packets. The fragments are sent on an interface
which encrypts them; they are then looped back on the
same interface, which decrypts the packets and then attempts IP reassembly
of the decrypted fragments.
In this patch, a case is added for packets without fragmentation to
verify the complete path. Other cases are added in subsequent patches.

Signed-off-by: Akhil Goyal <gakhil@marvell.com>
Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com>
---
 app/test/test_security_inline_proto.c         | 325 ++++++++++++++++++
 app/test/test_security_inline_proto_vectors.h |   1 +
 2 files changed, 326 insertions(+)

diff --git a/app/test/test_security_inline_proto.c b/app/test/test_security_inline_proto.c
index 4738792cb8..9dc083369a 100644
--- a/app/test/test_security_inline_proto.c
+++ b/app/test/test_security_inline_proto.c
@@ -25,6 +25,8 @@
 #define RTE_TEST_TX_DESC_DEFAULT        (1024)
 #define RTE_PORT_ALL            (~(uint16_t)0x0)
 
+#define ENCAP_DECAP_BURST_SZ 33
+
 /*
  * RX and TX Prefetch, Host, and Write-back threshold values should be
  * carefully set for optimal performance. Consult the network
@@ -103,6 +105,8 @@ struct lcore_cfg lcore_cfg;
 
 static uint64_t link_mbps;
 
+static int ip_reass_dynfield_offset = -1;
+
 static struct rte_flow *default_flow[RTE_MAX_ETHPORTS];
 
 /* Create Inline IPsec session */
@@ -477,6 +481,293 @@ destroy_default_flow(uint16_t port_id)
 struct rte_mbuf **tx_pkts_burst;
 struct rte_mbuf **rx_pkts_burst;
 
+static int
+compare_pkt_data(struct rte_mbuf *m, uint8_t *ref, unsigned int tot_len)
+{
+	unsigned int len;
+	unsigned int nb_segs = m->nb_segs;
+	unsigned int matched = 0;
+	struct rte_mbuf *save = m;
+
+	while (m && nb_segs != 0) {
+		len = tot_len;
+		if (len > m->data_len)
+			len = m->data_len;
+		if (len != 0) {
+			if (memcmp(rte_pktmbuf_mtod(m, char *),
+					ref + matched, len)) {
+				printf("\n====Reassembly case failed: Data Mismatch");
+				rte_hexdump(stdout, "Reassembled",
+					rte_pktmbuf_mtod(m, char *),
+					len);
+				rte_hexdump(stdout, "reference",
+					ref + matched,
+					len);
+				return TEST_FAILED;
+			}
+		}
+		tot_len -= len;
+		matched += len;
+		m = m->next;
+		nb_segs--;
+	}
+
+	if (tot_len) {
+		printf("\n====Reassembly case failed: Data Missing %u",
+		       tot_len);
+		printf("\n====nb_segs %u, tot_len %u", nb_segs, tot_len);
+		rte_pktmbuf_dump(stderr, save, -1);
+		return TEST_FAILED;
+	}
+	return TEST_SUCCESS;
+}
+
+static inline bool
+is_ip_reassembly_incomplete(struct rte_mbuf *mbuf)
+{
+	static uint64_t ip_reass_dynflag;
+	int ip_reass_dynflag_offset;
+
+	if (ip_reass_dynflag == 0) {
+		ip_reass_dynflag_offset = rte_mbuf_dynflag_lookup(
+				RTE_ETH_IP_REASS_INCOMPLETE_DYNFLAG_NAME, NULL);
+		if (ip_reass_dynflag_offset < 0)
+			return false;
+		ip_reass_dynflag = RTE_BIT64(ip_reass_dynflag_offset);
+	}
+
+	return (mbuf->ol_flags & ip_reass_dynflag) != 0;
+}
+
+static void
+free_mbuf(struct rte_mbuf *mbuf)
+{
+	rte_eth_ip_reass_dynfield_t dynfield;
+
+	if (!mbuf)
+		return;
+
+	if (!is_ip_reassembly_incomplete(mbuf)) {
+		rte_pktmbuf_free(mbuf);
+	} else {
+		if (ip_reass_dynfield_offset < 0)
+			return;
+
+		while (mbuf) {
+			dynfield = *RTE_MBUF_DYNFIELD(mbuf, ip_reass_dynfield_offset,
+						      rte_eth_ip_reass_dynfield_t *);
+			rte_pktmbuf_free(mbuf);
+			mbuf = dynfield.next_frag;
+		}
+	}
+}
+
+
+static int
+get_and_verify_incomplete_frags(struct rte_mbuf *mbuf,
+				struct reassembly_vector *vector)
+{
+	rte_eth_ip_reass_dynfield_t *dynfield[MAX_PKT_BURST];
+	int j = 0, ret;
+	/**
+	 * IP reassembly offload is incomplete, and fragments are listed in
+	 * dynfield which can be reassembled in SW.
+	 */
+	printf("\nHW IP Reassembly is not complete; attempt SW IP Reassembly,"
+		"\nMatching with original frags.");
+
+	if (ip_reass_dynfield_offset < 0)
+		return -1;
+
+	printf("\ncomparing frag: %d", j);
+	ret = compare_pkt_data(mbuf, vector->frags[j]->data,
+				vector->frags[j]->len);
+	if (ret)
+		return ret;
+	j++;
+	dynfield[j] = RTE_MBUF_DYNFIELD(mbuf, ip_reass_dynfield_offset,
+					rte_eth_ip_reass_dynfield_t *);
+	printf("\ncomparing frag: %d", j);
+	ret = compare_pkt_data(dynfield[j]->next_frag, vector->frags[j]->data,
+			vector->frags[j]->len);
+	if (ret)
+		return ret;
+
+	while ((dynfield[j]->nb_frags > 1) &&
+			is_ip_reassembly_incomplete(dynfield[j]->next_frag)) {
+		j++;
+		dynfield[j] = RTE_MBUF_DYNFIELD(dynfield[j-1]->next_frag,
+						ip_reass_dynfield_offset,
+						rte_eth_ip_reass_dynfield_t *);
+		printf("\ncomparing frag: %d", j);
+		ret = compare_pkt_data(dynfield[j]->next_frag,
+				vector->frags[j]->data, vector->frags[j]->len);
+		if (ret)
+			return ret;
+	}
+	return ret;
+}
+
+static int
+test_ipsec_encap_decap(struct reassembly_vector *vector,
+		       enum rte_security_ipsec_tunnel_type tun_type)
+{
+	struct rte_ipsec_session out_ips[ENCAP_DECAP_BURST_SZ] = {0};
+	struct rte_ipsec_session in_ips[ENCAP_DECAP_BURST_SZ] = {0};
+	unsigned int nb_tx, burst_sz, nb_sent = 0;
+	struct rte_eth_dev_info dev_info = {0};
+	unsigned int i, portid, nb_rx = 0, j;
+	struct ipsec_session_data sa_data;
+	int ret = 0;
+
+	burst_sz = vector->burst ? ENCAP_DECAP_BURST_SZ : 1;
+
+	portid = lcore_cfg.port;
+	rte_eth_dev_info_get(portid, &dev_info);
+	if (dev_info.reass_capa.max_frags < vector->nb_frags)
+		return TEST_SKIPPED;
+
+	nb_tx = vector->nb_frags * burst_sz;
+	memset(tx_pkts_burst, 0, sizeof(tx_pkts_burst[0]) * nb_tx);
+	memset(rx_pkts_burst, 0, sizeof(rx_pkts_burst[0]) * nb_tx);
+
+	for (i = 0; i < nb_tx; i += vector->nb_frags) {
+		ret = init_traffic(mbufpool[lcore_cfg.socketid],
+				   &tx_pkts_burst[i], vector->frags,
+				   vector->nb_frags);
+		if (ret != vector->nb_frags) {
+			ret = -1;
+			goto out;
+		}
+	}
+
+	for (i = 0; i < burst_sz; i++) {
+		memcpy(&sa_data, vector->sa_data, sizeof(sa_data));
+		/* Update SPI for every new SA */
+		sa_data.ipsec_xform.spi += i;
+
+		/* Create Inline IPsec outbound session. */
+		ret = create_inline_ipsec_session(&sa_data, portid, &out_ips[i],
+						  RTE_SECURITY_IPSEC_SA_DIR_EGRESS,
+						  tun_type);
+		if (ret)
+			goto out;
+	}
+
+	j = 0;
+	for (i = 0; i < nb_tx; i++) {
+		if (out_ips[j].security.ol_flags &
+				RTE_SECURITY_TX_OLOAD_NEED_MDATA)
+			rte_security_set_pkt_metadata(out_ips[j].security.ctx,
+				out_ips[j].security.ses, tx_pkts_burst[i], NULL);
+		tx_pkts_burst[i]->ol_flags |= RTE_MBUF_F_TX_SEC_OFFLOAD;
+		tx_pkts_burst[i]->l2_len = RTE_ETHER_HDR_LEN;
+
+		/* Move to next SA after nb_frags */
+		if ((i + 1) % vector->nb_frags == 0)
+			j++;
+	}
+
+	for (i = 0; i < burst_sz; i++) {
+		memcpy(&sa_data, vector->sa_data, sizeof(sa_data));
+		/* Update SPI for every new SA */
+		sa_data.ipsec_xform.spi += i;
+
+		/* Create Inline IPsec inbound session. */
+		ret = create_inline_ipsec_session(&sa_data, portid, &in_ips[i],
+						  RTE_SECURITY_IPSEC_SA_DIR_INGRESS,
+						  tun_type);
+		if (ret)
+			goto out;
+	}
+
+	/* Retrieve reassembly dynfield offset if available */
+	if (ip_reass_dynfield_offset < 0 && vector->nb_frags > 1)
+		ip_reass_dynfield_offset = rte_mbuf_dynfield_lookup(
+				RTE_ETH_IP_REASS_DYNFIELD_NAME, NULL);
+
+
+	create_default_flow(portid);
+
+	nb_sent = rte_eth_tx_burst(portid, 0, tx_pkts_burst, nb_tx);
+	if (nb_sent != nb_tx) {
+		ret = -1;
+		printf("\nFailed to tx %u pkts", nb_tx);
+		goto out;
+	}
+
+	rte_delay_ms(100);
+
+	/* Retry few times before giving up */
+	nb_rx = 0;
+	j = 0;
+	do {
+		nb_rx += rte_eth_rx_burst(portid, 0, &rx_pkts_burst[nb_rx],
+					  nb_tx - nb_rx);
+		j++;
+		if (nb_rx >= nb_tx)
+			break;
+		rte_delay_ms(100);
+	} while (j < 5 || !nb_rx);
+
+	/* Check for minimum number of Rx packets expected */
+	if ((vector->nb_frags == 1 && nb_rx != nb_tx) ||
+	    (vector->nb_frags > 1 && nb_rx < burst_sz)) {
+		printf("\nreceived less Rx pkts(%u) pkts\n", nb_rx);
+		ret = TEST_FAILED;
+		goto out;
+	}
+
+	for (i = 0; i < nb_rx; i++) {
+		if (vector->nb_frags > 1 &&
+		    is_ip_reassembly_incomplete(rx_pkts_burst[i])) {
+			ret = get_and_verify_incomplete_frags(rx_pkts_burst[i],
+							      vector);
+			if (ret != TEST_SUCCESS)
+				break;
+			continue;
+		}
+
+		if (rx_pkts_burst[i]->ol_flags &
+		    RTE_MBUF_F_RX_SEC_OFFLOAD_FAILED ||
+		    !(rx_pkts_burst[i]->ol_flags & RTE_MBUF_F_RX_SEC_OFFLOAD)) {
+			printf("\nsecurity offload failed\n");
+			ret = TEST_FAILED;
+			break;
+		}
+
+		if (vector->full_pkt->len != rx_pkts_burst[i]->pkt_len) {
+			printf("\nreassembled/decrypted packet length mismatch\n");
+			ret = TEST_FAILED;
+			break;
+		}
+		ret = compare_pkt_data(rx_pkts_burst[i],
+				       vector->full_pkt->data,
+				       vector->full_pkt->len);
+		if (ret != TEST_SUCCESS)
+			break;
+	}
+
+out:
+	destroy_default_flow(portid);
+
+	/* Clear session data. */
+	for (i = 0; i < burst_sz; i++) {
+		if (out_ips[i].security.ses)
+			rte_security_session_destroy(out_ips[i].security.ctx,
+						     out_ips[i].security.ses);
+		if (in_ips[i].security.ses)
+			rte_security_session_destroy(in_ips[i].security.ctx,
+						     in_ips[i].security.ses);
+	}
+
+	for (i = nb_sent; i < nb_tx; i++)
+		free_mbuf(tx_pkts_burst[i]);
+	for (i = 0; i < nb_rx; i++)
+		free_mbuf(rx_pkts_burst[i]);
+	return ret;
+}
+
 static int
 test_ipsec(struct reassembly_vector *vector,
 	   enum rte_security_ipsec_sa_direction dir,
@@ -733,6 +1024,34 @@ test_ipsec_ipv4_decap_nofrag(void)
 			RTE_SECURITY_IPSEC_TUNNEL_IPV4);
 }
 
+static int
+test_reassembly_ipv4_nofrag(void)
+{
+	struct reassembly_vector ipv4_nofrag_case = {
+				.sa_data = &conf_aes_128_gcm,
+				.full_pkt = &pkt_ipv4_plain,
+				.frags[0] = &pkt_ipv4_plain,
+				.nb_frags = 1,
+	};
+	return test_ipsec_encap_decap(&ipv4_nofrag_case,
+				      RTE_SECURITY_IPSEC_TUNNEL_IPV4);
+}
+
+
+static int
+test_ipsec_ipv4_burst_encap_decap(void)
+{
+	struct reassembly_vector ipv4_nofrag_case = {
+				.sa_data = &conf_aes_128_gcm,
+				.full_pkt = &pkt_ipv4_plain,
+				.frags[0] = &pkt_ipv4_plain,
+				.nb_frags = 1,
+				.burst = true,
+	};
+	return test_ipsec_encap_decap(&ipv4_nofrag_case,
+				      RTE_SECURITY_IPSEC_TUNNEL_IPV4);
+}
+
 static struct unit_test_suite inline_ipsec_testsuite  = {
 	.suite_name = "Inline IPsec Ethernet Device Unit Test Suite",
 	.setup = testsuite_setup,
@@ -744,6 +1063,12 @@ static struct unit_test_suite inline_ipsec_testsuite  = {
 		TEST_CASE_ST(ut_setup_inline_ipsec,
 				ut_teardown_inline_ipsec,
 				test_ipsec_ipv4_decap_nofrag),
+		TEST_CASE_ST(ut_setup_inline_ipsec,
+				ut_teardown_inline_ipsec,
+				test_reassembly_ipv4_nofrag),
+		TEST_CASE_ST(ut_setup_inline_ipsec,
+				ut_teardown_inline_ipsec,
+				test_ipsec_ipv4_burst_encap_decap),
 
 		TEST_CASES_END() /**< NULL terminate unit test array */
 	}
diff --git a/app/test/test_security_inline_proto_vectors.h b/app/test/test_security_inline_proto_vectors.h
index 08e6868b0d..861c4fad48 100644
--- a/app/test/test_security_inline_proto_vectors.h
+++ b/app/test/test_security_inline_proto_vectors.h
@@ -42,6 +42,7 @@ struct reassembly_vector {
 	struct ipsec_test_packet *full_pkt;
 	struct ipsec_test_packet *frags[MAX_FRAGS];
 	uint16_t nb_frags;
+	bool burst;
 };
 
 struct ipsec_test_packet pkt_ipv4_plain = {
-- 
2.25.1


^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH v2 3/4] app/test: add IP reassembly cases with multiple fragments
  2022-01-20 16:48     ` [PATCH v2 0/4] app/test: add inline IPsec and reassembly cases Akhil Goyal
  2022-01-20 16:48       ` [PATCH v2 1/4] app/test: add unit cases for inline IPsec offload Akhil Goyal
  2022-01-20 16:48       ` [PATCH v2 2/4] app/test: add IP reassembly case with no frags Akhil Goyal
@ 2022-01-20 16:48       ` Akhil Goyal
  2022-01-20 16:48       ` [PATCH v2 4/4] app/test: add IP reassembly negative cases Akhil Goyal
  3 siblings, 0 replies; 53+ messages in thread
From: Akhil Goyal @ 2022-01-20 16:48 UTC (permalink / raw)
  To: dev
  Cc: anoobj, radu.nicolau, declan.doherty, hemant.agrawal, matan,
	konstantin.ananyev, thomas, ferruh.yigit, andrew.rybchenko,
	olivier.matz, rosen.xu, jerinj, Akhil Goyal

More cases are added to the test_inline_ipsec test suite to verify packets
having multiple IP(v4/v6) fragments. These fragments are encrypted
and then decrypted as per inline IPsec processing, after which an attempt
is made to reassemble the fragments. The reassembled packet
content is matched against the known test vectors.

Signed-off-by: Akhil Goyal <gakhil@marvell.com>
---
 app/test/test_security_inline_proto.c         | 147 ++++-
 app/test/test_security_inline_proto_vectors.h | 592 ++++++++++++++++++
 2 files changed, 738 insertions(+), 1 deletion(-)

diff --git a/app/test/test_security_inline_proto.c b/app/test/test_security_inline_proto.c
index 9dc083369a..d05325b205 100644
--- a/app/test/test_security_inline_proto.c
+++ b/app/test/test_security_inline_proto.c
@@ -1037,7 +1037,6 @@ test_reassembly_ipv4_nofrag(void)
 				      RTE_SECURITY_IPSEC_TUNNEL_IPV4);
 }
 
-
 static int
 test_ipsec_ipv4_burst_encap_decap(void)
 {
@@ -1052,6 +1051,134 @@ test_ipsec_ipv4_burst_encap_decap(void)
 				      RTE_SECURITY_IPSEC_TUNNEL_IPV4);
 }
 
+static int
+test_reassembly_ipv4_2frag(void)
+{
+	struct reassembly_vector ipv4_2frag_case = {
+				.sa_data = &conf_aes_128_gcm,
+				.full_pkt = &pkt_ipv4_udp_p1,
+				.frags[0] = &pkt_ipv4_udp_p1_f1,
+				.frags[1] = &pkt_ipv4_udp_p1_f2,
+				.nb_frags = 2,
+	};
+	test_vector_payload_populate(&pkt_ipv4_udp_p1, true);
+	test_vector_payload_populate(&pkt_ipv4_udp_p1_f1, true);
+	test_vector_payload_populate(&pkt_ipv4_udp_p1_f2, false);
+
+	return test_ipsec_encap_decap(&ipv4_2frag_case,
+				      RTE_SECURITY_IPSEC_TUNNEL_IPV4);
+}
+
+static int
+test_reassembly_ipv6_2frag(void)
+{
+	struct reassembly_vector ipv6_2frag_case = {
+				.sa_data = &conf_aes_128_gcm,
+				.full_pkt = &pkt_ipv6_udp_p1,
+				.frags[0] = &pkt_ipv6_udp_p1_f1,
+				.frags[1] = &pkt_ipv6_udp_p1_f2,
+				.nb_frags = 2,
+	};
+	test_vector_payload_populate(&pkt_ipv6_udp_p1, true);
+	test_vector_payload_populate(&pkt_ipv6_udp_p1_f1, true);
+	test_vector_payload_populate(&pkt_ipv6_udp_p1_f2, false);
+
+	return test_ipsec_encap_decap(&ipv6_2frag_case,
+				      RTE_SECURITY_IPSEC_TUNNEL_IPV6);
+}
+
+static int
+test_reassembly_ipv4_4frag(void)
+{
+	struct reassembly_vector ipv4_4frag_case = {
+				.sa_data = &conf_aes_128_gcm,
+				.full_pkt = &pkt_ipv4_udp_p2,
+				.frags[0] = &pkt_ipv4_udp_p2_f1,
+				.frags[1] = &pkt_ipv4_udp_p2_f2,
+				.frags[2] = &pkt_ipv4_udp_p2_f3,
+				.frags[3] = &pkt_ipv4_udp_p2_f4,
+				.nb_frags = 4,
+	};
+	test_vector_payload_populate(&pkt_ipv4_udp_p2, true);
+	test_vector_payload_populate(&pkt_ipv4_udp_p2_f1, true);
+	test_vector_payload_populate(&pkt_ipv4_udp_p2_f2, false);
+	test_vector_payload_populate(&pkt_ipv4_udp_p2_f3, false);
+	test_vector_payload_populate(&pkt_ipv4_udp_p2_f4, false);
+
+	return test_ipsec_encap_decap(&ipv4_4frag_case,
+				      RTE_SECURITY_IPSEC_TUNNEL_IPV4);
+}
+
+static int
+test_reassembly_ipv6_4frag(void)
+{
+	struct reassembly_vector ipv6_4frag_case = {
+				.sa_data = &conf_aes_128_gcm,
+				.full_pkt = &pkt_ipv6_udp_p2,
+				.frags[0] = &pkt_ipv6_udp_p2_f1,
+				.frags[1] = &pkt_ipv6_udp_p2_f2,
+				.frags[2] = &pkt_ipv6_udp_p2_f3,
+				.frags[3] = &pkt_ipv6_udp_p2_f4,
+				.nb_frags = 4,
+	};
+	test_vector_payload_populate(&pkt_ipv6_udp_p2, true);
+	test_vector_payload_populate(&pkt_ipv6_udp_p2_f1, true);
+	test_vector_payload_populate(&pkt_ipv6_udp_p2_f2, false);
+	test_vector_payload_populate(&pkt_ipv6_udp_p2_f3, false);
+	test_vector_payload_populate(&pkt_ipv6_udp_p2_f4, false);
+
+	return test_ipsec_encap_decap(&ipv6_4frag_case,
+				      RTE_SECURITY_IPSEC_TUNNEL_IPV6);
+}
+
+static int
+test_reassembly_ipv4_5frag(void)
+{
+	struct reassembly_vector ipv4_5frag_case = {
+				.sa_data = &conf_aes_128_gcm,
+				.full_pkt = &pkt_ipv4_udp_p3,
+				.frags[0] = &pkt_ipv4_udp_p3_f1,
+				.frags[1] = &pkt_ipv4_udp_p3_f2,
+				.frags[2] = &pkt_ipv4_udp_p3_f3,
+				.frags[3] = &pkt_ipv4_udp_p3_f4,
+				.frags[4] = &pkt_ipv4_udp_p3_f5,
+				.nb_frags = 5,
+	};
+	test_vector_payload_populate(&pkt_ipv4_udp_p3, true);
+	test_vector_payload_populate(&pkt_ipv4_udp_p3_f1, true);
+	test_vector_payload_populate(&pkt_ipv4_udp_p3_f2, false);
+	test_vector_payload_populate(&pkt_ipv4_udp_p3_f3, false);
+	test_vector_payload_populate(&pkt_ipv4_udp_p3_f4, false);
+	test_vector_payload_populate(&pkt_ipv4_udp_p3_f5, false);
+
+	return test_ipsec_encap_decap(&ipv4_5frag_case,
+				      RTE_SECURITY_IPSEC_TUNNEL_IPV4);
+}
+
+static int
+test_reassembly_ipv6_5frag(void)
+{
+	struct reassembly_vector ipv6_5frag_case = {
+				.sa_data = &conf_aes_128_gcm,
+				.full_pkt = &pkt_ipv6_udp_p3,
+				.frags[0] = &pkt_ipv6_udp_p3_f1,
+				.frags[1] = &pkt_ipv6_udp_p3_f2,
+				.frags[2] = &pkt_ipv6_udp_p3_f3,
+				.frags[3] = &pkt_ipv6_udp_p3_f4,
+				.frags[4] = &pkt_ipv6_udp_p3_f5,
+				.nb_frags = 5,
+	};
+	test_vector_payload_populate(&pkt_ipv6_udp_p3, true);
+	test_vector_payload_populate(&pkt_ipv6_udp_p3_f1, true);
+	test_vector_payload_populate(&pkt_ipv6_udp_p3_f2, false);
+	test_vector_payload_populate(&pkt_ipv6_udp_p3_f3, false);
+	test_vector_payload_populate(&pkt_ipv6_udp_p3_f4, false);
+	test_vector_payload_populate(&pkt_ipv6_udp_p3_f5, false);
+
+	return test_ipsec_encap_decap(&ipv6_5frag_case,
+				      RTE_SECURITY_IPSEC_TUNNEL_IPV6);
+}
+
 static struct unit_test_suite inline_ipsec_testsuite  = {
 	.suite_name = "Inline IPsec Ethernet Device Unit Test Suite",
 	.setup = testsuite_setup,
@@ -1069,6 +1196,24 @@ static struct unit_test_suite inline_ipsec_testsuite  = {
 		TEST_CASE_ST(ut_setup_inline_ipsec,
 				ut_teardown_inline_ipsec,
 				test_ipsec_ipv4_burst_encap_decap),
+		TEST_CASE_ST(ut_setup_inline_ipsec,
+				ut_teardown_inline_ipsec,
+				test_reassembly_ipv4_2frag),
+		TEST_CASE_ST(ut_setup_inline_ipsec,
+				ut_teardown_inline_ipsec,
+				test_reassembly_ipv6_2frag),
+		TEST_CASE_ST(ut_setup_inline_ipsec,
+				ut_teardown_inline_ipsec,
+				test_reassembly_ipv4_4frag),
+		TEST_CASE_ST(ut_setup_inline_ipsec,
+				ut_teardown_inline_ipsec,
+				test_reassembly_ipv6_4frag),
+		TEST_CASE_ST(ut_setup_inline_ipsec,
+				ut_teardown_inline_ipsec,
+				test_reassembly_ipv4_5frag),
+		TEST_CASE_ST(ut_setup_inline_ipsec,
+				ut_teardown_inline_ipsec,
+				test_reassembly_ipv6_5frag),
 
 		TEST_CASES_END() /**< NULL terminate unit test array */
 	}
diff --git a/app/test/test_security_inline_proto_vectors.h b/app/test/test_security_inline_proto_vectors.h
index 861c4fad48..49d94f37df 100644
--- a/app/test/test_security_inline_proto_vectors.h
+++ b/app/test/test_security_inline_proto_vectors.h
@@ -4,6 +4,47 @@
 #ifndef _TEST_INLINE_IPSEC_REASSEMBLY_VECTORS_H_
 #define _TEST_INLINE_IPSEC_REASSEMBLY_VECTORS_H_
 
+/* The source file includes below test vectors */
+/* IPv6:
+ *
+ *	1) pkt_ipv6_udp_p1
+ *		pkt_ipv6_udp_p1_f1
+ *		pkt_ipv6_udp_p1_f2
+ *
+ *	2) pkt_ipv6_udp_p2
+ *		pkt_ipv6_udp_p2_f1
+ *		pkt_ipv6_udp_p2_f2
+ *		pkt_ipv6_udp_p2_f3
+ *		pkt_ipv6_udp_p2_f4
+ *
+ *	3) pkt_ipv6_udp_p3
+ *		pkt_ipv6_udp_p3_f1
+ *		pkt_ipv6_udp_p3_f2
+ *		pkt_ipv6_udp_p3_f3
+ *		pkt_ipv6_udp_p3_f4
+ *		pkt_ipv6_udp_p3_f5
+ */
+
+/* IPv4:
+ *
+ *	1) pkt_ipv4_udp_p1
+ *		pkt_ipv4_udp_p1_f1
+ *		pkt_ipv4_udp_p1_f2
+ *
+ *	2) pkt_ipv4_udp_p2
+ *		pkt_ipv4_udp_p2_f1
+ *		pkt_ipv4_udp_p2_f2
+ *		pkt_ipv4_udp_p2_f3
+ *		pkt_ipv4_udp_p2_f4
+ *
+ *	3) pkt_ipv4_udp_p3
+ *		pkt_ipv4_udp_p3_f1
+ *		pkt_ipv4_udp_p3_f2
+ *		pkt_ipv4_udp_p3_f3
+ *		pkt_ipv4_udp_p3_f4
+ *		pkt_ipv4_udp_p3_f5
+ */
+
 #define MAX_FRAG_LEN		 1500
 #define MAX_FRAGS		 6
 #define MAX_PKT_LEN		 (MAX_FRAG_LEN * MAX_FRAGS)
@@ -45,6 +86,557 @@ struct reassembly_vector {
 	bool burst;
 };
 
+struct ipsec_test_packet pkt_ipv6_udp_p1 = {
+	.len = 1514,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 54,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x86, 0xdd,
+
+		/* IP */
+		0x60, 0x00, 0x00, 0x00, 0x05, 0xb4, 0x2C, 0x40,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x02,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x02, 0x00, 0x00, 0x02,
+
+		/* UDP */
+		0x08, 0x00, 0x27, 0x10, 0x05, 0xb4, 0x2b, 0xe8,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv6_udp_p1_f1 = {
+	.len = 1398,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 62,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x86, 0xdd,
+
+		/* IP */
+		0x60, 0x00, 0x00, 0x00, 0x05, 0x40, 0x2c, 0x40,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x02,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x02, 0x00, 0x00, 0x02,
+		0x11, 0x00, 0x00, 0x01, 0x5c, 0x92, 0xac, 0xf1,
+
+		/* UDP */
+		0x08, 0x00, 0x27, 0x10, 0x05, 0xb4, 0x2b, 0xe8,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv6_udp_p1_f2 = {
+	.len = 186,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 62,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x86, 0xdd,
+
+		/* IP */
+		0x60, 0x00, 0x00, 0x00, 0x00, 0x84, 0x2c, 0x40,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x02,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x02, 0x00, 0x00, 0x02,
+		0x11, 0x00, 0x05, 0x38, 0x5c, 0x92, 0xac, 0xf1,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv6_udp_p2 = {
+	.len = 4496,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 54,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x86, 0xdd,
+
+		/* IP */
+		0x60, 0x00, 0x00, 0x00, 0x11, 0x5a, 0x2c, 0x40,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x02,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x02, 0x00, 0x00, 0x02,
+
+		/* UDP */
+		0x08, 0x00, 0x27, 0x10, 0x11, 0x5a, 0x8a, 0x11,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv6_udp_p2_f1 = {
+	.len = 1398,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 62,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x86, 0xdd,
+
+		/* IP */
+		0x60, 0x00, 0x00, 0x00, 0x05, 0x40, 0x2c, 0x40,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x02,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x02, 0x00, 0x00, 0x02,
+		0x11, 0x00, 0x00, 0x01, 0x64, 0x6c, 0x68, 0x9f,
+
+		/* UDP */
+		0x08, 0x00, 0x27, 0x10, 0x11, 0x5a, 0x8a, 0x11,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv6_udp_p2_f2 = {
+	.len = 1398,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 62,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x86, 0xdd,
+
+		/* IP */
+		0x60, 0x00, 0x00, 0x00, 0x05, 0x40, 0x2c, 0x40,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x02,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x02, 0x00, 0x00, 0x02,
+		0x11, 0x00, 0x05, 0x39, 0x64, 0x6c, 0x68, 0x9f,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv6_udp_p2_f3 = {
+	.len = 1398,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 62,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x86, 0xdd,
+
+		/* IP */
+		0x60, 0x00, 0x00, 0x00, 0x05, 0x40, 0x2c, 0x40,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x02,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x02, 0x00, 0x00, 0x02,
+		0x11, 0x00, 0x0a, 0x71, 0x64, 0x6c, 0x68, 0x9f,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv6_udp_p2_f4 = {
+	.len = 496,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 62,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x86, 0xdd,
+
+		/* IP */
+		0x60, 0x00, 0x00, 0x00, 0x01, 0xba, 0x2c, 0x40,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x02,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x02, 0x00, 0x00, 0x02,
+		0x11, 0x00, 0x0f, 0xa8, 0x64, 0x6c, 0x68, 0x9f,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv6_udp_p3 = {
+	.len = 5796,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 54,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x86, 0xdd,
+
+		/* IP */
+		0x60, 0x00, 0x00, 0x00, 0x16, 0x6e, 0x2c, 0x40,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x02,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x02, 0x00, 0x00, 0x02,
+
+		/* UDP */
+		0x08, 0x00, 0x27, 0x10, 0x16, 0x6e, 0x2f, 0x99,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv6_udp_p3_f1 = {
+	.len = 1398,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 62,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x86, 0xdd,
+
+		/* IP */
+		0x60, 0x00, 0x00, 0x00, 0x05, 0x40, 0x2c, 0x40,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x02,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x02, 0x00, 0x00, 0x02,
+		0x11, 0x00, 0x00, 0x01, 0x65, 0xcf, 0x5a, 0xae,
+
+		/* UDP */
+		0x08, 0x00, 0x27, 0x10, 0x16, 0x6e, 0x2f, 0x99,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv6_udp_p3_f2 = {
+	.len = 1398,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 62,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x86, 0xdd,
+
+		/* IP */
+		0x60, 0x00, 0x00, 0x00, 0x05, 0x40, 0x2c, 0x40,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x02,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x02, 0x00, 0x00, 0x02,
+		0x11, 0x00, 0x05, 0x39, 0x65, 0xcf, 0x5a, 0xae,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv6_udp_p3_f3 = {
+	.len = 1398,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 62,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x86, 0xdd,
+
+		/* IP */
+		0x60, 0x00, 0x00, 0x00, 0x05, 0x40, 0x2c, 0x40,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x02,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x02, 0x00, 0x00, 0x02,
+		0x11, 0x00, 0x0a, 0x71, 0x65, 0xcf, 0x5a, 0xae,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv6_udp_p3_f4 = {
+	.len = 1398,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 62,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x86, 0xdd,
+
+		/* IP */
+		0x60, 0x00, 0x00, 0x00, 0x05, 0x40, 0x2c, 0x40,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x02,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x02, 0x00, 0x00, 0x02,
+		0x11, 0x00, 0x0f, 0xa9, 0x65, 0xcf, 0x5a, 0xae,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv6_udp_p3_f5 = {
+	.len = 460,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 62,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x86, 0xdd,
+
+		/* IP */
+		0x60, 0x00, 0x00, 0x00, 0x01, 0x96, 0x2c, 0x40,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x02,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0xff, 0xff, 0x02, 0x00, 0x00, 0x02,
+		0x11, 0x00, 0x14, 0xe0, 0x65, 0xcf, 0x5a, 0xae,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv4_udp_p1 = {
+	.len = 1514,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 34,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x08, 0x00,
+
+		/* IP */
+		0x45, 0x00, 0x05, 0xdc, 0x00, 0x01, 0x00, 0x00,
+		0x40, 0x11, 0x66, 0x0d, 0x0d, 0x00, 0x00, 0x02,
+		0x02, 0x00, 0x00, 0x02,
+
+		/* UDP */
+		0x08, 0x00, 0x27, 0x10, 0x05, 0xc8, 0xb8, 0x4c,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv4_udp_p1_f1 = {
+	.len = 1434,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 34,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x08, 0x00,
+
+		/* IP */
+		0x45, 0x00, 0x05, 0x8c, 0x00, 0x01, 0x20, 0x00,
+		0x40, 0x11, 0x46, 0x5d, 0x0d, 0x00, 0x00, 0x02,
+		0x02, 0x00, 0x00, 0x02,
+
+		/* UDP */
+		0x08, 0x00, 0x27, 0x10, 0x05, 0xc8, 0xb8, 0x4c,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv4_udp_p1_f2 = {
+	.len = 114,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 34,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x08, 0x00,
+
+		/* IP */
+		0x45, 0x00, 0x00, 0x64, 0x00, 0x01, 0x00, 0xaf,
+		0x40, 0x11, 0x6a, 0xd6, 0x0d, 0x00, 0x00, 0x02,
+		0x02, 0x00, 0x00, 0x02,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv4_udp_p2 = {
+	.len = 4496,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 34,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x08, 0x00,
+
+		/* IP */
+		0x45, 0x00, 0x11, 0x82, 0x00, 0x02, 0x00, 0x00,
+		0x40, 0x11, 0x5a, 0x66, 0x0d, 0x00, 0x00, 0x02,
+		0x02, 0x00, 0x00, 0x02,
+
+		/* UDP */
+		0x08, 0x00, 0x27, 0x10, 0x11, 0x6e, 0x16, 0x76,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv4_udp_p2_f1 = {
+	.len = 1434,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 34,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x08, 0x00,
+
+		/* IP */
+		0x45, 0x00, 0x05, 0x8c, 0x00, 0x02, 0x20, 0x00,
+		0x40, 0x11, 0x46, 0x5c, 0x0d, 0x00, 0x00, 0x02,
+		0x02, 0x00, 0x00, 0x02,
+
+		/* UDP */
+		0x08, 0x00, 0x27, 0x10, 0x11, 0x6e, 0x16, 0x76,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv4_udp_p2_f2 = {
+	.len = 1434,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 34,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x08, 0x00,
+
+		/* IP */
+		0x45, 0x00, 0x05, 0x8c, 0x00, 0x02, 0x20, 0xaf,
+		0x40, 0x11, 0x45, 0xad, 0x0d, 0x00, 0x00, 0x02,
+		0x02, 0x00, 0x00, 0x02,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv4_udp_p2_f3 = {
+	.len = 1434,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 34,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x08, 0x00,
+
+		/* IP */
+		0x45, 0x00, 0x05, 0x8c, 0x00, 0x02, 0x21, 0x5e,
+		0x40, 0x11, 0x44, 0xfe, 0x0d, 0x00, 0x00, 0x02,
+		0x02, 0x00, 0x00, 0x02,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv4_udp_p2_f4 = {
+	.len = 296,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 34,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x08, 0x00,
+
+		/* IP */
+		0x45, 0x00, 0x01, 0x1a, 0x00, 0x02, 0x02, 0x0d,
+		0x40, 0x11, 0x68, 0xc1, 0x0d, 0x00, 0x00, 0x02,
+		0x02, 0x00, 0x00, 0x02,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv4_udp_p3 = {
+	.len = 5796,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 34,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x08, 0x00,
+
+		/* IP */
+		0x45, 0x00, 0x16, 0x96, 0x00, 0x03, 0x00, 0x00,
+		0x40, 0x11, 0x55, 0x51, 0x0d, 0x00, 0x00, 0x02,
+		0x02, 0x00, 0x00, 0x02,
+
+		/* UDP */
+		0x08, 0x00, 0x27, 0x10, 0x16, 0x82, 0xbb, 0xfd,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv4_udp_p3_f1 = {
+	.len = 1434,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 34,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x08, 0x00,
+
+		/* IP */
+		0x45, 0x00, 0x05, 0x8c, 0x00, 0x03, 0x20, 0x00,
+		0x40, 0x11, 0x46, 0x5b, 0x0d, 0x00, 0x00, 0x02,
+		0x02, 0x00, 0x00, 0x02,
+
+		/* UDP */
+		0x08, 0x00, 0x27, 0x10, 0x16, 0x82, 0xbb, 0xfd,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv4_udp_p3_f2 = {
+	.len = 1434,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 34,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x08, 0x00,
+
+		/* IP */
+		0x45, 0x00, 0x05, 0x8c, 0x00, 0x03, 0x20, 0xaf,
+		0x40, 0x11, 0x45, 0xac, 0x0d, 0x00, 0x00, 0x02,
+		0x02, 0x00, 0x00, 0x02,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv4_udp_p3_f3 = {
+	.len = 1434,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 34,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x08, 0x00,
+
+		/* IP */
+		0x45, 0x00, 0x05, 0x8c, 0x00, 0x03, 0x21, 0x5e,
+		0x40, 0x11, 0x44, 0xfd, 0x0d, 0x00, 0x00, 0x02,
+		0x02, 0x00, 0x00, 0x02,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv4_udp_p3_f4 = {
+	.len = 1434,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 34,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x08, 0x00,
+
+		/* IP */
+		0x45, 0x00, 0x05, 0x8c, 0x00, 0x03, 0x22, 0x0d,
+		0x40, 0x11, 0x44, 0x4e, 0x0d, 0x00, 0x00, 0x02,
+		0x02, 0x00, 0x00, 0x02,
+	},
+};
+
+struct ipsec_test_packet pkt_ipv4_udp_p3_f5 = {
+	.len = 196,
+	.l2_offset = 0,
+	.l3_offset = 14,
+	.l4_offset = 34,
+	.data = {
+		/* ETH */
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1,
+		0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x08, 0x00,
+
+		/* IP */
+		0x45, 0x00, 0x00, 0xb6, 0x00, 0x03, 0x02, 0xbc,
+		0x40, 0x11, 0x68, 0x75, 0x0d, 0x00, 0x00, 0x02,
+		0x02, 0x00, 0x00, 0x02,
+	},
+};
+
 struct ipsec_test_packet pkt_ipv4_plain = {
 	.len = 76,
 	.l2_offset = 0,
-- 
2.25.1


^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH v2 4/4] app/test: add IP reassembly negative cases
  2022-01-20 16:48     ` [PATCH v2 0/4] app/test: add inline IPsec and reassembly cases Akhil Goyal
                         ` (2 preceding siblings ...)
  2022-01-20 16:48       ` [PATCH v2 3/4] app/test: add IP reassembly cases with multiple fragments Akhil Goyal
@ 2022-01-20 16:48       ` Akhil Goyal
  3 siblings, 0 replies; 53+ messages in thread
From: Akhil Goyal @ 2022-01-20 16:48 UTC (permalink / raw)
  To: dev
  Cc: anoobj, radu.nicolau, declan.doherty, hemant.agrawal, matan,
	konstantin.ananyev, thomas, ferruh.yigit, andrew.rybchenko,
	olivier.matz, rosen.xu, jerinj, Akhil Goyal

Cases are added to the test_inline_ipsec test suite where IP reassembly
is incomplete and the software will need to reassemble the fragments later.
The failure cases added are:
- not all fragments are received.
- the same fragment is received more than once.
- fragments are received out of order.

Signed-off-by: Akhil Goyal <gakhil@marvell.com>
---
 app/test/test_security_inline_proto.c | 71 +++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)

diff --git a/app/test/test_security_inline_proto.c b/app/test/test_security_inline_proto.c
index d05325b205..b1794c1bc7 100644
--- a/app/test/test_security_inline_proto.c
+++ b/app/test/test_security_inline_proto.c
@@ -1179,6 +1179,68 @@ test_reassembly_ipv6_5frag(void)
 				      RTE_SECURITY_IPSEC_TUNNEL_IPV6);
 }
 
+static int
+test_reassembly_incomplete(void)
+{
+	/* Negative test case, not sending all fragments. */
+	struct reassembly_vector ipv4_incomplete_case = {
+				.sa_data = &conf_aes_128_gcm,
+				.full_pkt = &pkt_ipv4_udp_p2,
+				.frags[0] = &pkt_ipv4_udp_p2_f1,
+				.frags[1] = &pkt_ipv4_udp_p2_f2,
+				.nb_frags = 2,
+	};
+	test_vector_payload_populate(&pkt_ipv4_udp_p2, true);
+	test_vector_payload_populate(&pkt_ipv4_udp_p2_f1, true);
+	test_vector_payload_populate(&pkt_ipv4_udp_p2_f2, false);
+
+	return test_ipsec_encap_decap(&ipv4_incomplete_case,
+				      RTE_SECURITY_IPSEC_TUNNEL_IPV4);
+}
+
+static int
+test_reassembly_overlap(void)
+{
+	/* Negative test case, sending 1 fragment twice. */
+	struct reassembly_vector ipv4_overlap_case = {
+				.sa_data = &conf_aes_128_gcm,
+				.full_pkt = &pkt_ipv4_udp_p1,
+				.frags[0] = &pkt_ipv4_udp_p1_f1,
+				.frags[1] = &pkt_ipv4_udp_p1_f1, /* Overlap */
+				.frags[2] = &pkt_ipv4_udp_p1_f2,
+				.nb_frags = 3,
+	};
+	test_vector_payload_populate(&pkt_ipv4_udp_p1, true);
+	test_vector_payload_populate(&pkt_ipv4_udp_p1_f1, true);
+	test_vector_payload_populate(&pkt_ipv4_udp_p1_f2, false);
+
+	return test_ipsec_encap_decap(&ipv4_overlap_case,
+				      RTE_SECURITY_IPSEC_TUNNEL_IPV4);
+}
+
+static int
+test_reassembly_out_of_order(void)
+{
+	/* Negative test case, out of order fragments. */
+	struct reassembly_vector ipv4_ooo_case = {
+				.sa_data = &conf_aes_128_gcm,
+				.full_pkt = &pkt_ipv4_udp_p2,
+				.frags[0] = &pkt_ipv4_udp_p2_f1,
+				.frags[1] = &pkt_ipv4_udp_p2_f3,
+				.frags[2] = &pkt_ipv4_udp_p2_f4,
+				.frags[3] = &pkt_ipv4_udp_p2_f2,
+				.nb_frags = 4,
+	};
+	test_vector_payload_populate(&pkt_ipv4_udp_p2, true);
+	test_vector_payload_populate(&pkt_ipv4_udp_p2_f1, true);
+	test_vector_payload_populate(&pkt_ipv4_udp_p2_f2, false);
+	test_vector_payload_populate(&pkt_ipv4_udp_p2_f3, false);
+	test_vector_payload_populate(&pkt_ipv4_udp_p2_f4, false);
+
+	return test_ipsec_encap_decap(&ipv4_ooo_case,
+				      RTE_SECURITY_IPSEC_TUNNEL_IPV4);
+}
+
 static struct unit_test_suite inline_ipsec_testsuite  = {
 	.suite_name = "Inline IPsec Ethernet Device Unit Test Suite",
 	.setup = testsuite_setup,
@@ -1214,6 +1276,15 @@ static struct unit_test_suite inline_ipsec_testsuite  = {
 		TEST_CASE_ST(ut_setup_inline_ipsec,
 				ut_teardown_inline_ipsec,
 				test_reassembly_ipv6_5frag),
+		TEST_CASE_ST(ut_setup_inline_ipsec,
+				ut_teardown_inline_ipsec,
+				test_reassembly_incomplete),
+		TEST_CASE_ST(ut_setup_inline_ipsec,
+				ut_teardown_inline_ipsec,
+				test_reassembly_overlap),
+		TEST_CASE_ST(ut_setup_inline_ipsec,
+				ut_teardown_inline_ipsec,
+				test_reassembly_out_of_order),
 
 		TEST_CASES_END() /**< NULL terminate unit test array */
 	}
-- 
2.25.1


^ permalink raw reply	[flat|nested] 53+ messages in thread

* RE: [EXT] Re: [PATCH v2 1/4] ethdev: introduce IP reassembly offload
  2022-01-20 16:45       ` Stephen Hemminger
@ 2022-01-20 17:11         ` Akhil Goyal
  0 siblings, 0 replies; 53+ messages in thread
From: Akhil Goyal @ 2022-01-20 17:11 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: dev, Anoob Joseph, radu.nicolau, declan.doherty, hemant.agrawal,
	matan, konstantin.ananyev, thomas, ferruh.yigit,
	andrew.rybchenko, olivier.matz, rosen.xu,
	Jerin Jacob Kollanukkaran

> On Thu, 20 Jan 2022 21:56:24 +0530
> Akhil Goyal <gakhil@marvell.com> wrote:
> 
> > +/**
> > + * @warning
> > + * @b EXPERIMENTAL: this structure may change without prior notice.
> > + *
> > + * A structure used to set IP reassembly configuration.
> > + *
> > + * If RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY flag is set in offloads field,
> > + * the PMD will attempt IP reassembly for the received packets as per
> > + * properties defined in this structure:
> > + *
> > + */
> > +struct rte_eth_ip_reass_params {
> > +	/** Maximum time in ms which PMD can wait for other fragments. */
> > +	uint32_t reass_timeout;
> > +	/** Maximum number of fragments that can be reassembled. */
> > +	uint16_t max_frags;
> > +	/**
> > +	 * Flags to enable reassembly of packet types -
> > +	 * RTE_ETH_DEV_REASSEMBLY_F_xxx.
> > +	 */
> > +	uint16_t flags;
> > +};
> > +
> 
> Actually, this is not experimental. You are embedding this in dev_info
> and dev_info is not experimental; therefore the reassembly parameters
> can never change without breaking ABI of dev_info.

Agreed, will remove the experimental tag from this struct.

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [PATCH 1/8] ethdev: introduce IP reassembly offload
  2022-01-03 15:08   ` [PATCH 1/8] " Akhil Goyal
  2022-01-11 16:03     ` Ananyev, Konstantin
@ 2022-01-22  7:38     ` Andrew Rybchenko
  1 sibling, 0 replies; 53+ messages in thread
From: Andrew Rybchenko @ 2022-01-22  7:38 UTC (permalink / raw)
  To: Akhil Goyal, dev
  Cc: anoobj, radu.nicolau, declan.doherty, hemant.agrawal, matan,
	konstantin.ananyev, thomas, ferruh.yigit, olivier.matz, rosen.xu

On 1/3/22 18:08, Akhil Goyal wrote:
> IP Reassembly is a costly operation if it is done in software.
> The operation becomes even costlier if IP fragments are encrypted.
> However, if it is offloaded to HW, it can considerably save application cycles.
> 
> Hence, a new offload RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY is introduced in
> ethdev for devices which can attempt reassembly of packets in hardware.
> rte_eth_dev_info is updated with the reassembly capabilities which a device
> can support.

Yes, reassembly is a really complicated process, given the possibility of
overlapping fragments, out-of-order arrival, etc.
There are network attacks based on IP reassembly.
Will it simply result in IP reassembly failure if no buffers are left
for IP fragments? What will be reported in the mbuf if some packets overlap?
Just the raw packets as is, or the reassembly result with holes?
I think the behaviour should be specified.

> The resulting reassembled packet would be a typical segmented mbuf in
> case of success.
> 
> And if reassembly of the fragments fails or is incomplete (if fragments do
> not arrive before the reass_timeout), the mbuf ol_flags can be updated.
> This is updated in a subsequent patch.
> 
> Signed-off-by: Akhil Goyal <gakhil@marvell.com>
> ---
>   doc/guides/nics/features.rst | 12 ++++++++++++
>   lib/ethdev/rte_ethdev.c      |  1 +
>   lib/ethdev/rte_ethdev.h      | 32 +++++++++++++++++++++++++++++++-
>   3 files changed, 44 insertions(+), 1 deletion(-)
> 
> diff --git a/doc/guides/nics/features.rst b/doc/guides/nics/features.rst
> index 27be2d2576..1dfdee9602 100644
> --- a/doc/guides/nics/features.rst
> +++ b/doc/guides/nics/features.rst
> @@ -602,6 +602,18 @@ Supports inner packet L4 checksum.
>     ``tx_offload_capa,tx_queue_offload_capa:RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM``.
>   
>   
> +.. _nic_features_ip_reassembly:
> +
> +IP reassembly
> +-------------
> +
> +Supports IP reassembly in hardware.
> +
> +* **[uses]     rte_eth_rxconf,rte_eth_rxmode**: ``offloads:RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY``.

Looking at the patch I see no changes to, or usage of, rte_eth_rxconf and
rte_eth_rxmode. This should be added here later, if the corresponding
changes come in subsequent patches.

> +* **[provides] mbuf**: ``mbuf.ol_flags:RTE_MBUF_F_RX_IP_REASSEMBLY_INCOMPLETE``

Same here. The flag is not defined yet, so it must not be mentioned in
this patch.
> +* **[provides] rte_eth_dev_info**: ``reass_capa``.
> +
> +
>   .. _nic_features_shared_rx_queue:
>   
>   Shared Rx queue


> diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
> index fa299c8ad7..11427b2e4d 100644
> --- a/lib/ethdev/rte_ethdev.h
> +++ b/lib/ethdev/rte_ethdev.h

[snip]

> @@ -1781,6 +1782,33 @@ enum rte_eth_representor_type {
>   	RTE_ETH_REPRESENTOR_PF,   /**< representor of Physical Function. */
>   };
>   
> +/* Flag to offload IP reassembly for IPv4 packets. */
> +#define RTE_ETH_DEV_REASSEMBLY_F_IPV4 (RTE_BIT32(0))
> +/* Flag to offload IP reassembly for IPv6 packets. */
> +#define RTE_ETH_DEV_REASSEMBLY_F_IPV6 (RTE_BIT32(1))
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this structure may change without prior notice.
> + *
> + * A structure used to set IP reassembly configuration.

In the patch the structure is used to provide capabilities,
not to set configuration.

If you are going to use the same structure for capabilities and
configuration, it could be handy, but it is really confusing since the
interpretation of the fields would be different.
As a bare minimum the difference must be specified in comments.
Right now all fields make sense in both capabilities and configuration:
maximum possible vs actual value. However, not everything may be
really configurable, and it will become confusing. It is really hard
to discuss right now since the patch does not provide usage of the
structure for the configuration.
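
To illustrate what I mean, a minimal sketch of the two readings
(the field names are taken from this patch, the numbers are purely made up):

	/* As a capability in rte_eth_dev_info: the maximum the device supports. */
	struct rte_eth_ip_reass_params capa = {
		.reass_timeout = 1000,	/* longest timeout the HW can honour, in ms */
		.max_frags = 16,	/* most fragments the HW can stitch together */
		.flags = RTE_ETH_DEV_REASSEMBLY_F_IPV4 |
			 RTE_ETH_DEV_REASSEMBLY_F_IPV6,
	};

	/* As a configuration: the actual values an application asks for,
	 * each of which must stay within the capability above.
	 */
	struct rte_eth_ip_reass_params conf = {
		.reass_timeout = 100,	/* ms */
		.max_frags = 4,
		.flags = RTE_ETH_DEV_REASSEMBLY_F_IPV4,
	};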

> + *
> + * If RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY flag is set in offloads field,
> + * the PMD will attempt IP reassembly for the received packets as per
> + * properties defined in this structure:
> + *
> + */
> +struct rte_eth_ip_reass_params {
> +	/** Maximum time in ms which PMD can wait for other fragments. */
> +	uint32_t reass_timeout;

Please specify the units, maybe even in the field name, e.g. reass_timeout_ms.

> +	/** Maximum number of fragments that can be reassembled. */
> +	uint16_t max_frags;
> +	/**
> +	 * Flags to enable reassembly of packet types -
> +	 * RTE_ETH_DEV_REASSEMBLY_F_xxx.
> +	 */
> +	uint16_t flags;

If it is just for packet types, I'd suggest naming the field more
precisely. It would also avoid misreading flags vs frags.
Just an idea, up to you.

> +};
> +
>   /**
>    * A structure used to retrieve the contextual information of
>    * an Ethernet device, such as the controlling driver of the
> @@ -1841,8 +1869,10 @@ struct rte_eth_dev_info {
>   	 * embedded managed interconnect/switch.
>   	 */
>   	struct rte_eth_switch_info switch_info;
> +	/** IP reassembly offload capabilities that a device can support. */
> +	struct rte_eth_ip_reass_params reass_capa;
>   
> -	uint64_t reserved_64s[2]; /**< Reserved for future fields */
> +	uint64_t reserved_64s[1]; /**< Reserved for future fields */
>   	void *reserved_ptrs[2];   /**< Reserved for future fields */
>   };
>   


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [PATCH v2 2/4] ethdev: add dev op to set/get IP reassembly configuration
  2022-01-20 16:26     ` [PATCH v2 2/4] ethdev: add dev op to set/get IP reassembly configuration Akhil Goyal
@ 2022-01-22  8:17       ` Andrew Rybchenko
  0 siblings, 0 replies; 53+ messages in thread
From: Andrew Rybchenko @ 2022-01-22  8:17 UTC (permalink / raw)
  To: Akhil Goyal, dev
  Cc: anoobj, radu.nicolau, declan.doherty, hemant.agrawal, matan,
	konstantin.ananyev, thomas, ferruh.yigit, olivier.matz, rosen.xu,
	jerinj

On 1/20/22 19:26, Akhil Goyal wrote:
> A new ethernet device op is added to give application control over

ethernet -> Ethernet

> the IP reassembly configuration. This operation is an optional
> call from the application, default values are set by PMD and
> exposed via rte_eth_dev_info.

Are defaults or maximum supported values exposed via rte_eth_dev_info?
I guess it should be the maximums. Defaults can be obtained using
a get without a prior set.
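
Something along these lines is the flow I would expect (just a sketch,
assuming dev_info carries the maximums):

	struct rte_eth_dev_info dev_info;
	struct rte_eth_ip_reass_params defaults;

	/* reass_capa would then hold the maximum values the device supports. */
	rte_eth_dev_info_get(port_id, &dev_info);

	/* A get without any prior set would return the PMD defaults. */
	rte_eth_ip_reassembly_conf_get(port_id, &defaults);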

> Application should always first retrieve the capabilities from
> rte_eth_dev_info and then set the fields accordingly.
> User can get the currently set values using the get API.
> 
> Signed-off-by: Akhil Goyal <gakhil@marvell.com>

[snip]


> +/**
> + * @internal
> + * Set configuration parameters for enabling IP reassembly offload in hardware.
> + *
> + * @param dev
> + *   Port (ethdev) handle
> + *
> + * @param[in] conf
> + *   Configuration parameters for IP reassembly.
> + *
> + * @return
> + *   Negative errno value on error, zero otherwise
> + */
> +typedef int (*eth_ip_reassembly_conf_set_t)(struct rte_eth_dev *dev,
> +				       struct rte_eth_ip_reass_params *conf);

const

[snip]

> +int
> +rte_eth_ip_reassembly_conf_get(uint16_t port_id,
> +			       struct rte_eth_ip_reass_params *conf)

Please, preserve order everywhere. If get comes first, it must be first
everywhere.

> +{
> +	struct rte_eth_dev *dev;
> +
> +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
> +	dev = &rte_eth_devices[port_id];
> +
> +	if (conf == NULL) {
> +		RTE_ETHDEV_LOG(ERR, "Cannot get reassembly info to NULL");
> +		return -EINVAL;
> +	}

Why is the order of the checks different between set and get?

> +
> +	if (dev->data->dev_configured == 0) {
> +		RTE_ETHDEV_LOG(ERR,
> +			"Device with port_id=%"PRIu16" is not configured.\n",
> +			port_id);
> +		return -EINVAL;
> +	}
> +
> +	if ((dev->data->dev_conf.rxmode.offloads &
> +			RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY) == 0) {
> +		RTE_ETHDEV_LOG(ERR,
> +			"The port (ID=%"PRIu16") is not configured for IP reassembly\n",
> +			port_id);
> +		return -EINVAL;
> +	}
> +
> +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->ip_reassembly_conf_get,
> +				-ENOTSUP);
> +	memset(conf, 0, sizeof(struct rte_eth_ip_reass_params));
> +	return eth_err(port_id,
> +		       (*dev->dev_ops->ip_reassembly_conf_get)(dev, conf));
> +}
> +
>   RTE_LOG_REGISTER_DEFAULT(rte_eth_dev_logtype, INFO);
>   
>   RTE_INIT(ethdev_init_telemetry)
> diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
> index 11427b2e4d..53af158bcb 100644
> --- a/lib/ethdev/rte_ethdev.h
> +++ b/lib/ethdev/rte_ethdev.h
> @@ -5218,6 +5218,57 @@ int rte_eth_representor_info_get(uint16_t port_id,
>   __rte_experimental
>   int rte_eth_rx_metadata_negotiate(uint16_t port_id, uint64_t *features);
>   
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice
> + *
> + * Get IP reassembly configuration parameters currently set in PMD,
> + * if device rx offload flag (RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY) is

rx -> Rx

> + * enabled and the PMD supports IP reassembly offload.
> + *
> + * @param port_id
> + *   The port identifier of the device.
> + * @param conf
> + *   A pointer to rte_eth_ip_reass_params structure.
> + * @return
> + *   - (-ENOTSUP) if offload configuration is not supported by device.
> + *   - (-EINVAL) if offload is not enabled in rte_eth_conf.
> + *   - (-ENODEV) if *port_id* invalid.
> + *   - (-EIO) if device is removed.
> + *   - (0) on success.
> + */
> +__rte_experimental
> +int rte_eth_ip_reassembly_conf_get(uint16_t port_id,
> +				   struct rte_eth_ip_reass_params *conf);
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice
> + *
> + * Set IP reassembly configuration parameters if device rx offload

rx -> Rx

> + * flag (RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY) is enabled and the PMD
> + * supports IP reassembly offload. User should first check the
> + * reass_capa in rte_eth_dev_info before setting the configuration.
> + * The values of configuration parameters must not exceed the device
> + * capabilities.

It sounds like the set API should retrieve dev_info and check the
requested values against the maximums.

> The use of this API is optional and if called, it
> + * should be called before rte_eth_dev_start().

It should be highlighted that the device must be already configured.
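
To make the expected ordering explicit, a rough application-side sketch
(purely illustrative; port_conf, port_id and the queue counts come from
the application, error handling is omitted, and the clamp values are made up):

	struct rte_eth_dev_info dev_info;
	struct rte_eth_ip_reass_params reass_conf;

	/* Device must already be configured, with the offload enabled
	 * in port_conf.rxmode.offloads (RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY).
	 */
	rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &port_conf);

	/* Read the maximums and build a configuration that stays below them. */
	rte_eth_dev_info_get(port_id, &dev_info);
	reass_conf.reass_timeout = RTE_MIN(dev_info.reass_capa.reass_timeout, 100);
	reass_conf.max_frags = RTE_MIN(dev_info.reass_capa.max_frags, 4);
	reass_conf.flags = dev_info.reass_capa.flags;

	/* The set is optional, but if used it must come before start. */
	rte_eth_ip_reassembly_conf_set(port_id, &reass_conf);
	rte_eth_dev_start(port_id);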

> + *
> + * @param port_id
> + *   The port identifier of the device.
> + * @param conf
> + *   A pointer to rte_eth_ip_reass_params structure.
> + * @return
> + *   - (-ENOTSUP) if offload configuration is not supported by device.
> + *   - (-EINVAL) if offload is not enabled in rte_eth_conf.
> + *   - (-ENODEV) if *port_id* invalid.
> + *   - (-EIO) if device is removed.
> + *   - (0) on success.
> + */
> +__rte_experimental
> +int rte_eth_ip_reassembly_conf_set(uint16_t port_id,
> +				   struct rte_eth_ip_reass_params *conf);
> +
> +
>   #include <rte_ethdev_core.h>
>   
>   /**
> diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
> index c2fb0669a4..ad829dd47e 100644
> --- a/lib/ethdev/version.map
> +++ b/lib/ethdev/version.map
> @@ -256,6 +256,10 @@ EXPERIMENTAL {
>   	rte_flow_flex_item_create;
>   	rte_flow_flex_item_release;
>   	rte_flow_pick_transfer_proxy;
> +
> +	#added in 22.03
> +	rte_eth_ip_reassembly_conf_get;
> +	rte_eth_ip_reassembly_conf_set;
>   };
>   
>   INTERNAL {


^ permalink raw reply	[flat|nested] 53+ messages in thread

end of thread, other threads:[~2022-01-22  8:17 UTC | newest]

Thread overview: 53+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-23 10:02 [dpdk-dev] [PATCH] RFC: ethdev: add reassembly offload Akhil Goyal
2021-08-23 10:18 ` Andrew Rybchenko
2021-08-29 13:14   ` [dpdk-dev] [EXT] " Akhil Goyal
2021-09-21 19:59     ` Thomas Monjalon
2021-09-07  8:47 ` [dpdk-dev] " Ferruh Yigit
2021-09-08 10:29   ` [dpdk-dev] [EXT] " Anoob Joseph
2021-09-13  6:56     ` Xu, Rosen
2021-09-13  7:22       ` Andrew Rybchenko
2021-09-14  5:14         ` Anoob Joseph
2021-09-08  6:34 ` [dpdk-dev] " Xu, Rosen
2021-09-08  6:36   ` Xu, Rosen
2022-01-03 15:08 ` [PATCH 0/8] ethdev: introduce IP " Akhil Goyal
2022-01-03 15:08   ` [PATCH 1/8] " Akhil Goyal
2022-01-11 16:03     ` Ananyev, Konstantin
2022-01-22  7:38     ` Andrew Rybchenko
2022-01-03 15:08   ` [PATCH 2/8] ethdev: add dev op for IP reassembly configuration Akhil Goyal
2022-01-11 16:09     ` Ananyev, Konstantin
2022-01-11 18:54       ` Akhil Goyal
2022-01-12 10:22         ` Ananyev, Konstantin
2022-01-12 10:32           ` Akhil Goyal
2022-01-12 10:48             ` Ananyev, Konstantin
2022-01-12 11:06               ` Akhil Goyal
2022-01-13 13:31                 ` Akhil Goyal
2022-01-13 14:41                   ` Ananyev, Konstantin
2022-01-03 15:08   ` [PATCH 3/8] ethdev: add mbuf dynfield for incomplete IP reassembly Akhil Goyal
2022-01-11 17:04     ` Ananyev, Konstantin
2022-01-11 18:44       ` Akhil Goyal
2022-01-12 10:30         ` Ananyev, Konstantin
2022-01-12 10:59           ` Akhil Goyal
2022-01-13 22:29             ` Ananyev, Konstantin
2022-01-13 13:18         ` Akhil Goyal
2022-01-13 14:36           ` Ananyev, Konstantin
2022-01-13 15:04             ` Akhil Goyal
2022-01-03 15:08   ` [PATCH 4/8] security: add IPsec option for " Akhil Goyal
2022-01-03 15:08   ` [PATCH 5/8] app/test: add unit cases for inline IPsec offload Akhil Goyal
2022-01-20 16:48     ` [PATCH v2 0/4] app/test: add inline IPsec and reassembly cases Akhil Goyal
2022-01-20 16:48       ` [PATCH v2 1/4] app/test: add unit cases for inline IPsec offload Akhil Goyal
2022-01-20 16:48       ` [PATCH v2 2/4] app/test: add IP reassembly case with no frags Akhil Goyal
2022-01-20 16:48       ` [PATCH v2 3/4] app/test: add IP reassembly cases with multiple fragments Akhil Goyal
2022-01-20 16:48       ` [PATCH v2 4/4] app/test: add IP reassembly negative cases Akhil Goyal
2022-01-03 15:08   ` [PATCH 6/8] app/test: add IP reassembly case with no frags Akhil Goyal
2022-01-03 15:08   ` [PATCH 7/8] app/test: add IP reassembly cases with multiple fragments Akhil Goyal
2022-01-03 15:08   ` [PATCH 8/8] app/test: add IP reassembly negative cases Akhil Goyal
2022-01-06  9:51   ` [PATCH 0/8] ethdev: introduce IP reassembly offload David Marchand
2022-01-06  9:54     ` [EXT] " Akhil Goyal
2022-01-20 16:26   ` [PATCH v2 0/4] " Akhil Goyal
2022-01-20 16:26     ` [PATCH v2 1/4] " Akhil Goyal
2022-01-20 16:45       ` Stephen Hemminger
2022-01-20 17:11         ` [EXT] " Akhil Goyal
2022-01-20 16:26     ` [PATCH v2 2/4] ethdev: add dev op to set/get IP reassembly configuration Akhil Goyal
2022-01-22  8:17       ` Andrew Rybchenko
2022-01-20 16:26     ` [PATCH v2 3/4] ethdev: add mbuf dynfield for incomplete IP reassembly Akhil Goyal
2022-01-20 16:26     ` [PATCH v2 4/4] security: add IPsec option for " Akhil Goyal
