[PATCH] ip_frag: support IPv6 reassembly with extensions

DPDK patches and discussions
 help / color / mirror / Atom feed

* [PATCH] ip_frag: support IPv6 reassembly with extensions
@ 2024-08-26 11:23 vignesh.purushotham.srinivas
  2024-08-26 15:41 ` Stephen Hemminger
  2024-09-17 18:07 ` Konstantin Ananyev
  0 siblings, 2 replies; 6+ messages in thread
From: vignesh.purushotham.srinivas @ 2024-08-26 11:23 UTC (permalink / raw)
  To: konstantin.v.ananyev; +Cc: dev, Vignesh PS

From: Vignesh PS <vignesh.purushotham.srinivas@ericsson.com>

Add support to ip_frag library to perform IPv6 reassembly
when extension headers are present before the fragment
extension in the packet.

Signed-off-by: Vignesh PS <vignesh.purushotham.srinivas@ericsson.com>
---
 .mailmap                          |  1 +
 lib/ip_frag/ip_frag_common.h      |  2 +
 lib/ip_frag/ip_reassembly.h       |  2 +
 lib/ip_frag/rte_ipv6_reassembly.c | 68 +++++++++++++++++++++++++++----
 4 files changed, 64 insertions(+), 9 deletions(-)

diff --git a/.mailmap b/.mailmap
index 4a508bafad..69b229a5b7 100644
--- a/.mailmap
+++ b/.mailmap
@@ -1548,6 +1548,7 @@ Viacheslav Ovsiienko <viacheslavo@nvidia.com> <viacheslavo@mellanox.com>
 Victor Kaplansky <victork@redhat.com>
 Victor Raj <victor.raj@intel.com>
 Vidya Sagar Velumuri <vvelumuri@marvell.com>
+Vignesh PS <vignesh.purushotham.srinivas@ericsson.com> <vig.vigneshps1995@gmail.com>
 Vignesh Sridhar <vignesh.sridhar@intel.com>
 Vijayakumar Muthuvel Manickam <mmvijay@gmail.com>
 Vijaya Mohan Guvva <vijay1054@gmail.com>
diff --git a/lib/ip_frag/ip_frag_common.h b/lib/ip_frag/ip_frag_common.h
index 51fc9d47fb..db2665e846 100644
--- a/lib/ip_frag/ip_frag_common.h
+++ b/lib/ip_frag/ip_frag_common.h
@@ -169,6 +169,8 @@ ip_frag_reset(struct ip_frag_pkt *fp, uint64_t tms)
 	fp->total_size = UINT32_MAX;
 	fp->frag_size = 0;
 	fp->last_idx = IP_MIN_FRAG_NUM;
+	fp->exts_len = 0;
+	fp->next_proto = NULL;
 	fp->frags[IP_LAST_FRAG_IDX] = zero_frag;
 	fp->frags[IP_FIRST_FRAG_IDX] = zero_frag;
 }
diff --git a/lib/ip_frag/ip_reassembly.h b/lib/ip_frag/ip_reassembly.h
index 54afed5417..429e74f1b3 100644
--- a/lib/ip_frag/ip_reassembly.h
+++ b/lib/ip_frag/ip_reassembly.h
@@ -54,6 +54,8 @@ struct __rte_cache_aligned ip_frag_pkt {
 	uint32_t total_size;                   /* expected reassembled size */
 	uint32_t frag_size;                    /* size of fragments received */
 	uint32_t last_idx;                     /* index of next entry to fill */
+	uint32_t exts_len;                     /* length of extension hdrs for first fragment */
+	uint8_t *next_proto;                   /* pointer of the next_proto field */
 	struct ip_frag frags[IP_MAX_FRAG_NUM]; /* fragments */
 };
 
diff --git a/lib/ip_frag/rte_ipv6_reassembly.c b/lib/ip_frag/rte_ipv6_reassembly.c
index 88863a98d1..8decf592a6 100644
--- a/lib/ip_frag/rte_ipv6_reassembly.c
+++ b/lib/ip_frag/rte_ipv6_reassembly.c
@@ -91,19 +91,19 @@ ipv6_frag_reassemble(struct ip_frag_pkt *fp)
 	/* update ipv6 header for the reassembled datagram */
 	ip_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *, m->l2_len);
 
+	payload_len += fp->exts_len;
 	ip_hdr->payload_len = rte_cpu_to_be_16(payload_len);
 
 	/*
 	 * remove fragmentation header. note that per RFC2460, we need to update
 	 * the last non-fragmentable header with the "next header" field to contain
-	 * type of the first fragmentable header, but we currently don't support
-	 * other headers, so we assume there are no other headers and thus update
-	 * the main IPv6 header instead.
+	 * type of the first fragmentable header.
 	 */
-	move_len = m->l2_len + m->l3_len - sizeof(*frag_hdr);
-	frag_hdr = (struct rte_ipv6_fragment_ext *) (ip_hdr + 1);
-	ip_hdr->proto = frag_hdr->next_header;
+	frag_hdr = (struct rte_ipv6_fragment_ext *)
+		((uint8_t *) (ip_hdr + 1) + fp->exts_len);
+	*fp->next_proto = frag_hdr->next_header;
 
+	move_len = m->l2_len + m->l3_len - sizeof(*frag_hdr);
 	ip_frag_memmove(rte_pktmbuf_mtod_offset(m, char *, sizeof(*frag_hdr)),
 			rte_pktmbuf_mtod(m, char*), move_len);
 
@@ -112,6 +112,39 @@ ipv6_frag_reassemble(struct ip_frag_pkt *fp)
 	return m;
 }
 
+/*
+ * Function to crawl through the extension header stack.
+ * This function breaks as soon a the fragment header is
+ * found and returns the total length the traversed exts
+ * and the last extension before the fragment header
+ */
+static inline uint32_t
+ip_frag_get_last_exthdr(struct rte_ipv6_hdr *ip_hdr, uint8_t **last_ext)
+{
+	uint32_t total_len = 0;
+	uint8_t num_exts = 0;
+	size_t ext_len = 0;
+	*last_ext = (uint8_t *)(ip_hdr + 1);
+	int next_proto = ip_hdr->proto;
+#define MAX_NUM_IPV6_EXTS 8
+
+	while (next_proto != IPPROTO_FRAGMENT &&
+		num_exts < MAX_NUM_IPV6_EXTS &&
+		(next_proto = rte_ipv6_get_next_ext(
+		*last_ext, next_proto, &ext_len)) >= 0) {
+
+		total_len += ext_len;
+
+		if (next_proto == IPPROTO_FRAGMENT)
+			return total_len;
+
+		*last_ext += ext_len;
+		num_exts++;
+	}
+
+	return total_len;
+}
+
 /*
  * Process new mbuf with fragment of IPV6 datagram.
  * Incoming mbuf should have its l2_len/l3_len fields setup correctly.
@@ -139,6 +172,8 @@ rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
 {
 	struct ip_frag_pkt *fp;
 	struct ip_frag_key key;
+	uint8_t *last_ipv6_ext;
+	uint32_t exts_len;
 	uint16_t ip_ofs;
 	int32_t ip_len;
 	int32_t trim;
@@ -154,10 +189,10 @@ rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
 	/*
 	 * as per RFC2460, payload length contains all extension headers
 	 * as well.
-	 * since we don't support anything but frag headers,
-	 * this is what we remove from the payload len.
+	 * so we remove the extension len from the payload len.
 	 */
-	ip_len = rte_be_to_cpu_16(ip_hdr->payload_len) - sizeof(*frag_hdr);
+	exts_len = ip_frag_get_last_exthdr(ip_hdr, &last_ipv6_ext);
+	ip_len = rte_be_to_cpu_16(ip_hdr->payload_len) - exts_len - sizeof(*frag_hdr);
 	trim = mb->pkt_len - (ip_len + mb->l3_len + mb->l2_len);
 
 	IP_FRAG_LOG(DEBUG, "%s:%d:\n"
@@ -201,6 +236,21 @@ rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
 	/* process the fragmented packet. */
 	mb = ip_frag_process(fp, dr, mb, ip_ofs, ip_len,
 			MORE_FRAGS(frag_hdr->frag_data));
+
+	/* store extension stack info, only for first fragment */
+	if (ip_ofs == 0) {
+		/*
+		 * fp->next_proto points to either the IP's next header
+		 * or th next header of the extension before the fragment
+		 * extension
+		 */
+		fp->next_proto = (uint8_t *)&ip_hdr->proto;
+		if (exts_len > 0) {
+			fp->exts_len = exts_len;
+			fp->next_proto = last_ipv6_ext;
+		}
+	}
+
 	ip_frag_inuse(tbl, fp);
 
 	IP_FRAG_LOG(DEBUG, "%s:%d:\n"
-- 
2.34.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] ip_frag: support IPv6 reassembly with extensions
  2024-08-26 11:23 [PATCH] ip_frag: support IPv6 reassembly with extensions vignesh.purushotham.srinivas
@ 2024-08-26 15:41 ` Stephen Hemminger
  2024-09-17 17:57   ` Konstantin Ananyev
  2024-09-17 18:07 ` Konstantin Ananyev
  1 sibling, 1 reply; 6+ messages in thread
From: Stephen Hemminger @ 2024-08-26 15:41 UTC (permalink / raw)
  To: vignesh.purushotham.srinivas; +Cc: konstantin.v.ananyev, dev

On Mon, 26 Aug 2024 13:23:28 +0200
<vignesh.purushotham.srinivas@ericsson.com> wrote:

> diff --git a/lib/ip_frag/ip_reassembly.h b/lib/ip_frag/ip_reassembly.h
> index 54afed5417..429e74f1b3 100644
> --- a/lib/ip_frag/ip_reassembly.h
> +++ b/lib/ip_frag/ip_reassembly.h
> @@ -54,6 +54,8 @@ struct __rte_cache_aligned ip_frag_pkt {
>  	uint32_t total_size;                   /* expected reassembled size */
>  	uint32_t frag_size;                    /* size of fragments received */
>  	uint32_t last_idx;                     /* index of next entry to fill */
> +	uint32_t exts_len;                     /* length of extension hdrs for first fragment */
> +	uint8_t *next_proto;                   /* pointer of the next_proto field */
>  	struct ip_frag frags[IP_MAX_FRAG_NUM]; /* fragments */
>  };

This creates a 32 bit hole in the structure.
Better to put next_proto after the start field.

> +
> +	while (next_proto != IPPROTO_FRAGMENT &&
> +		num_exts < MAX_NUM_IPV6_EXTS &&
> +		(next_proto = rte_ipv6_get_next_ext(
> +		*last_ext, next_proto, &ext_len)) >= 0) {

I would break up this loop condition for clarity.
Something like:

	while (next_proto != IPPROTO_FRAGMENT && num_exts < MAX_NUM_IPV6_EXTS) {
		next_proto = rte_ipv6_get_next_ext(*last_ext, next_proto, &ext_len);
		if (next_proto < 0)
			break

Also, need a new test cases for this.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* RE: [PATCH] ip_frag: support IPv6 reassembly with extensions
  2024-08-26 15:41 ` Stephen Hemminger
@ 2024-09-17 17:57   ` Konstantin Ananyev
  2024-10-14 10:38     ` Vignesh Purushotham Srinivas
  0 siblings, 1 reply; 6+ messages in thread
From: Konstantin Ananyev @ 2024-09-17 17:57 UTC (permalink / raw)
  To: Stephen Hemminger, vignesh.purushotham.srinivas; +Cc: konstantin.v.ananyev, dev


> 
> On Mon, 26 Aug 2024 13:23:28 +0200
> <vignesh.purushotham.srinivas@ericsson.com> wrote:
> 
> > diff --git a/lib/ip_frag/ip_reassembly.h b/lib/ip_frag/ip_reassembly.h
> > index 54afed5417..429e74f1b3 100644
> > --- a/lib/ip_frag/ip_reassembly.h
> > +++ b/lib/ip_frag/ip_reassembly.h
> > @@ -54,6 +54,8 @@ struct __rte_cache_aligned ip_frag_pkt {
> >  	uint32_t total_size;                   /* expected reassembled size */
> >  	uint32_t frag_size;                    /* size of fragments received */
> >  	uint32_t last_idx;                     /* index of next entry to fill */
> > +	uint32_t exts_len;                     /* length of extension hdrs for first fragment */
> > +	uint8_t *next_proto;                   /* pointer of the next_proto field */
> >  	struct ip_frag frags[IP_MAX_FRAG_NUM]; /* fragments */
> >  };
> 
> This creates a 32 bit hole in the structure.
> Better to put next_proto after the start field.

Another alternative - use offset within the mbuf instead of pointer.

> 
> > +
> > +	while (next_proto != IPPROTO_FRAGMENT &&
> > +		num_exts < MAX_NUM_IPV6_EXTS &&
> > +		(next_proto = rte_ipv6_get_next_ext(
> > +		*last_ext, next_proto, &ext_len)) >= 0) {
> 
> I would break up this loop condition for clarity.

+ 1

> Something like:
> 
> 	while (next_proto != IPPROTO_FRAGMENT && num_exts < MAX_NUM_IPV6_EXTS) {
> 		next_proto = rte_ipv6_get_next_ext(*last_ext, next_proto, &ext_len);
> 		if (next_proto < 0)
> 			break
> 
> Also, need a new test cases for this.

Agree, that would be good thing to add.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* RE: [PATCH] ip_frag: support IPv6 reassembly with extensions
  2024-08-26 11:23 [PATCH] ip_frag: support IPv6 reassembly with extensions vignesh.purushotham.srinivas
  2024-08-26 15:41 ` Stephen Hemminger
@ 2024-09-17 18:07 ` Konstantin Ananyev
  2024-10-14 16:11   ` Vignesh Purushotham Srinivas
  1 sibling, 1 reply; 6+ messages in thread
From: Konstantin Ananyev @ 2024-09-17 18:07 UTC (permalink / raw)
  To: vignesh.purushotham.srinivas, konstantin.v.ananyev; +Cc: dev


> From: Vignesh PS <vignesh.purushotham.srinivas@ericsson.com>
> 
> Add support to ip_frag library to perform IPv6 reassembly
> when extension headers are present before the fragment
> extension in the packet.
> 
> Signed-off-by: Vignesh PS <vignesh.purushotham.srinivas@ericsson.com>
> ---
>  .mailmap                          |  1 +
>  lib/ip_frag/ip_frag_common.h      |  2 +
>  lib/ip_frag/ip_reassembly.h       |  2 +
>  lib/ip_frag/rte_ipv6_reassembly.c | 68 +++++++++++++++++++++++++++----
>  4 files changed, 64 insertions(+), 9 deletions(-)
> 
> diff --git a/.mailmap b/.mailmap
> index 4a508bafad..69b229a5b7 100644
> --- a/.mailmap
> +++ b/.mailmap
> @@ -1548,6 +1548,7 @@ Viacheslav Ovsiienko <viacheslavo@nvidia.com> <viacheslavo@mellanox.com>
>  Victor Kaplansky <victork@redhat.com>
>  Victor Raj <victor.raj@intel.com>
>  Vidya Sagar Velumuri <vvelumuri@marvell.com>
> +Vignesh PS <vignesh.purushotham.srinivas@ericsson.com> <vig.vigneshps1995@gmail.com>
>  Vignesh Sridhar <vignesh.sridhar@intel.com>
>  Vijayakumar Muthuvel Manickam <mmvijay@gmail.com>
>  Vijaya Mohan Guvva <vijay1054@gmail.com>
> diff --git a/lib/ip_frag/ip_frag_common.h b/lib/ip_frag/ip_frag_common.h
> index 51fc9d47fb..db2665e846 100644
> --- a/lib/ip_frag/ip_frag_common.h
> +++ b/lib/ip_frag/ip_frag_common.h
> @@ -169,6 +169,8 @@ ip_frag_reset(struct ip_frag_pkt *fp, uint64_t tms)
>  	fp->total_size = UINT32_MAX;
>  	fp->frag_size = 0;
>  	fp->last_idx = IP_MIN_FRAG_NUM;
> +	fp->exts_len = 0;
> +	fp->next_proto = NULL;
>  	fp->frags[IP_LAST_FRAG_IDX] = zero_frag;
>  	fp->frags[IP_FIRST_FRAG_IDX] = zero_frag;
>  }
> diff --git a/lib/ip_frag/ip_reassembly.h b/lib/ip_frag/ip_reassembly.h
> index 54afed5417..429e74f1b3 100644
> --- a/lib/ip_frag/ip_reassembly.h
> +++ b/lib/ip_frag/ip_reassembly.h
> @@ -54,6 +54,8 @@ struct __rte_cache_aligned ip_frag_pkt {
>  	uint32_t total_size;                   /* expected reassembled size */
>  	uint32_t frag_size;                    /* size of fragments received */
>  	uint32_t last_idx;                     /* index of next entry to fill */
> +	uint32_t exts_len;                     /* length of extension hdrs for first fragment */
> +	uint8_t *next_proto;                   /* pointer of the next_proto field */
>  	struct ip_frag frags[IP_MAX_FRAG_NUM]; /* fragments */
>  };
> 
> diff --git a/lib/ip_frag/rte_ipv6_reassembly.c b/lib/ip_frag/rte_ipv6_reassembly.c
> index 88863a98d1..8decf592a6 100644
> --- a/lib/ip_frag/rte_ipv6_reassembly.c
> +++ b/lib/ip_frag/rte_ipv6_reassembly.c
> @@ -91,19 +91,19 @@ ipv6_frag_reassemble(struct ip_frag_pkt *fp)
>  	/* update ipv6 header for the reassembled datagram */
>  	ip_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *, m->l2_len);
> 
> +	payload_len += fp->exts_len;
>  	ip_hdr->payload_len = rte_cpu_to_be_16(payload_len);
> 
>  	/*
>  	 * remove fragmentation header. note that per RFC2460, we need to update
>  	 * the last non-fragmentable header with the "next header" field to contain
> -	 * type of the first fragmentable header, but we currently don't support
> -	 * other headers, so we assume there are no other headers and thus update
> -	 * the main IPv6 header instead.
> +	 * type of the first fragmentable header.
>  	 */
> -	move_len = m->l2_len + m->l3_len - sizeof(*frag_hdr);
> -	frag_hdr = (struct rte_ipv6_fragment_ext *) (ip_hdr + 1);
> -	ip_hdr->proto = frag_hdr->next_header;
> +	frag_hdr = (struct rte_ipv6_fragment_ext *)
> +		((uint8_t *) (ip_hdr + 1) + fp->exts_len);
> +	*fp->next_proto = frag_hdr->next_header;
> 
> +	move_len = m->l2_len + m->l3_len - sizeof(*frag_hdr);
>  	ip_frag_memmove(rte_pktmbuf_mtod_offset(m, char *, sizeof(*frag_hdr)),
>  			rte_pktmbuf_mtod(m, char*), move_len);
> 
> @@ -112,6 +112,39 @@ ipv6_frag_reassemble(struct ip_frag_pkt *fp)
>  	return m;
>  }
> 
> +/*
> + * Function to crawl through the extension header stack.
> + * This function breaks as soon a the fragment header is
> + * found and returns the total length the traversed exts
> + * and the last extension before the fragment header
> + */
> +static inline uint32_t
> +ip_frag_get_last_exthdr(struct rte_ipv6_hdr *ip_hdr, uint8_t **last_ext)
> +{
> +	uint32_t total_len = 0;
> +	uint8_t num_exts = 0;
> +	size_t ext_len = 0;
> +	*last_ext = (uint8_t *)(ip_hdr + 1);
> +	int next_proto = ip_hdr->proto;
> +#define MAX_NUM_IPV6_EXTS 8

As a nit - let's keep coding style consistent:
Pls move #define outside the function definition. 

> +
> +	while (next_proto != IPPROTO_FRAGMENT &&
> +		num_exts < MAX_NUM_IPV6_EXTS &&
> +		(next_proto = rte_ipv6_get_next_ext(
> +		*last_ext, next_proto, &ext_len)) >= 0) {
> +
> +		total_len += ext_len;
> +
> +		if (next_proto == IPPROTO_FRAGMENT)
> +			return total_len;
> +
> +		*last_ext += ext_len;
> +		num_exts++;
> +	}

So if  IPPROTO_FRAGMENT was not found, we just use extension #8 instead?
Shouldn't we return an error in that case,  and probably drop the fragment?

> +	return total_len;
> +}
> +
>  /*
>   * Process new mbuf with fragment of IPV6 datagram.
>   * Incoming mbuf should have its l2_len/l3_len fields setup correctly.
> @@ -139,6 +172,8 @@ rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
>  {
>  	struct ip_frag_pkt *fp;
>  	struct ip_frag_key key;
> +	uint8_t *last_ipv6_ext;
> +	uint32_t exts_len;
>  	uint16_t ip_ofs;
>  	int32_t ip_len;
>  	int32_t trim;
> @@ -154,10 +189,10 @@ rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
>  	/*
>  	 * as per RFC2460, payload length contains all extension headers
>  	 * as well.
> -	 * since we don't support anything but frag headers,
> -	 * this is what we remove from the payload len.
> +	 * so we remove the extension len from the payload len.
>  	 */
> -	ip_len = rte_be_to_cpu_16(ip_hdr->payload_len) - sizeof(*frag_hdr);
> +	exts_len = ip_frag_get_last_exthdr(ip_hdr, &last_ipv6_ext);
> +	ip_len = rte_be_to_cpu_16(ip_hdr->payload_len) - exts_len - sizeof(*frag_hdr);

Hmm..., as I remember ip_len is what we want to preserve in the packet...
Why we want to remove all previous ext headers here?

>  	trim = mb->pkt_len - (ip_len + mb->l3_len + mb->l2_len);
> 
>  	IP_FRAG_LOG(DEBUG, "%s:%d:\n"
> @@ -201,6 +236,21 @@ rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
>  	/* process the fragmented packet. */
>  	mb = ip_frag_process(fp, dr, mb, ip_ofs, ip_len,
>  			MORE_FRAGS(frag_hdr->frag_data));

Can you explain why we setting these new fp fields after 'ip_frag_process()'?
Ip_frag_process() itself can call reassembly() - if all fragments are already in place.

> +
> +	/* store extension stack info, only for first fragment */
> +	if (ip_ofs == 0) {

If we want it for first fragment only, why not invoke ip_frag_get_last_exthdr()
only when ip_ofs == 0?
 
> +		/*
> +		 * fp->next_proto points to either the IP's next header
> +		 * or th next header of the extension before the fragment
> +		 * extension
> +		 */
> +		fp->next_proto = (uint8_t *)&ip_hdr->proto;
> +		if (exts_len > 0) {
> +			fp->exts_len = exts_len;
> +			fp->next_proto = last_ipv6_ext;
> +		}
> +	}
> +
>  	ip_frag_inuse(tbl, fp);
> 
>  	IP_FRAG_LOG(DEBUG, "%s:%d:\n"
> --
> 2.34.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] ip_frag: support IPv6 reassembly with extensions
  2024-09-17 17:57   ` Konstantin Ananyev
@ 2024-10-14 10:38     ` Vignesh Purushotham Srinivas
  0 siblings, 0 replies; 6+ messages in thread
From: Vignesh Purushotham Srinivas @ 2024-10-14 10:38 UTC (permalink / raw)
  To: Vignesh Purushotham Srinivas; +Cc: dev, konstantin.v.ananyev

-----Original Message-----
From: Konstantin Ananyev <konstantin.ananyev@huawei.com>
To: Stephen Hemminger <stephen@networkplumber.org>,
vignesh.purushotham.srinivas@ericsson.com
<vignesh.purushotham.srinivas@ericsson.com>
Cc: konstantin.v.ananyev@yandex.ru <konstantin.v.ananyev@yandex.ru>,
dev@dpdk.org <dev@dpdk.org>
Subject: RE: [PATCH] ip_frag: support IPv6 reassembly with extensions
Date: Tue, 17 Sep 2024 17:57:59 +0000

[You don't often get email from konstantin.ananyev@huawei.com. Learn
why this is important at
https://aka.ms/LearnAboutSenderIdentification ]

> 
> On Mon, 26 Aug 2024 13:23:28 +0200
> <vignesh.purushotham.srinivas@ericsson.com> wrote:
> 
> > diff --git a/lib/ip_frag/ip_reassembly.h
> > b/lib/ip_frag/ip_reassembly.h
> > index 54afed5417..429e74f1b3 100644
> > --- a/lib/ip_frag/ip_reassembly.h
> > +++ b/lib/ip_frag/ip_reassembly.h
> > @@ -54,6 +54,8 @@ struct __rte_cache_aligned ip_frag_pkt {
> >     uint32_t total_size;                   /* expected reassembled
> > size */
> >     uint32_t frag_size;                    /* size of fragments
> > received */
> >     uint32_t last_idx;                     /* index of next entry
> > to fill */
> > +   uint32_t exts_len;                     /* length of extension
> > hdrs for first fragment */
> > +   uint8_t *next_proto;                   /* pointer of the
> > next_proto field */
> >     struct ip_frag frags[IP_MAX_FRAG_NUM]; /* fragments */
> >  };
> 
> This creates a 32 bit hole in the structure.
> Better to put next_proto after the start field.

Another alternative - use offset within the mbuf instead of pointer.

ACK

> 
> > +
> > +   while (next_proto != IPPROTO_FRAGMENT &&
> > +           num_exts < MAX_NUM_IPV6_EXTS &&
> > +           (next_proto = rte_ipv6_get_next_ext(
> > +           *last_ext, next_proto, &ext_len)) >= 0) {
> 
> I would break up this loop condition for clarity.

+ 1

ACK

> Something like:
> 
>       while (next_proto != IPPROTO_FRAGMENT && num_exts <
> MAX_NUM_IPV6_EXTS) {
>               next_proto = rte_ipv6_get_next_ext(*last_ext,
> next_proto, &ext_len);
>               if (next_proto < 0)
>                       break
> 
> Also, need a new test cases for this.

Agree, that would be good thing to add.

ACK


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] ip_frag: support IPv6 reassembly with extensions
  2024-09-17 18:07 ` Konstantin Ananyev
@ 2024-10-14 16:11   ` Vignesh Purushotham Srinivas
  0 siblings, 0 replies; 6+ messages in thread
From: Vignesh Purushotham Srinivas @ 2024-10-14 16:11 UTC (permalink / raw)
  To: konstantin.ananyev, konstantin.v.ananyev; +Cc: dev

-----Original Message-----
From: Konstantin Ananyev <konstantin.ananyev@huawei.com>
To: vignesh.purushotham.srinivas@ericsson.com
<vignesh.purushotham.srinivas@ericsson.com>,
konstantin.v.ananyev@yandex.ru <konstantin.v.ananyev@yandex.ru>
Cc: dev@dpdk.org <dev@dpdk.org>
Subject: RE: [PATCH] ip_frag: support IPv6 reassembly with extensions
Date: Tue, 17 Sep 2024 18:07:25 +0000

> > +/*
> > + * Function to crawl through the extension header stack.
> > + * This function breaks as soon a the fragment header is
> > + * found and returns the total length the traversed exts
> > + * and the last extension before the fragment header
> > + */
> > +static inline uint32_t
> > +ip_frag_get_last_exthdr(struct rte_ipv6_hdr *ip_hdr, uint8_t
**last_ext)
> > +{
> > +     uint32_t total_len = 0;
> > +     uint8_t num_exts = 0;
> > +     size_t ext_len = 0;
> > +     *last_ext = (uint8_t *)(ip_hdr + 1);
> > +     int next_proto = ip_hdr->proto;
> > +#define MAX_NUM_IPV6_EXTS 8
> 
> As a nit - let's keep coding style consistent:
> Pls move #define outside the function definition.

ACK

> > +
> > +     while (next_proto != IPPROTO_FRAGMENT &&
> > +             num_exts < MAX_NUM_IPV6_EXTS &&
> > +             (next_proto = rte_ipv6_get_next_ext(
> > +             *last_ext, next_proto, &ext_len)) >= 0) {
> > +
> > +             total_len += ext_len;
> > +
> > +             if (next_proto == IPPROTO_FRAGMENT)
> > +                     return total_len;
> > +
> > +             *last_ext += ext_len;
> > +             num_exts++;
> > +     }
> 
> So if  IPPROTO_FRAGMENT was not found, we just use extension #8
instead?
> Shouldn't we return an error in that case,  and probably drop the
fragment?

Hmmm, looks like a bug. Will fix this in the next version

> > +     return total_len;
> > +}
> > +
> >  /*
> >   * Process new mbuf with fragment of IPV6 datagram.
> >   * Incoming mbuf should have its l2_len/l3_len fields setup
correctly.
> > @@ -139,6 +172,8 @@ rte_ipv6_frag_reassemble_packet(struct
rte_ip_frag_tbl *tbl,
> >  {
> >       struct ip_frag_pkt *fp;
> >       struct ip_frag_key key;
> > +     uint8_t *last_ipv6_ext;
> > +     uint32_t exts_len;
> >       uint16_t ip_ofs;
> >       int32_t ip_len;
> >       int32_t trim;
> > @@ -154,10 +189,10 @@ rte_ipv6_frag_reassemble_packet(struct
rte_ip_frag_tbl *tbl,
> >       /*
> >        * as per RFC2460, payload length contains all extension
headers
> >        * as well.
> > -      * since we don't support anything but frag headers,
> > -      * this is what we remove from the payload len.
> > +      * so we remove the extension len from the payload len.
> >        */
> > -     ip_len = rte_be_to_cpu_16(ip_hdr->payload_len) -
sizeof(*frag_hdr);
> > +     exts_len = ip_frag_get_last_exthdr(ip_hdr, &last_ipv6_ext);
> > +     ip_len = rte_be_to_cpu_16(ip_hdr->payload_len) - exts_len -
sizeof(*frag_hdr);
> 
> Hmm..., as I remember ip_len is what we want to preserve in the
packet...
> Why we want to remove all previous ext headers here?

The ip_len for packet is computed again later, after reassembly.
However, here we
compute the ip_len to perform some checks on the packet. And this math
follows
what is mentioned in the RFC.

> >       trim = mb->pkt_len - (ip_len + mb->l3_len + mb->l2_len);
> >
> >       IP_FRAG_LOG(DEBUG, "%s:%d:\n"
> > @@ -201,6 +236,21 @@ rte_ipv6_frag_reassemble_packet(struct
rte_ip_frag_tbl *tbl,
> >       /* process the fragmented packet. */
> >       mb = ip_frag_process(fp, dr, mb, ip_ofs, ip_len,
> >                       MORE_FRAGS(frag_hdr->frag_data));
> 
> Can you explain why we setting these new fp fields after
'ip_frag_process()'?
> Ip_frag_process() itself can call reassembly() - if all fragments are
already in place.

This is a bug, but will fix it in the next version

> > +
> > +     /* store extension stack info, only for first fragment */
> > +     if (ip_ofs == 0) {
> 
> If we want it for first fragment only, why not invoke
ip_frag_get_last_exthdr()
> only when ip_ofs == 0?

No, ip_frag_get_last_exthdr() is called for all fragments to find the
len of
of extensions (if present) to perform length checks but we store this
information
only for the first fragment. After reassembly, this stored information
is used to
restore the extensions from the first fragment. 

> > +             /*
> > +              * fp->next_proto points to either the IP's next
header
> > +              * or th next header of the extension before the
fragment
> > +              * extension
> > +              */
> > +             fp->next_proto = (uint8_t *)&ip_hdr->proto;
> > +             if (exts_len > 0) {
> > +                     fp->exts_len = exts_len;
> > +                     fp->next_proto = last_ipv6_ext;
> > +             }
> > +     }
> > +
> >       ip_frag_inuse(tbl, fp);
> >
> >       IP_FRAG_LOG(DEBUG, "%s:%d:\n"
> > --
> > 2.34.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2024-10-14 16:12 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-08-26 11:23 [PATCH] ip_frag: support IPv6 reassembly with extensions vignesh.purushotham.srinivas
2024-08-26 15:41 ` Stephen Hemminger
2024-09-17 17:57   ` Konstantin Ananyev
2024-10-14 10:38     ` Vignesh Purushotham Srinivas
2024-09-17 18:07 ` Konstantin Ananyev
2024-10-14 16:11   ` Vignesh Purushotham Srinivas

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).