DPDK patches and discussions
 help / color / mirror / Atom feed
* [RFC] ip_frag: support IPv6 reassembly with extensions
@ 2024-02-13 11:47 vignesh.purushotham.srinivas
  2024-02-14  3:51 ` Stephen Hemminger
  0 siblings, 1 reply; 5+ messages in thread
From: vignesh.purushotham.srinivas @ 2024-02-13 11:47 UTC (permalink / raw)
  To: konstantin.v.ananyev; +Cc: dev, Vignesh PS

From: Vignesh PS <vignesh.purushotham.srinivas@ericsson.com>

Add support to ip_frag library to perform IPv6 reassembly
when extension headers are present before the fragment
extension in the packet.

Signed-off-by: Vignesh PS <vignesh.purushotham.srinivas@ericsson.com>
---
 .mailmap                          |  1 +
 lib/ip_frag/ip_frag_common.h      |  2 +
 lib/ip_frag/ip_reassembly.h       |  2 +
 lib/ip_frag/rte_ipv6_reassembly.c | 64 ++++++++++++++++++++++++++-----
 4 files changed, 60 insertions(+), 9 deletions(-)

diff --git a/.mailmap b/.mailmap
index de339562f4..6c068d5af3 100644
--- a/.mailmap
+++ b/.mailmap
@@ -1476,6 +1476,7 @@ Viacheslav Ovsiienko <viacheslavo@nvidia.com> <viacheslavo@mellanox.com>
 Victor Kaplansky <victork@redhat.com>
 Victor Raj <victor.raj@intel.com>
 Vidya Sagar Velumuri <vvelumuri@marvell.com>
+Vignesh PS <vignesh.purushotham.srinivas@ericsson.com> <vig.vigneshps1995@gmail.com>
 Vignesh Sridhar <vignesh.sridhar@intel.com>
 Vijayakumar Muthuvel Manickam <mmvijay@gmail.com>
 Vijaya Mohan Guvva <vijay1054@gmail.com>
diff --git a/lib/ip_frag/ip_frag_common.h b/lib/ip_frag/ip_frag_common.h
index c766154dbe..e671246898 100644
--- a/lib/ip_frag/ip_frag_common.h
+++ b/lib/ip_frag/ip_frag_common.h
@@ -169,6 +169,8 @@ ip_frag_reset(struct ip_frag_pkt *fp, uint64_t tms)
 	fp->total_size = UINT32_MAX;
 	fp->frag_size = 0;
 	fp->last_idx = IP_MIN_FRAG_NUM;
+	fp->exts_len = 0;
+	fp->next_proto = NULL;
 	fp->frags[IP_LAST_FRAG_IDX] = zero_frag;
 	fp->frags[IP_FIRST_FRAG_IDX] = zero_frag;
 }
diff --git a/lib/ip_frag/ip_reassembly.h b/lib/ip_frag/ip_reassembly.h
index a9f97aefca..4605e1291f 100644
--- a/lib/ip_frag/ip_reassembly.h
+++ b/lib/ip_frag/ip_reassembly.h
@@ -54,6 +54,8 @@ struct ip_frag_pkt {
 	uint32_t total_size;                   /* expected reassembled size */
 	uint32_t frag_size;                    /* size of fragments received */
 	uint32_t last_idx;                     /* index of next entry to fill */
+	uint32_t exts_len;                     /* length of extension hdrs for first fragment */
+	uint8_t *next_proto;                   /* pointer of the next_proto field */
 	struct ip_frag frags[IP_MAX_FRAG_NUM]; /* fragments */
 } __rte_cache_aligned;
 
diff --git a/lib/ip_frag/rte_ipv6_reassembly.c b/lib/ip_frag/rte_ipv6_reassembly.c
index 88863a98d1..4e862fb55a 100644
--- a/lib/ip_frag/rte_ipv6_reassembly.c
+++ b/lib/ip_frag/rte_ipv6_reassembly.c
@@ -91,19 +91,19 @@ ipv6_frag_reassemble(struct ip_frag_pkt *fp)
 	/* update ipv6 header for the reassembled datagram */
 	ip_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *, m->l2_len);
 
+	payload_len += fp->exts_len;
 	ip_hdr->payload_len = rte_cpu_to_be_16(payload_len);
 
 	/*
 	 * remove fragmentation header. note that per RFC2460, we need to update
 	 * the last non-fragmentable header with the "next header" field to contain
-	 * type of the first fragmentable header, but we currently don't support
-	 * other headers, so we assume there are no other headers and thus update
-	 * the main IPv6 header instead.
+	 * type of the first fragmentable header.
 	 */
-	move_len = m->l2_len + m->l3_len - sizeof(*frag_hdr);
-	frag_hdr = (struct rte_ipv6_fragment_ext *) (ip_hdr + 1);
-	ip_hdr->proto = frag_hdr->next_header;
+	frag_hdr = (struct rte_ipv6_fragment_ext *)
+		((uint8_t *) (ip_hdr + 1) + fp->exts_len);
+	*fp->next_proto = frag_hdr->next_header;
 
+	move_len = m->l2_len + m->l3_len - sizeof(*frag_hdr);
 	ip_frag_memmove(rte_pktmbuf_mtod_offset(m, char *, sizeof(*frag_hdr)),
 			rte_pktmbuf_mtod(m, char*), move_len);
 
@@ -112,6 +112,35 @@ ipv6_frag_reassemble(struct ip_frag_pkt *fp)
 	return m;
 }
 
+/*
+ * Function to crawl through the extension header stack.
+ * This function breaks as soon as the fragment header is
+ * found and returns the total length of the traversed exts
+ * and the last extension before the fragment header
+ */
+static inline uint32_t
+ip_frag_get_last_exthdr(struct rte_ipv6_hdr *ip_hdr, uint8_t **last_ext)
+{
+	uint32_t total_len = 0;
+	size_t ext_len = 0;
+	*last_ext = (uint8_t *)(ip_hdr + 1);
+	int next_proto = ip_hdr->proto;
+
+	while (next_proto != IPPROTO_FRAGMENT &&
+		(next_proto = rte_ipv6_get_next_ext(
+		*last_ext, next_proto, &ext_len)) >= 0) {
+
+		total_len += ext_len;
+
+		if (next_proto == IPPROTO_FRAGMENT)
+			return total_len;
+
+		*last_ext += ext_len;
+	}
+
+	return total_len;
+}
+
 /*
  * Process new mbuf with fragment of IPV6 datagram.
  * Incoming mbuf should have its l2_len/l3_len fields setup correctly.
@@ -139,6 +168,8 @@ rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
 {
 	struct ip_frag_pkt *fp;
 	struct ip_frag_key key;
+	uint8_t *last_ipv6_ext;
+	uint32_t exts_len;
 	uint16_t ip_ofs;
 	int32_t ip_len;
 	int32_t trim;
@@ -154,10 +185,10 @@ rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
 	/*
 	 * as per RFC2460, payload length contains all extension headers
 	 * as well.
-	 * since we don't support anything but frag headers,
-	 * this is what we remove from the payload len.
+	 * so we remove the extension len from the payload len.
 	 */
-	ip_len = rte_be_to_cpu_16(ip_hdr->payload_len) - sizeof(*frag_hdr);
+	exts_len = ip_frag_get_last_exthdr(ip_hdr, &last_ipv6_ext);
+	ip_len = rte_be_to_cpu_16(ip_hdr->payload_len) - exts_len - sizeof(*frag_hdr);
 	trim = mb->pkt_len - (ip_len + mb->l3_len + mb->l2_len);
 
 	IP_FRAG_LOG(DEBUG, "%s:%d:\n"
@@ -201,6 +232,21 @@ rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
 	/* process the fragmented packet. */
 	mb = ip_frag_process(fp, dr, mb, ip_ofs, ip_len,
 			MORE_FRAGS(frag_hdr->frag_data));
+
+	/* store extension stack info, only for first fragment */
+	if (ip_ofs == 0) {
+		/*
+		 * fp->next_proto points to either the IP's next header
+		 * or the next header of the extension before the fragment
+		 * extension
+		 */
+		fp->next_proto = (uint8_t *)&ip_hdr->proto;
+		if (exts_len > 0) {
+			fp->exts_len = exts_len;
+			fp->next_proto = last_ipv6_ext;
+		}
+	}
+
 	ip_frag_inuse(tbl, fp);
 
 	IP_FRAG_LOG(DEBUG, "%s:%d:\n"
-- 
2.34.1



^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RFC] ip_frag: support IPv6 reassembly with extensions
  2024-02-13 11:47 [RFC] ip_frag: support IPv6 reassembly with extensions vignesh.purushotham.srinivas
@ 2024-02-14  3:51 ` Stephen Hemminger
  2024-11-11 11:24   ` Thomas Monjalon
  0 siblings, 1 reply; 5+ messages in thread
From: Stephen Hemminger @ 2024-02-14  3:51 UTC (permalink / raw)
  To: vignesh.purushotham.srinivas; +Cc: konstantin.v.ananyev, dev

On Tue, 13 Feb 2024 12:47:27 +0100
<vignesh.purushotham.srinivas@ericsson.com> wrote:

> +/*
> + * Function to crawl through the extension header stack.
> + * This function breaks as soon a the fragment header is
> + * found and returns the total length the traversed exts
> + * and the last extension before the fragment header
> + */
> +static inline uint32_t
> +ip_frag_get_last_exthdr(struct rte_ipv6_hdr *ip_hdr, uint8_t **last_ext)
> +{
> +	uint32_t total_len = 0;
> +	size_t ext_len = 0;
> +	*last_ext = (uint8_t *)(ip_hdr + 1);
> +	int next_proto = ip_hdr->proto;
> +
> +	while (next_proto != IPPROTO_FRAGMENT &&
> +		(next_proto = rte_ipv6_get_next_ext(
> +		*last_ext, next_proto, &ext_len)) >= 0) {
> +
> +		total_len += ext_len;
> +
> +		if (next_proto == IPPROTO_FRAGMENT)
> +			return total_len;
> +
> +		*last_ext += ext_len;
> +	}
> +
> +	return total_len;
> +}

Doing endless loop like this opens up DoS attacks.
Better to use rte_next_skip_ip6_ext() or do similar limited loop.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RFC] ip_frag: support IPv6 reassembly with extensions
  2024-02-14  3:51 ` Stephen Hemminger
@ 2024-11-11 11:24   ` Thomas Monjalon
  2024-11-11 12:59     ` Konstantin Ananyev
  0 siblings, 1 reply; 5+ messages in thread
From: Thomas Monjalon @ 2024-11-11 11:24 UTC (permalink / raw)
  To: vignesh.purushotham.srinivas; +Cc: dev, konstantin.v.ananyev, Stephen Hemminger

14/02/2024 04:51, Stephen Hemminger:
> On Tue, 13 Feb 2024 12:47:27 +0100
> <vignesh.purushotham.srinivas@ericsson.com> wrote:
> 
> > +/*
> > + * Function to crawl through the extension header stack.
> > + * This function breaks as soon a the fragment header is
> > + * found and returns the total length the traversed exts
> > + * and the last extension before the fragment header
> > + */
> > +static inline uint32_t
> > +ip_frag_get_last_exthdr(struct rte_ipv6_hdr *ip_hdr, uint8_t **last_ext)
> > +{
> > +	uint32_t total_len = 0;
> > +	size_t ext_len = 0;
> > +	*last_ext = (uint8_t *)(ip_hdr + 1);
> > +	int next_proto = ip_hdr->proto;
> > +
> > +	while (next_proto != IPPROTO_FRAGMENT &&
> > +		(next_proto = rte_ipv6_get_next_ext(
> > +		*last_ext, next_proto, &ext_len)) >= 0) {
> > +
> > +		total_len += ext_len;
> > +
> > +		if (next_proto == IPPROTO_FRAGMENT)
> > +			return total_len;
> > +
> > +		*last_ext += ext_len;
> > +	}
> > +
> > +	return total_len;
> > +}
> 
> Doing endless loop like this opens up DoS attacks.
> Better to use rte_next_skip_ip6_ext() or do similar limited loop.

There was no reply to this interesting comment?



^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: [RFC] ip_frag: support IPv6 reassembly with extensions
  2024-11-11 11:24   ` Thomas Monjalon
@ 2024-11-11 12:59     ` Konstantin Ananyev
  2024-11-11 13:10       ` Thomas Monjalon
  0 siblings, 1 reply; 5+ messages in thread
From: Konstantin Ananyev @ 2024-11-11 12:59 UTC (permalink / raw)
  To: Thomas Monjalon, vignesh.purushotham.srinivas
  Cc: dev, konstantin.v.ananyev, Stephen Hemminger


> 14/02/2024 04:51, Stephen Hemminger:
> > On Tue, 13 Feb 2024 12:47:27 +0100
> > <vignesh.purushotham.srinivas@ericsson.com> wrote:
> >
> > > +/*
> > > + * Function to crawl through the extension header stack.
> > > + * This function breaks as soon a the fragment header is
> > > + * found and returns the total length the traversed exts
> > > + * and the last extension before the fragment header
> > > + */
> > > +static inline uint32_t
> > > +ip_frag_get_last_exthdr(struct rte_ipv6_hdr *ip_hdr, uint8_t **last_ext)
> > > +{
> > > +	uint32_t total_len = 0;
> > > +	size_t ext_len = 0;
> > > +	*last_ext = (uint8_t *)(ip_hdr + 1);
> > > +	int next_proto = ip_hdr->proto;
> > > +
> > > +	while (next_proto != IPPROTO_FRAGMENT &&
> > > +		(next_proto = rte_ipv6_get_next_ext(
> > > +		*last_ext, next_proto, &ext_len)) >= 0) {
> > > +
> > > +		total_len += ext_len;
> > > +
> > > +		if (next_proto == IPPROTO_FRAGMENT)
> > > +			return total_len;
> > > +
> > > +		*last_ext += ext_len;
> > > +	}
> > > +
> > > +	return total_len;
> > > +}
> >
> > Doing endless loop like this opens up DoS attacks.
> > Better to use rte_next_skip_ip6_ext() or do similar limited loop.
> 
> There was no reply to this interesting comment?

I think there is a limit in the latest version of that patch:
https://patchwork.dpdk.org/project/dpdk/patch/20241015082133.3910533-1-vignesh.purushotham.srinivas@ericsson.com/
It also has an ACK from me...
Though looking at it once again - we'd better have an extra check here to
make sure that total_len would not exceed mbuf->data_len. 



^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RFC] ip_frag: support IPv6 reassembly with extensions
  2024-11-11 12:59     ` Konstantin Ananyev
@ 2024-11-11 13:10       ` Thomas Monjalon
  0 siblings, 0 replies; 5+ messages in thread
From: Thomas Monjalon @ 2024-11-11 13:10 UTC (permalink / raw)
  To: vignesh.purushotham.srinivas, Konstantin Ananyev
  Cc: dev, konstantin.v.ananyev, Stephen Hemminger

11/11/2024 13:59, Konstantin Ananyev:
> 
> > 14/02/2024 04:51, Stephen Hemminger:
> > > On Tue, 13 Feb 2024 12:47:27 +0100
> > > <vignesh.purushotham.srinivas@ericsson.com> wrote:
> > >
> > > > +/*
> > > > + * Function to crawl through the extension header stack.
> > > > + * This function breaks as soon a the fragment header is
> > > > + * found and returns the total length the traversed exts
> > > > + * and the last extension before the fragment header
> > > > + */
> > > > +static inline uint32_t
> > > > +ip_frag_get_last_exthdr(struct rte_ipv6_hdr *ip_hdr, uint8_t **last_ext)
> > > > +{
> > > > +	uint32_t total_len = 0;
> > > > +	size_t ext_len = 0;
> > > > +	*last_ext = (uint8_t *)(ip_hdr + 1);
> > > > +	int next_proto = ip_hdr->proto;
> > > > +
> > > > +	while (next_proto != IPPROTO_FRAGMENT &&
> > > > +		(next_proto = rte_ipv6_get_next_ext(
> > > > +		*last_ext, next_proto, &ext_len)) >= 0) {
> > > > +
> > > > +		total_len += ext_len;
> > > > +
> > > > +		if (next_proto == IPPROTO_FRAGMENT)
> > > > +			return total_len;
> > > > +
> > > > +		*last_ext += ext_len;
> > > > +	}
> > > > +
> > > > +	return total_len;
> > > > +}
> > >
> > > Doing endless loop like this opens up DoS attacks.
> > > Better to use rte_next_skip_ip6_ext() or do similar limited loop.
> > 
> > There was no reply to this interesting comment?
> 
> I think there is a limit is the latest version for that patch:
> https://patchwork.dpdk.org/project/dpdk/patch/20241015082133.3910533-1-vignesh.purushotham.srinivas@ericsson.com/

Even if there is a new version, we should reply to comments.

> It is also has an ACK from me...
> Though looking at it once again - we'd better have an extra check here to
> make sure that total_len would not exceed mbuf->data_len. 

I suppose it can wait 25.03.



^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2024-11-11 13:10 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-02-13 11:47 [RFC] ip_frag: support IPv6 reassembly with extensions vignesh.purushotham.srinivas
2024-02-14  3:51 ` Stephen Hemminger
2024-11-11 11:24   ` Thomas Monjalon
2024-11-11 12:59     ` Konstantin Ananyev
2024-11-11 13:10       ` Thomas Monjalon

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).