From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <dev-bounces@dpdk.org>
Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124])
	by inbox.dpdk.org (Postfix) with ESMTP id C88D0A04FF;
	Tue, 22 Mar 2022 04:09:43 +0100 (CET)
Received: from [217.70.189.124] (localhost [127.0.0.1])
	by mails.dpdk.org (Postfix) with ESMTP id 6318B410E5;
	Tue, 22 Mar 2022 04:09:43 +0100 (CET)
Received: from m12-17.163.com (m12-17.163.com [220.181.12.17])
 by mails.dpdk.org (Postfix) with ESMTP id 9F5FF410DD
 for <dev@dpdk.org>; Tue, 22 Mar 2022 04:09:41 +0100 (CET)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=163.com;
 s=s110527; h=From:Subject:Date:Message-Id; bh=n1pOVAb78EDxwDRdDQ
 dsLCW+nkUh7xbLcjCmhrjJLoM=; b=IkMzi8OhHSxnJsxdSlizC4WoOpjXmWWvWv
 ej8eOWVja7czquQeogZ2QfrgtZzspx8GbDxxKgsF1qhQgx/rBXvSN3M6PRoJAPEw
 SLBGegnrjGkTqAHeh9oMweWtqOlMXKUVwfel45YX3vuALXFLfxNJpsg8xbbe+8hw
 rIY29+6vE=
Received: from bogon.localdomain (unknown [36.111.88.33])
 by smtp13 (Coremail) with SMTP id EcCowACHiTvsPTlilejXIQ--.52549S2;
 Tue, 22 Mar 2022 11:09:37 +0800 (CST)
From: Huichao Cai <chcchc88@163.com>
To: dev@dpdk.org
Cc: konstantin.ananyev@intel.com
Subject: [PATCH v5] ip_frag: add IPv4 options fragment and test data
Date: Tue, 22 Mar 2022 11:09:20 +0800
Message-Id: <1647918560-14165-1-git-send-email-chcchc88@163.com>
X-Mailer: git-send-email 1.8.3.1
In-Reply-To: <1647328926-17118-1-git-send-email-chcchc88@163.com>
References: <1647328926-17118-1-git-send-email-chcchc88@163.com>
X-CM-TRANSID: EcCowACHiTvsPTlilejXIQ--.52549S2
X-Coremail-Antispam: 1Uf129KBjvAXoW3KFW5Gw4rAFy3XFW5GF43GFg_yoW8GrWfWo
 ZrXwn0va4vgr9FyrWvkry0qFy8Ww4ak3y7tws0gr4kuw1xCay5Gwn3Zw45Za1UZw45KryY
 kas2q3W3Xayvyr4kn29KB7ZKAUJUUUUU529EdanIXcx71UUUUU7v73VFW2AGmfu7bjvjm3
 AaLaJ3UbIYCTnIWIevJa73UjIFyTuYvjxUcdb1UUUUU
X-Originating-IP: [36.111.88.33]
X-CM-SenderInfo: pfkfuxrfyyqiywtou0bp/1tbitxHLF1aEOaX9KAAAsh
X-BeenThere: dev@dpdk.org
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: DPDK patches and discussions <dev.dpdk.org>
List-Unsubscribe: <https://mails.dpdk.org/options/dev>,
 <mailto:dev-request@dpdk.org?subject=unsubscribe>
List-Archive: <http://mails.dpdk.org/archives/dev/>
List-Post: <mailto:dev@dpdk.org>
List-Help: <mailto:dev-request@dpdk.org?subject=help>
List-Subscribe: <https://mails.dpdk.org/listinfo/dev>,
 <mailto:dev-request@dpdk.org?subject=subscribe>
Errors-To: dev-bounces@dpdk.org

According to RFC791,the options may appear or not in datagrams.
They must be implemented by all IP modules (host and gateways).
What is optional is their transmission in any particular datagram,
not their implementation.So we have to deal with it during the
fragmenting process.Add some test data for the IPv4 header optional
field fragmenting.

Signed-off-by: Huichao Cai <chcchc88@163.com>
---
 app/test/test_ipfrag.c               | 219 ++++++++++++++++++++++++++++++++---
 lib/ip_frag/rte_ip_frag.h            |   6 +
 lib/ip_frag/rte_ipv4_fragmentation.c |  70 ++++++++++-
 3 files changed, 272 insertions(+), 23 deletions(-)

diff --git a/app/test/test_ipfrag.c b/app/test/test_ipfrag.c
index 1ced25a..8289a60 100644
--- a/app/test/test_ipfrag.c
+++ b/app/test/test_ipfrag.c
@@ -18,10 +18,50 @@
 #define NUM_MBUFS 128
 #define BURST 32
 
+uint8_t expected_first_frag_ipv4_opts_copied[] = {
+	0x07, 0x0b, 0x04, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x83,
+	0x07, 0x04, 0xc0, 0xa8,
+	0xe3, 0x96, 0x00, 0x00,
+};
+
+uint8_t expected_sub_frag_ipv4_opts_copied[] = {
+	0x83, 0x07, 0x04, 0xc0,
+	0xa8, 0xe3, 0x96, 0x00,
+};
+
+uint8_t expected_first_frag_ipv4_opts_nocopied[] = {
+	0x07, 0x0b, 0x04, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+};
+
+uint8_t expected_sub_frag_ipv4_opts_nocopied[0];
+
+struct test_opt_data {
+	bool is_first_frag;		 /**< offset is 0 */
+	bool opt_copied;		 /**< ip option copied flag */
+	uint16_t len;			 /**< option data len */
+	uint8_t data[RTE_IPV4_IPOPT_MAX_LEN]; /**< option data */
+};
+
 static struct rte_mempool *pkt_pool,
 			  *direct_pool,
 			  *indirect_pool;
 
+static inline void
+hex_to_str(uint8_t *hex, uint16_t len, char *str)
+{
+	int i;
+
+	for (i = 0; i < len; i++) {
+		sprintf(str, "%02x", hex[i]);
+		str += 2;
+	}
+	*str = 0;
+}
+
 static int
 setup_buf_pool(void)
 {
@@ -88,23 +128,67 @@ static void ut_teardown(void)
 {
 }
 
+static inline void
+test_get_ipv4_opt(bool is_first_frag, bool opt_copied,
+	struct test_opt_data *expected_opt)
+{
+	if (is_first_frag) {
+		if (opt_copied) {
+			expected_opt->len =
+				sizeof(expected_first_frag_ipv4_opts_copied);
+			rte_memcpy(expected_opt->data,
+				expected_first_frag_ipv4_opts_copied,
+				sizeof(expected_first_frag_ipv4_opts_copied));
+		} else {
+			expected_opt->len =
+				sizeof(expected_first_frag_ipv4_opts_nocopied);
+			rte_memcpy(expected_opt->data,
+				expected_first_frag_ipv4_opts_nocopied,
+				sizeof(expected_first_frag_ipv4_opts_nocopied));
+		}
+	} else {
+		if (opt_copied) {
+			expected_opt->len =
+				sizeof(expected_sub_frag_ipv4_opts_copied);
+			rte_memcpy(expected_opt->data,
+				expected_sub_frag_ipv4_opts_copied,
+				sizeof(expected_sub_frag_ipv4_opts_copied));
+		} else {
+			expected_opt->len =
+				sizeof(expected_sub_frag_ipv4_opts_nocopied);
+			rte_memcpy(expected_opt->data,
+				expected_sub_frag_ipv4_opts_nocopied,
+				sizeof(expected_sub_frag_ipv4_opts_nocopied));
+		}
+	}
+}
+
 static void
-v4_allocate_packet_of(struct rte_mbuf *b, int fill,
-		      size_t s, int df, uint8_t mf, uint16_t off,
-		      uint8_t ttl, uint8_t proto, uint16_t pktid)
+v4_allocate_packet_of(struct rte_mbuf *b, int fill, size_t s,
+	int df, uint8_t mf, uint16_t off, uint8_t ttl, uint8_t proto,
+	uint16_t pktid, bool have_opt, bool is_first_frag, bool opt_copied)
 {
 	/* Create a packet, 2k bytes long */
 	b->data_off = 0;
 	char *data = rte_pktmbuf_mtod(b, char *);
-	rte_be16_t fragment_offset = 0;	/**< fragmentation offset */
+	rte_be16_t fragment_offset = 0;	/* fragmentation offset */
+	uint16_t iph_len;
+	struct test_opt_data opt;
+
+	opt.len = 0;
+
+	if (have_opt)
+		test_get_ipv4_opt(is_first_frag, opt_copied, &opt);
 
-	memset(data, fill, sizeof(struct rte_ipv4_hdr) + s);
+	iph_len = sizeof(struct rte_ipv4_hdr) + opt.len;
+	memset(data, fill, iph_len + s);
 
 	struct rte_ipv4_hdr *hdr = (struct rte_ipv4_hdr *)data;
 
-	hdr->version_ihl = 0x45; /* standard IP header... */
+	hdr->version_ihl = 0x40; /* ipv4 */
+	hdr->version_ihl += (iph_len / 4);
 	hdr->type_of_service = 0;
-	b->pkt_len = s + sizeof(struct rte_ipv4_hdr);
+	b->pkt_len = s + iph_len;
 	b->data_len = b->pkt_len;
 	hdr->total_length = rte_cpu_to_be_16(b->pkt_len);
 	hdr->packet_id = rte_cpu_to_be_16(pktid);
@@ -131,6 +215,8 @@ static void ut_teardown(void)
 	hdr->hdr_checksum = 0;
 	hdr->src_addr = rte_cpu_to_be_32(0x8080808);
 	hdr->dst_addr = rte_cpu_to_be_32(0x8080404);
+
+	rte_memcpy(hdr + 1, opt.data, opt.len);
 }
 
 static void
@@ -187,6 +273,45 @@ static void ut_teardown(void)
 	}
 }
 
+static inline void
+test_get_frag_opt(struct rte_mbuf **mb, int32_t num,
+	struct test_opt_data *opt, int ipv, bool opt_copied)
+{
+	int32_t i;
+
+	for (i = 0; i < num; i++) {
+		if (ipv == 4) {
+			struct rte_ipv4_hdr *iph =
+			    rte_pktmbuf_mtod(mb[i], struct rte_ipv4_hdr *);
+			uint16_t header_len = (iph->version_ihl &
+				RTE_IPV4_HDR_IHL_MASK) *
+				RTE_IPV4_IHL_MULTIPLIER;
+			uint16_t opt_len = header_len -
+				sizeof(struct rte_ipv4_hdr);
+
+			opt->opt_copied = opt_copied;
+
+			if ((rte_be_to_cpu_16(iph->fragment_offset) &
+				    RTE_IPV4_HDR_OFFSET_MASK) == 0)
+				opt->is_first_frag = true;
+			else
+				opt->is_first_frag = false;
+
+			if (likely(opt_len <= RTE_IPV4_IPOPT_MAX_LEN)) {
+				char *iph_opt = rte_pktmbuf_mtod_offset(mb[i],
+				    char *, sizeof(struct rte_ipv4_hdr));
+				opt->len = opt_len;
+				rte_memcpy(opt->data, iph_opt, opt_len);
+			} else {
+				opt->len = RTE_IPV4_IPOPT_MAX_LEN;
+				memset(opt->data, RTE_IPV4_IPOPT_EOL,
+				    sizeof(opt->data));
+			}
+			opt++;
+		}
+	}
+}
+
 static int
 test_ip_frag(void)
 {
@@ -206,32 +331,52 @@ static void ut_teardown(void)
 		uint16_t pkt_id;
 		int      expected_frags;
 		uint16_t expected_fragment_offset[BURST];
+		bool have_opt;
+		bool is_first_frag;
+		bool opt_copied;
 	} tests[] = {
 		 {4, 1280, 1400, 0, 0, 0, 64, IPPROTO_ICMP, RND_ID,       2,
-		  {0x2000, 0x009D}},
+		  {0x2000, 0x009D}, false},
 		 {4, 1280, 1400, 0, 0, 0, 64, IPPROTO_ICMP, 0,            2,
-		  {0x2000, 0x009D}},
+		  {0x2000, 0x009D}, false},
 		 {4,  600, 1400, 0, 0, 0, 64, IPPROTO_ICMP, RND_ID,       3,
-		  {0x2000, 0x2048, 0x0090}},
+		  {0x2000, 0x2048, 0x0090}, false},
 		 {4, 4, 1400, 0, 0, 0, 64, IPPROTO_ICMP, RND_ID,    -EINVAL},
 		 {4, 600, 1400, 1, 0, 0, 64, IPPROTO_ICMP, RND_ID, -ENOTSUP},
 		 {4, 600, 1400, 0, 0, 0, 0, IPPROTO_ICMP, RND_ID,         3,
-		  {0x2000, 0x2048, 0x0090}},
+		  {0x2000, 0x2046, 0x008C}, true, true, true},
+		 /* The first fragment */
+		 {4, 68, 104, 0, 1, 0, 0, IPPROTO_ICMP, RND_ID,           5,
+		  {0x2000, 0x2003, 0x2006, 0x2009, 0x200C}, true, true, true},
+		 /* The middle fragment */
 		 {4, 68, 104, 0, 1, 13, 0, IPPROTO_ICMP, RND_ID,          3,
-		  {0x200D, 0x2013, 0x2019}},
-
+		  {0x200D, 0x2012, 0x2017}, true, false, true},
+		 /* The last fragment */
+		 {4, 68, 104, 0, 0, 26, 0, IPPROTO_ICMP, RND_ID,          3,
+		  {0x201A, 0x201F, 0x0024}, true, false, true},
+		 /* The first fragment */
+		 {4, 68, 104, 0, 1, 0, 0, IPPROTO_ICMP, RND_ID,           4,
+		  {0x2000, 0x2004, 0x2008, 0x200C}, true, true, false},
+		 /* The middle fragment */
+		 {4, 68, 104, 0, 1, 13, 0, IPPROTO_ICMP, RND_ID,          3,
+		  {0x200D, 0x2013, 0x2019}, true, false, false},
+		 /* The last fragment */
+		 {4, 68, 104, 0, 0, 26, 0, IPPROTO_ICMP, RND_ID,          3,
+		  {0x201A, 0x2020, 0x0026}, true, false, false},
 		 {6, 1280, 1400, 0, 0, 0, 64, IPPROTO_ICMP, RND_ID,       2,
-		  {0x0001, 0x04D0}},
+		  {0x0001, 0x04D0}, false},
 		 {6, 1300, 1400, 0, 0, 0, 64, IPPROTO_ICMP, RND_ID,       2,
-		  {0x0001, 0x04E0}},
+		  {0x0001, 0x04E0}, false},
 		 {6, 4, 1400, 0, 0, 0, 64, IPPROTO_ICMP, RND_ID,    -EINVAL},
 		 {6, 1300, 1400, 0, 0, 0, 0, IPPROTO_ICMP, RND_ID,        2,
-		  {0x0001, 0x04E0}},
+		  {0x0001, 0x04E0}, false},
 	};
 
 	for (i = 0; i < RTE_DIM(tests); i++) {
 		int32_t len = 0;
 		uint16_t fragment_offset[BURST];
+		struct test_opt_data opt_res[BURST];
+		struct test_opt_data opt_exp;
 		uint16_t pktid = tests[i].pkt_id;
 		struct rte_mbuf *pkts_out[BURST];
 		struct rte_mbuf *b = rte_pktmbuf_alloc(pkt_pool);
@@ -250,7 +395,10 @@ static void ut_teardown(void)
 					      tests[i].set_of,
 					      tests[i].ttl,
 					      tests[i].proto,
-					      pktid);
+					      pktid,
+					      tests[i].have_opt,
+					      tests[i].is_first_frag,
+					      tests[i].opt_copied);
 		} else if (tests[i].ipv == 6) {
 			v6_allocate_packet_of(b, 0x41414141,
 					      tests[i].pkt_size,
@@ -275,17 +423,20 @@ static void ut_teardown(void)
 		if (len > 0) {
 			test_get_offset(pkts_out, len,
 			    fragment_offset, tests[i].ipv);
+			if (tests[i].have_opt)
+				test_get_frag_opt(pkts_out, len, opt_res,
+					tests[i].ipv, tests[i].opt_copied);
 			test_free_fragments(pkts_out, len);
 		}
 
-		printf("%zd: checking %d with %d\n", i, len,
+		printf("[check frag number]%zd: checking %d with %d\n", i, len,
 		       tests[i].expected_frags);
 		RTE_TEST_ASSERT_EQUAL(len, tests[i].expected_frags,
 				      "Failed case %zd.\n", i);
 
 		if (len > 0) {
 			for (j = 0; j < (size_t)len; j++) {
-				printf("%zd-%zd: checking %d with %d\n",
+				printf("[check offset]%zd-%zd: checking %d with %d\n",
 				    i, j, fragment_offset[j],
 				    rte_cpu_to_be_16(
 					tests[i].expected_fragment_offset[j]));
@@ -294,6 +445,36 @@ static void ut_teardown(void)
 					tests[i].expected_fragment_offset[j]),
 				    "Failed case %zd.\n", i);
 			}
+
+			if (tests[i].have_opt && (tests[i].ipv == 4)) {
+				for (j = 0; j < (size_t)len; j++) {
+					char opt_res_str[2 *
+						RTE_IPV4_IPOPT_MAX_LEN + 1];
+					char opt_exp_str[2 *
+						RTE_IPV4_IPOPT_MAX_LEN + 1];
+
+					test_get_ipv4_opt(
+						opt_res[j].is_first_frag,
+						opt_res[j].opt_copied,
+						&opt_exp);
+					hex_to_str(opt_res[j].data,
+						opt_res[j].len,
+						opt_res_str);
+					hex_to_str(opt_exp.data,
+						opt_exp.len,
+						opt_exp_str);
+
+					printf(
+						"[check ipv4 option]%zd-%zd: checking (len:%u)%s with (len:%u)%s\n",
+						i, j,
+						opt_res[j].len, opt_res_str,
+						opt_exp.len, opt_exp_str);
+						RTE_TEST_ASSERT_SUCCESS(
+							strcmp(opt_res_str,
+								opt_exp_str),
+						"Failed case %zd.\n", i);
+				}
+			}
 		}
 
 	}
diff --git a/lib/ip_frag/rte_ip_frag.h b/lib/ip_frag/rte_ip_frag.h
index 7d2abe1..f337d1e 100644
--- a/lib/ip_frag/rte_ip_frag.h
+++ b/lib/ip_frag/rte_ip_frag.h
@@ -27,6 +27,12 @@
 
 struct rte_mbuf;
 
+/* IP options */
+#define RTE_IPV4_IPOPT_EOL       0
+#define RTE_IPV4_IPOPT_NOP       1
+#define RTE_IPV4_IPOPT_COPIED(v) ((v) & 0x80)
+#define RTE_IPV4_IPOPT_MAX_LEN   40
+
 /** death row size (in packets) */
 #define RTE_IP_FRAG_DEATH_ROW_LEN 32
 
diff --git a/lib/ip_frag/rte_ipv4_fragmentation.c b/lib/ip_frag/rte_ipv4_fragmentation.c
index 2e7739d..1e655a7 100644
--- a/lib/ip_frag/rte_ipv4_fragmentation.c
+++ b/lib/ip_frag/rte_ipv4_fragmentation.c
@@ -22,6 +22,8 @@
 
 #define	IPV4_HDR_FO_ALIGN			(1 << RTE_IPV4_HDR_FO_SHIFT)
 
+#define IPV4_HDR_MAX_LEN			60
+
 static inline void __fill_ipv4hdr_frag(struct rte_ipv4_hdr *dst,
 		const struct rte_ipv4_hdr *src, uint16_t header_len,
 		uint16_t len, uint16_t fofs, uint16_t dofs, uint32_t mf)
@@ -41,6 +43,49 @@ static inline void __free_fragments(struct rte_mbuf *mb[], uint32_t num)
 		rte_pktmbuf_free(mb[i]);
 }
 
+static inline uint16_t __create_ipopt_frag_hdr(uint8_t *iph,
+	uint16_t ipopt_len, uint8_t *ipopt_frag_hdr)
+{
+	uint16_t len = ipopt_len;
+	struct rte_ipv4_hdr *iph_opt = (struct rte_ipv4_hdr *)ipopt_frag_hdr;
+
+	ipopt_len = 0;
+	rte_memcpy(ipopt_frag_hdr, iph, sizeof(struct rte_ipv4_hdr));
+	ipopt_frag_hdr += sizeof(struct rte_ipv4_hdr);
+
+	uint8_t *p_opt = iph + sizeof(struct rte_ipv4_hdr);
+
+	while (len > 0) {
+		if (unlikely(*p_opt == RTE_IPV4_IPOPT_NOP)) {
+			len--;
+			p_opt++;
+			continue;
+		} else if (unlikely(*p_opt == RTE_IPV4_IPOPT_EOL))
+			break;
+
+		if (unlikely(p_opt[1] < 2 || p_opt[1] > len))
+			break;
+
+		if (RTE_IPV4_IPOPT_COPIED(*p_opt)) {
+			rte_memcpy(ipopt_frag_hdr + ipopt_len,
+				p_opt, p_opt[1]);
+			ipopt_len += p_opt[1];
+		}
+
+		len -= p_opt[1];
+		p_opt += p_opt[1];
+	}
+
+	len = RTE_ALIGN_CEIL(ipopt_len, RTE_IPV4_IHL_MULTIPLIER);
+	memset(ipopt_frag_hdr + ipopt_len,
+		RTE_IPV4_IPOPT_EOL, len - ipopt_len);
+	ipopt_len = len;
+	iph_opt->ihl = (sizeof(struct rte_ipv4_hdr) + ipopt_len) /
+		RTE_IPV4_IHL_MULTIPLIER;
+
+	return ipopt_len;
+}
+
 /**
  * IPv4 fragmentation.
  *
@@ -76,6 +121,8 @@ static inline void __free_fragments(struct rte_mbuf *mb[], uint32_t num)
 	uint32_t more_in_segs;
 	uint16_t fragment_offset, flag_offset, frag_size, header_len;
 	uint16_t frag_bytes_remaining;
+	uint8_t ipopt_frag_hdr[IPV4_HDR_MAX_LEN];
+	uint16_t ipopt_len;
 
 	/*
 	 * Formal parameter checking.
@@ -118,6 +165,10 @@ static inline void __free_fragments(struct rte_mbuf *mb[], uint32_t num)
 	out_pkt_pos = 0;
 	fragment_offset = 0;
 
+	ipopt_len = header_len - sizeof(struct rte_ipv4_hdr);
+	if (unlikely(ipopt_len > RTE_IPV4_IPOPT_MAX_LEN))
+		return -EINVAL;
+
 	more_in_segs = 1;
 	while (likely(more_in_segs)) {
 		struct rte_mbuf *out_pkt = NULL, *out_seg_prev = NULL;
@@ -188,10 +239,21 @@ static inline void __free_fragments(struct rte_mbuf *mb[], uint32_t num)
 		    (uint16_t)out_pkt->pkt_len,
 		    flag_offset, fragment_offset, more_in_segs);
 
-		fragment_offset = (uint16_t)(fragment_offset +
-		    out_pkt->pkt_len - header_len);
-
-		out_pkt->l3_len = header_len;
+		if (unlikely((fragment_offset == 0) && (ipopt_len) &&
+			    ((flag_offset & RTE_IPV4_HDR_OFFSET_MASK) == 0))) {
+			ipopt_len = __create_ipopt_frag_hdr((uint8_t *)in_hdr,
+				ipopt_len, ipopt_frag_hdr);
+			fragment_offset = (uint16_t)(fragment_offset +
+				out_pkt->pkt_len - header_len);
+			out_pkt->l3_len = header_len;
+
+			header_len = sizeof(struct rte_ipv4_hdr) + ipopt_len;
+			in_hdr = (struct rte_ipv4_hdr *)ipopt_frag_hdr;
+		} else {
+			fragment_offset = (uint16_t)(fragment_offset +
+				out_pkt->pkt_len - header_len);
+			out_pkt->l3_len = header_len;
+		}
 
 		/* Write the fragment to the output list */
 		pkts_out[out_pkt_pos] = out_pkt;
-- 
1.8.3.1