Hi, @olivier.matz@6wind.com 

On Sun, 11 Jun 2023, 1:17 am Tanzeel-inline, <tanxeel1.ahmed@gmail.com> wrote:
None of the foundational NICs currently supports MPLS insertion and
stripping, this functionality can help users who rely on MPLS in their
network application.

Signed-off-by: Tanzeel Ahmed <tanxeel1.ahmed@gmail.com>

---

This patch is new version of [PATCH] lib/net: added push MPLS header API.
I have also added the MPLS strip functionality to address the question
asked in last patch.

> To be honest, I have some doubts about the usefulness of the patch,
> especially the function that strips all the MPLS headers.

I believe it serves a practical purpose, in scenarios involving tapped traffic where MPLS headers
have not been stripped.
While some headers in the lib/net folder have well-defined functions, others are limited to their
structure alone. It would be advantageous to have basic functions available for all headers.

> I think the function should only strip the first MPLS header, it is
> symmetric with the previous function, and more flexible.

You are right, stripping one header is more flexible.
I updated the function to return 1, in case of stripping last MPLS header.

v5:
* Updated the MPLS strip function to strip one header at a time.
* Added the unit test cases.

v4:
* Removed extra void cast.
* rte_pktmbuf_append/mtod now return void*.
  The memmove result is casted to rte_ether_hdr*.

v3:
* fixed patch check failure issue

v2:
* marked experimental
* coding style fixed
* changed rte_memcpy to memcpy
* mpls header marked as const in parameter
* added MPLS stripping functionality
---
 app/test/meson.build |   2 +
 app/test/test_mpls.c | 180 +++++++++++++++++++++++++++++++++++++++++++
 lib/net/rte_mpls.h   | 106 +++++++++++++++++++++++++
 3 files changed, 288 insertions(+)
 create mode 100644 app/test/test_mpls.c

diff --git a/app/test/meson.build b/app/test/meson.build
index f34d19e3c3..548349399f 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -95,6 +95,7 @@ test_sources = files(
         'test_meter.c',
         'test_mcslock.c',
         'test_mp_secondary.c',
+        'test_mpls.c',
         'test_per_lcore.c',
         'test_pflock.c',
         'test_pmd_perf.c',
@@ -205,6 +206,7 @@ fast_tests = [
         ['mempool_autotest', false, true],
         ['memzone_autotest', false, true],
         ['meter_autotest', true, true],
+        ['mpls_autotest', false, true],
         ['multiprocess_autotest', false, false],
         ['per_lcore_autotest', true, true],
         ['pflock_autotest', true, true],
diff --git a/app/test/test_mpls.c b/app/test/test_mpls.c
new file mode 100644
index 0000000000..8ff701f6e0
--- /dev/null
+++ b/app/test/test_mpls.c
@@ -0,0 +1,180 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include "test.h"
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+#include <rte_common.h>
+#include <rte_memory.h>
+#include <rte_memcpy.h>
+#include <rte_mbuf.h>
+#include <rte_malloc.h>
+#include <rte_ether.h>
+#include <rte_mpls.h>
+
+#define MEMPOOL_CACHE_SIZE      32
+#define MBUF_DATA_SIZE          2048
+#define NB_MBUF                 128
+
+static int
+test_mpls_fail_push(struct rte_mbuf *m)
+{
+       struct rte_mpls_hdr mpls;
+
+       /* create dummy MPLS header */
+       mpls.tag_msb = 1;
+       mpls.tag_lsb = 2;
+       mpls.bs = 1;
+       mpls.tc = 1;
+       mpls.ttl = 255;
+
+       /* push first MPLS header */
+       if (rte_mpls_push_over_l2(m, &mpls) != 0)
+               return 0;
+       return -1;
+}
+
+static int
+test_mpls_push(struct rte_mbuf *m)
+{
+       struct rte_mpls_hdr mpls;
+
+       /* create dummy MPLS header */
+       mpls.tag_msb = 1;
+       mpls.tag_lsb = 2;
+       mpls.bs = 1;
+       mpls.tc = 1;
+       mpls.ttl = 255;
+
+       /* push first MPLS header */
+       if (rte_mpls_push_over_l2(m, &mpls) != 0) {
+               printf("Failed to insert mpls 1\n");
+               return -1;
+       }
+       if (rte_pktmbuf_pkt_len(m) != RTE_ETHER_HDR_LEN + RTE_MPLS_HLEN) {
+               printf("Bad pkt length after inserting first mpls header\n");
+               return -1;
+       }
+
+       /* push second MPLS header*/
+       if (rte_mpls_push_over_l2(m, &mpls) != 0) {
+               printf("failed to insert mpls 1\n");
+               return -1;
+       }
+       if (rte_pktmbuf_pkt_len(m) != RTE_ETHER_HDR_LEN + RTE_MPLS_HLEN * 2) {
+               printf("bad pkt length after inserting second mpls header\n");
+               return -1;
+       }
+       return 0;
+}
+
+static int
+test_mpls_fail_strip(struct rte_mbuf *m)
+{
+       /* strip MPLS headers */
+       if (rte_mpls_strip_over_l2(m) != 0)
+               return 0;
+       return -1;
+}
+
+static int
+test_mpls_strip(struct rte_mbuf *m)
+{
+       /* strip MPLS headers */
+       return rte_mpls_strip_over_l2(m);
+}
+
+static int
+test_mpls(void)
+{
+       int ret = -1;
+       struct rte_mempool *pktmbuf_pool = NULL;
+       struct rte_mbuf *m = NULL;
+       char *data;
+       struct rte_ether_hdr eh;
+
+       /* create pktmbuf pool */
+       pktmbuf_pool = rte_pktmbuf_pool_create("test_mpls_pool",
+                       NB_MBUF, MEMPOOL_CACHE_SIZE, 0, MBUF_DATA_SIZE,
+                       SOCKET_ID_ANY);
+
+       if (pktmbuf_pool == NULL) {
+               printf("cannot allocate mbuf pool\n");
+               goto err;
+       }
+
+    /* allocate mbuf from pool */
+       m = rte_pktmbuf_alloc(pktmbuf_pool);
+       if (m == NULL) {
+               printf("mbuf alloc failed\n");
+               goto err;
+       }
+       if (rte_pktmbuf_data_len(m) != 0) {
+               printf("mbuf alloc bad length\n");
+               goto err;
+       }
+
+       if (test_mpls_fail_push(m) < 0) {
+               printf("test_mpls_fail_push() failed\n");
+               goto err;
+       }
+
+       if (test_mpls_fail_strip(m) < 0) {
+               printf("test_mpls_fail_strip() failed\n");
+               goto err;
+       }
+
+       /* create a dummy ethernet header */
+       memset(&eh.src_addr, 0, RTE_ETHER_ADDR_LEN);
+       memset(&eh.dst_addr, 0, RTE_ETHER_ADDR_LEN);
+       eh.ether_type = rte_be_to_cpu_16(RTE_ETHER_TYPE_IPV4);
+
+       /* append ethernet header into mbuf */
+       data = rte_pktmbuf_append(m, RTE_ETHER_HDR_LEN);
+       if (data == NULL) {
+               printf("cannot append data\n");
+               goto err;
+       }
+       if (rte_pktmbuf_data_len(m) != RTE_ETHER_HDR_LEN) {
+               printf("bad pkt data length\n");
+               goto err;
+       }
+       memcpy(data, &eh, RTE_ETHER_HDR_LEN);
+
+       if (test_mpls_push(m) < 0) {
+               printf("test_mpls_push() failed\n");
+               goto err;
+       }
+
+       if (test_mpls_strip(m) < 0) {
+               printf("test_mpls_push() failed\n");
+               goto err;
+       }
+       if (rte_pktmbuf_data_len(m) != RTE_ETHER_HDR_LEN + RTE_MPLS_HLEN) {
+               printf("bad pkt data length after stripping first MPLS header\n");
+               goto err;
+       }
+
+       if (test_mpls_strip(m) < 0) {
+               printf("test_mpls_push() failed\n");
+               goto err;
+       }
+       if (rte_pktmbuf_data_len(m) != RTE_ETHER_HDR_LEN) {
+               printf("bad pkt data length after stripping second MPLS header\n");
+               goto err;
+       }
+       ret = 0;
+err:
+       if (m)
+               rte_pktmbuf_free(m);
+       if (pktmbuf_pool)
+               rte_mempool_free(pktmbuf_pool);
+       return ret;
+}
+
+REGISTER_TEST_COMMAND(mpls_autotest, test_mpls);
diff --git a/lib/net/rte_mpls.h b/lib/net/rte_mpls.h
index 3e8cb90ec3..a2072cdd10 100644
--- a/lib/net/rte_mpls.h
+++ b/lib/net/rte_mpls.h
@@ -13,6 +13,8 @@

 #include <stdint.h>
 #include <rte_byteorder.h>
+#include <rte_ether.h>
+#include <rte_mbuf.h>

 #ifdef __cplusplus
 extern "C" {
@@ -36,6 +38,110 @@ struct rte_mpls_hdr {
        uint8_t  ttl;       /**< Time to live. */
 } __rte_packed;

+#define RTE_MPLS_HLEN 4 /**< Length of MPLS header. */
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Insert MPLS header into the packet.
+ * If it's the first MPLS header to be inserted in the packet,
+ *  - Update the ether type.
+ *  - Set the MPLS bottom-of-stack bit to 1.
+ *
+ * @param m
+ *   The pointer to the mbuf.
+ * @param mp
+ *   The pointer to the MPLS header.
+ * @return
+ *   0 on success, -1 on error.
+ */
+__rte_experimental
+static inline int
+rte_mpls_push_over_l2(struct rte_mbuf *m, const struct rte_mpls_hdr *mp)
+{
+       struct rte_ether_hdr *oh, *nh;
+       struct rte_mpls_hdr *mph;
+
+       /* Can't insert header if mbuf is shared */
+       if (!RTE_MBUF_DIRECT(m) || rte_mbuf_refcnt_read(m) > 1)
+               return -EINVAL;
+
+       /* Can't insert header if ethernet frame doesn't exist */
+       if (rte_pktmbuf_data_len(m) < RTE_ETHER_HDR_LEN)
+               return -EINVAL;
+
+       oh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
+       nh = (struct rte_ether_hdr *)(void *)
+               rte_pktmbuf_prepend(m, sizeof(struct rte_mpls_hdr));
+       if (nh == NULL)
+               return -ENOSPC;
+
+       memmove(nh, oh, RTE_ETHER_HDR_LEN);
+
+       /* Copy the MPLS header after ethernet frame */
+       mph = rte_pktmbuf_mtod_offset(m, struct rte_mpls_hdr *,
+                       sizeof(struct rte_ether_hdr));
+       memcpy(mph, mp, RTE_MPLS_HLEN);
+
+       mph->tag_msb = rte_cpu_to_be_16(mp->tag_msb);
+
+       /* If first MPLS header, update ether type and bottom-of-stack bit */
+       if (nh->ether_type != rte_cpu_to_be_16(RTE_ETHER_TYPE_MPLS)) {
+               nh->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_MPLS);
+               mph->bs = 1;
+       } else {
+               mph->bs = 0;
+       }
+
+       return 0;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Strip MPLS header from the packet without updating the ether type.
+ *
+ * @param m
+ *   The pointer to the mbuf.
+ * @return
+ *   1 if last MPLS header is stripped,
+ *   0 on success,
+ *   -1 on error.
+ */
+__rte_experimental
+static inline int
+rte_mpls_strip_over_l2(struct rte_mbuf *m)
+{
+       struct rte_ether_hdr *eh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
+       struct rte_mpls_hdr *mph;
+       int result = 0;
+
+       /* Can't strip header if mbuf is shared */
+       if (!RTE_MBUF_DIRECT(m) || rte_mbuf_refcnt_read(m) > 1)
+               return -EINVAL;
+
+       /* Can't strip header if packet length is smaller */
+       if (rte_pktmbuf_data_len(m) < RTE_ETHER_HDR_LEN + RTE_MPLS_HLEN)
+               return -EINVAL;
+
+       if (eh->ether_type != rte_cpu_to_be_16(RTE_ETHER_TYPE_MPLS))
+               return -1;
+
+       /* Stripping MPLS header */
+       mph = rte_pktmbuf_mtod_offset(m, struct rte_mpls_hdr *,
+               sizeof(struct rte_ether_hdr));
+       if (mph->bs & 1)
+               result = 1;
+       memmove(
+               rte_pktmbuf_adj(m, sizeof(struct rte_mpls_hdr)),
+               eh, sizeof(struct rte_ether_hdr));
+       eh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
+
+       return result;
+}
+
 #ifdef __cplusplus
 }
 #endif
--
2.34.1