The rte_raw_cksum_mbuf() function computes the raw checksum of a packet.
If the packet payload is spread across multiple mbuf segments, the
function takes the hard (multi-segment) path. In that path, the
variable 'tmp' is a uint32_t, but it is cast to uint16_t before being
passed to rte_bswap16(), so its upper 16 bits are dropped.
Meanwhile, the variable 'sum' is also a uint32_t, so 'sum += tmp'
loses the carry when the addition overflows.
Either loss makes the resulting checksum incorrect.
This commit fixes both bugs by byte-swapping all 32 bits of 'tmp' and
accumulating into a 64-bit 'sum' that is folded to 16 bits at the end.

Signed-off-by: Su Sai <susai.ss@bytedance.com>
---
 .mailmap            |  1 +
 lib/net/rte_cksum.h | 26 +++++++++++++++++++++++---
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/.mailmap b/.mailmap
index 34a99f93a1..838b544a97 100644
--- a/.mailmap
+++ b/.mailmap
@@ -1891,3 +1891,4 @@ Zoltan Kiss <zoltan.kiss@schaman.hu> <zoltan.kiss@linaro.org>
 Zorik Machulsky <zorik@amazon.com>
 Zyta Szpak <zyta@marvell.com> <zr@semihalf.com>
 Zyta Szpak <zyta@marvell.com> <zyta.szpak@semihalf.com>
+Su Sai <susai.ss@bytedance.com>
diff --git a/lib/net/rte_cksum.h b/lib/net/rte_cksum.h
index a8e8927952..aa584d5f8d 100644
--- a/lib/net/rte_cksum.h
+++ b/lib/net/rte_cksum.h
@@ -80,6 +80,25 @@ __rte_raw_cksum_reduce(uint32_t sum)
         return (uint16_t)sum;
 }
 
+/**
+ * @internal Reduce a 64-bit sum to the non-complemented 16-bit checksum.
+ * Helper routine for rte_raw_cksum_mbuf().
+ *
+ * @param sum
+ *   Value of the 64-bit sum.
+ * @return
+ *   The non-complemented checksum.
+ */
+static inline uint16_t
+__rte_raw_cksum_reduce_u64(uint64_t sum)
+{
+        uint32_t tmp;
+
+        tmp = __rte_raw_cksum_reduce((uint32_t)sum);
+        tmp += __rte_raw_cksum_reduce((uint32_t)(sum >> 32));
+        return __rte_raw_cksum_reduce(tmp);
+}
+
 /**
  * Process the non-complemented checksum of a buffer.
  *
@@ -119,8 +138,9 @@ rte_raw_cksum_mbuf(const struct rte_mbuf *m, uint32_t off, uint32_t len,
 {
         const struct rte_mbuf *seg;
         const char *buf;
-        uint32_t sum, tmp;
+        uint32_t tmp;
         uint32_t seglen, done;
+        uint64_t sum;
 
         /* easy case: all data in the first segment */
         if (off + len <= rte_pktmbuf_data_len(m)) {
@@ -157,7 +177,7 @@ rte_raw_cksum_mbuf(const struct rte_mbuf *m, uint32_t off, uint32_t len,
         for (;;) {
                 tmp = __rte_raw_cksum(buf, seglen, 0);
                 if (done & 1)
-                        tmp = rte_bswap16((uint16_t)tmp);
+                        tmp = rte_bswap32(tmp);
                 sum += tmp;
                 done += seglen;
                 if (done == len)
@@ -169,7 +189,7 @@ rte_raw_cksum_mbuf(const struct rte_mbuf *m, uint32_t off, uint32_t len,
                         seglen = len - done;
         }
 
-        *cksum = __rte_raw_cksum_reduce(sum);
+        *cksum = __rte_raw_cksum_reduce_u64(sum);
         return 0;
 }
 
-- 
2.39.2 (Apple Git-143)