DPDK patches and discussions
 help / color / mirror / Atom feed
From: "Morten Brørup" <mb@smartsharesystems.com>
To: <scott.k.mitch1@gmail.com>, <dev@dpdk.org>
Cc: <stephen@networkplumber.org>
Subject: RE: [PATCH v11] net: optimize raw checksum computation
Date: Fri, 9 Jan 2026 19:28:05 +0100	[thread overview]
Message-ID: <98CBD80474FA8B44BF855DF32C47DC35F6563F@smartserver.smartshare.dk> (raw)
In-Reply-To: <20260108230509.6541-1-scott.k.mitch1@gmail.com>

>  static inline uint32_t
>  __rte_raw_cksum(const void *buf, size_t len, uint32_t sum)
>  {
> -	const void *end;
> -
> -	for (end = RTE_PTR_ADD(buf, RTE_ALIGN_FLOOR(len,
> sizeof(uint16_t)));
> -	     buf != end; buf = RTE_PTR_ADD(buf, sizeof(uint16_t))) {
> -		uint16_t v;
> -
> -		memcpy(&v, buf, sizeof(uint16_t));
> -		sum += v;
> -	}
> +	/* Process uint16 chunks to preserve overflow/carry math.
> GCC/Clang vectorize the loop. */
> +	const unaligned_uint16_t *buf16 = (const unaligned_uint16_t
> *)buf;
> +	const unaligned_uint16_t *end = buf16 + (len / sizeof(uint16_t));
> +	for (; buf16 != end; buf16++)
> +		sum += *buf16;

Here are some more thoughts about loop unrolling...
In another mail [1], you are discussing manual loop unrolling for rte_ipv4/ipv6_phdr_cksum().
Perhaps the compiler already unrolls the loop for those.
Check the assembly output for the existing code calling __rte_raw_cksum().
If the compiler doesn't unroll the loop in __rte_raw_cksum() for those two functions, maybe you can help it by modifying __rte_raw_cksum(); try replacing the end pointer with an int counter, which will be a compile-time constant when called by rte_ipv4/ipv6_phdr_cksum().

[1]: https://inbox.dpdk.org/dev/CAFn2buA5NzmzA0+t1_5auigvQTyT7Ne6RMVaPVU=sdC03nd2Lg@mail.gmail.com/

PS: I do the following when optimizing inline functions: Add non-inline functions calling the inline functions, and then use "objdump -S" to look at the generated code. E.g.:

/*
 * Non-inline wrapper around __rte_raw_cksum() with every argument passed
 * through unchanged, so the code generated for the general (runtime length)
 * case can be inspected with "objdump -S".
 */
uint32_t
review__rte_raw_cksum(const void *buf, size_t len, uint32_t sum)
{
	return __rte_raw_cksum(buf, len, sum);
}

/*
 * Non-inline wrapper around __rte_raw_cksum() with the length fixed to the
 * literal 20, so the code generated for a constant 20-byte length can be
 * inspected with "objdump -S".
 */
uint32_t
review__rte_raw_cksum_len20(const void *buf, uint32_t sum)
{
	return __rte_raw_cksum(buf, 20, sum);
}

/*
 * Non-inline wrapper around __rte_raw_cksum() with the length fixed to the
 * literal 8, so the code generated for a constant 8-byte length can be
 * inspected with "objdump -S".
 */
uint32_t
review__rte_raw_cksum_len8(const void *buf, uint32_t sum)
{
	return __rte_raw_cksum(buf, 8, sum);
}

> 
>  	/* if length is odd, keeping it byte order independent */
> -	if (unlikely(len % 2)) {
> +	if (len & 1) {
>  		uint16_t left = 0;
> -
>  		memcpy(&left, end, 1);
>  		sum += left;
>  	}
> diff --git a/lib/net/rte_ip4.h b/lib/net/rte_ip4.h
> index 822a660cfb..63852717c9 100644
> --- a/lib/net/rte_ip4.h
> +++ b/lib/net/rte_ip4.h
> @@ -223,21 +223,17 @@ rte_ipv4_phdr_cksum(const struct rte_ipv4_hdr
> *ipv4_hdr, uint64_t ol_flags)
>  		uint8_t  zero;     /* zero. */
>  		uint8_t  proto;    /* L4 protocol type. */
>  		uint16_t len;      /* L4 length. */
> -	} psd_hdr;
> -
> -	uint32_t l3_len;
> -
> -	psd_hdr.src_addr = ipv4_hdr->src_addr;
> -	psd_hdr.dst_addr = ipv4_hdr->dst_addr;
> -	psd_hdr.zero = 0;
> -	psd_hdr.proto = ipv4_hdr->next_proto_id;
> -	if (ol_flags & (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG)) {
> -		psd_hdr.len = 0;
> -	} else {
> -		l3_len = rte_be_to_cpu_16(ipv4_hdr->total_length);
> -		psd_hdr.len = rte_cpu_to_be_16((uint16_t)(l3_len -
> -			rte_ipv4_hdr_len(ipv4_hdr)));
> -	}
> +	} psd_hdr = {
> +		.src_addr = ipv4_hdr->src_addr,
> +		.dst_addr = ipv4_hdr->dst_addr,
> +		.proto = ipv4_hdr->next_proto_id,
> +		.len = (ol_flags & (RTE_MBUF_F_TX_TCP_SEG |
> RTE_MBUF_F_TX_UDP_SEG))
> +			? (uint16_t)0
> +			:
> rte_cpu_to_be_16((uint16_t)(rte_be_to_cpu_16(ipv4_hdr->total_length) -
> +					rte_ipv4_hdr_len(ipv4_hdr)))
> +	};
> +	RTE_SUPPRESS_UNINITIALIZED_WARNING(psd_hdr);
> +
>  	return rte_raw_cksum(&psd_hdr, sizeof(psd_hdr));
>  }
> 
> diff --git a/lib/net/rte_ip6.h b/lib/net/rte_ip6.h
> index d1abf1f5d5..8a7e5e4b8a 100644
> --- a/lib/net/rte_ip6.h
> +++ b/lib/net/rte_ip6.h
> @@ -560,19 +560,18 @@ rte_ipv6_phdr_cksum(const struct rte_ipv6_hdr
> *ipv6_hdr, uint64_t ol_flags)
>  static inline uint16_t
>  rte_ipv6_phdr_cksum(const struct rte_ipv6_hdr *ipv6_hdr, uint64_t
> ol_flags)
>  {
> -	uint32_t sum;
>  	struct {
>  		rte_be32_t len;   /* L4 length. */
>  		rte_be32_t proto; /* L4 protocol - top 3 bytes must be zero
> */
> -	} psd_hdr;
> -
> -	psd_hdr.proto = (uint32_t)(ipv6_hdr->proto << 24);
> -	if (ol_flags & (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG))
> -		psd_hdr.len = 0;
> -	else
> -		psd_hdr.len = ipv6_hdr->payload_len;
> +	} psd_hdr = {
> +		.len = (ol_flags & (RTE_MBUF_F_TX_TCP_SEG |
> RTE_MBUF_F_TX_UDP_SEG))
> +			? (rte_be32_t)0
> +			: ipv6_hdr->payload_len,
> +		.proto = (uint32_t)(ipv6_hdr->proto << 24)
> +	};
> +	RTE_SUPPRESS_UNINITIALIZED_WARNING(psd_hdr);
> 
> -	sum = __rte_raw_cksum(&ipv6_hdr->src_addr,
> +	uint32_t sum = __rte_raw_cksum(&ipv6_hdr->src_addr,
>  		sizeof(ipv6_hdr->src_addr) + sizeof(ipv6_hdr->dst_addr),
>  		0);
>  	sum = __rte_raw_cksum(&psd_hdr, sizeof(psd_hdr), sum);
> --
> 2.39.5 (Apple Git-154)


  parent reply	other threads:[~2026-01-09 18:28 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-01-08 23:05 scott.k.mitch1
2026-01-09  0:44 ` Scott Mitchell
2026-01-09  9:26 ` Morten Brørup
2026-01-09 15:27   ` Scott Mitchell
2026-01-09 15:58     ` Morten Brørup
2026-01-09 17:23       ` Scott Mitchell
2026-01-09 22:12     ` Morten Brørup
2026-01-10  4:19       ` Scott Mitchell
2026-01-09 18:28 ` Morten Brørup [this message]
2026-01-10  3:41   ` Scott Mitchell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=98CBD80474FA8B44BF855DF32C47DC35F6563F@smartserver.smartshare.dk \
    --to=mb@smartsharesystems.com \
    --cc=dev@dpdk.org \
    --cc=scott.k.mitch1@gmail.com \
    --cc=stephen@networkplumber.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).