DPDK patches and discussions
 help / color / mirror / Atom feed
From: Ophir Munk <ophirmu@mellanox.com>
To: dev@dpdk.org, Pascal Mazon <pascal.mazon@6wind.com>,
	Keith Wiles <keith.wiles@intel.com>
Cc: Thomas Monjalon <thomas@monjalon.net>,
	Olga Shern <olgas@mellanox.com>,
	Ophir Munk <ophirmu@mellanox.com>
Subject: [dpdk-dev] [PATCH v4 1/2] net/tap: calculate checksums of multi segs packets
Date: Tue, 12 Jun 2018 16:31:47 +0000	[thread overview]
Message-ID: <1528821108-12405-2-git-send-email-ophirmu@mellanox.com> (raw)
In-Reply-To: <1528821108-12405-1-git-send-email-ophirmu@mellanox.com>

Prior to this commit IP/UDP/TCP checksum offload calculations
were skipped in case of a multi segments packet.
This commit enables TAP checksum calculations for multi segments
packets.
The only restriction is that the first segment must contain
headers of layers 3 (IP) and 4 (UDP or TCP)

Reviewed-by: Raslan Darawsheh <rasland@mellanox.com>
Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
---
 drivers/net/tap/rte_eth_tap.c | 158 +++++++++++++++++++++++++++++-------------
 1 file changed, 110 insertions(+), 48 deletions(-)

diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index df396bf..c19f053 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -415,12 +415,43 @@ tap_tx_offload_get_queue_capa(void)
 	       DEV_TX_OFFLOAD_TCP_CKSUM;
 }
 
+/* Finalize l4 checksum calculation */
 static void
-tap_tx_offload(char *packet, uint64_t ol_flags, unsigned int l2_len,
-	       unsigned int l3_len)
+tap_tx_l4_cksum(uint16_t *l4_cksum, uint16_t l4_phdr_cksum,
+		uint32_t l4_raw_cksum)
 {
-	void *l3_hdr = packet + l2_len;
+	if (l4_cksum) {
+		uint32_t cksum;
+
+		cksum = __rte_raw_cksum_reduce(l4_raw_cksum);
+		cksum += l4_phdr_cksum;
+
+		cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
+		cksum = (~cksum) & 0xffff;
+		if (cksum == 0)
+			cksum = 0xffff;
+		*l4_cksum = cksum;
+	}
+}
 
+/* Accumaulate L4 raw checksums */
+static void
+tap_tx_l4_add_rcksum(char *l4_data, unsigned int l4_len, uint16_t *l4_cksum,
+			uint32_t *l4_raw_cksum)
+{
+	if (l4_cksum == NULL)
+		return;
+
+	*l4_raw_cksum = __rte_raw_cksum(l4_data, l4_len, *l4_raw_cksum);
+}
+
+/* L3 and L4 pseudo headers checksum offloads */
+static void
+tap_tx_l3_cksum(char *packet, uint64_t ol_flags, unsigned int l2_len,
+		unsigned int l3_len, unsigned int l4_len, uint16_t **l4_cksum,
+		uint16_t *l4_phdr_cksum, uint32_t *l4_raw_cksum)
+{
+	void *l3_hdr = packet + l2_len;
 	if (ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_IPV4)) {
 		struct ipv4_hdr *iph = l3_hdr;
 		uint16_t cksum;
@@ -430,38 +461,21 @@ tap_tx_offload(char *packet, uint64_t ol_flags, unsigned int l2_len,
 		iph->hdr_checksum = (cksum == 0xffff) ? cksum : ~cksum;
 	}
 	if (ol_flags & PKT_TX_L4_MASK) {
-		uint16_t l4_len;
-		uint32_t cksum;
-		uint16_t *l4_cksum;
 		void *l4_hdr;
 
 		l4_hdr = packet + l2_len + l3_len;
 		if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM)
-			l4_cksum = &((struct udp_hdr *)l4_hdr)->dgram_cksum;
+			*l4_cksum = &((struct udp_hdr *)l4_hdr)->dgram_cksum;
 		else if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_TCP_CKSUM)
-			l4_cksum = &((struct tcp_hdr *)l4_hdr)->cksum;
+			*l4_cksum = &((struct tcp_hdr *)l4_hdr)->cksum;
 		else
 			return;
-		*l4_cksum = 0;
-		if (ol_flags & PKT_TX_IPV4) {
-			struct ipv4_hdr *iph = l3_hdr;
-
-			l4_len = rte_be_to_cpu_16(iph->total_length) - l3_len;
-			cksum = rte_ipv4_phdr_cksum(l3_hdr, 0);
-		} else {
-			struct ipv6_hdr *ip6h = l3_hdr;
-
-			/* payload_len does not include ext headers */
-			l4_len = rte_be_to_cpu_16(ip6h->payload_len) -
-				l3_len + sizeof(struct ipv6_hdr);
-			cksum = rte_ipv6_phdr_cksum(l3_hdr, 0);
-		}
-		cksum += rte_raw_cksum(l4_hdr, l4_len);
-		cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
-		cksum = (~cksum) & 0xffff;
-		if (cksum == 0)
-			cksum = 0xffff;
-		*l4_cksum = cksum;
+		**l4_cksum = 0;
+		if (ol_flags & PKT_TX_IPV4)
+			*l4_phdr_cksum = rte_ipv4_phdr_cksum(l3_hdr, 0);
+		else
+			*l4_phdr_cksum = rte_ipv6_phdr_cksum(l3_hdr, 0);
+		*l4_raw_cksum = __rte_raw_cksum(l4_hdr, l4_len, 0);
 	}
 }
 
@@ -482,17 +496,27 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	max_size = *txq->mtu + (ETHER_HDR_LEN + ETHER_CRC_LEN + 4);
 	for (i = 0; i < nb_pkts; i++) {
 		struct rte_mbuf *mbuf = bufs[num_tx];
-		struct iovec iovecs[mbuf->nb_segs + 1];
+		struct iovec iovecs[mbuf->nb_segs + 2];
 		struct tun_pi pi = { .flags = 0, .proto = 0x00 };
 		struct rte_mbuf *seg = mbuf;
 		char m_copy[mbuf->data_len];
+		int proto;
 		int n;
 		int j;
+		int k; /* first index in iovecs for copying segments */
+		uint16_t l234_hlen; /* length of layers 2,3,4 headers */
+		uint16_t seg_len; /* length of first segment */
+		uint16_t nb_segs;
+		uint16_t *l4_cksum; /* l4 checksum (pseudo header + payload) */
+		uint32_t l4_raw_cksum = 0; /* TCP/UDP payload raw checksum */
+		uint16_t l4_phdr_cksum = 0; /* TCP/UDP pseudo header checksum */
+		uint16_t is_cksum = 0; /* in case cksum should be offloaded */
 
 		/* stats.errs will be incremented */
 		if (rte_pktmbuf_pkt_len(mbuf) > max_size)
 			break;
 
+		l4_cksum = NULL;
 		if (txq->type == ETH_TUNTAP_TYPE_TUN) {
 			/*
 			 * TUN and TAP are created with IFF_NO_PI disabled.
@@ -505,35 +529,73 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			 * is 4 or 6, then protocol field is updated.
 			 */
 			char *buff_data = rte_pktmbuf_mtod(seg, void *);
-			j = (*buff_data & 0xf0);
-			pi.proto = (j == 0x40) ? rte_cpu_to_be_16(ETHER_TYPE_IPv4) :
-				(j == 0x60) ? rte_cpu_to_be_16(ETHER_TYPE_IPv6) : 0x00;
+			proto = (*buff_data & 0xf0);
+			pi.proto = (proto == 0x40) ?
+				rte_cpu_to_be_16(ETHER_TYPE_IPv4) :
+				((proto == 0x60) ?
+					rte_cpu_to_be_16(ETHER_TYPE_IPv6) :
+					0x00);
 		}
 
-		iovecs[0].iov_base = &pi;
-		iovecs[0].iov_len = sizeof(pi);
-		for (j = 1; j <= mbuf->nb_segs; j++) {
-			iovecs[j].iov_len = rte_pktmbuf_data_len(seg);
-			iovecs[j].iov_base =
-				rte_pktmbuf_mtod(seg, void *);
-			seg = seg->next;
-		}
+		k = 0;
+		iovecs[k].iov_base = &pi;
+		iovecs[k].iov_len = sizeof(pi);
+		k++;
+		nb_segs = mbuf->nb_segs;
 		if (txq->csum &&
 		    ((mbuf->ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_IPV4) ||
 		     (mbuf->ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM ||
 		     (mbuf->ol_flags & PKT_TX_L4_MASK) == PKT_TX_TCP_CKSUM))) {
-			/* Support only packets with all data in the same seg */
-			if (mbuf->nb_segs > 1)
+			is_cksum = 1;
+			/* Support only packets with at least layer 4
+			 * header included in the first segment
+			 */
+			seg_len = rte_pktmbuf_data_len(mbuf);
+			l234_hlen = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len;
+			if (seg_len < l234_hlen)
 				break;
-			/* To change checksums, work on a copy of data. */
+
+			/* To change checksums, work on a
+			 * copy of l2, l3 l4 headers.
+			 */
 			rte_memcpy(m_copy, rte_pktmbuf_mtod(mbuf, void *),
-				   rte_pktmbuf_data_len(mbuf));
-			tap_tx_offload(m_copy, mbuf->ol_flags,
-				       mbuf->l2_len, mbuf->l3_len);
-			iovecs[1].iov_base = m_copy;
+					l234_hlen);
+			tap_tx_l3_cksum(m_copy, mbuf->ol_flags,
+				       mbuf->l2_len, mbuf->l3_len, mbuf->l4_len,
+				       &l4_cksum, &l4_phdr_cksum,
+				       &l4_raw_cksum);
+			iovecs[k].iov_base = m_copy;
+			iovecs[k].iov_len = l234_hlen;
+			k++;
+			/* Update next iovecs[] beyond l2, l3, l4 headers */
+			if (seg_len > l234_hlen) {
+				iovecs[k].iov_len = seg_len - l234_hlen;
+				iovecs[k].iov_base =
+					rte_pktmbuf_mtod(seg, char *) +
+						l234_hlen;
+				tap_tx_l4_add_rcksum(iovecs[k].iov_base,
+					iovecs[k].iov_len, l4_cksum,
+					&l4_raw_cksum);
+				k++;
+				nb_segs++;
+			}
+			seg = seg->next;
 		}
+		for (j = k; j <= nb_segs; j++) {
+			iovecs[j].iov_len = rte_pktmbuf_data_len(seg);
+			iovecs[j].iov_base = rte_pktmbuf_mtod(seg, void *);
+			if (is_cksum)
+				tap_tx_l4_add_rcksum(iovecs[j].iov_base,
+					iovecs[j].iov_len, l4_cksum,
+					&l4_raw_cksum);
+			seg = seg->next;
+		}
+
+		if (is_cksum)
+			tap_tx_l4_cksum(l4_cksum, l4_phdr_cksum, l4_raw_cksum);
+
 		/* copy the tx frame data */
-		n = writev(txq->fd, iovecs, mbuf->nb_segs + 1);
+		n = writev(txq->fd, iovecs, j);
 		if (n <= 0)
 			break;
 
-- 
2.7.4

  reply	other threads:[~2018-06-12 16:32 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-09 21:10 [dpdk-dev] [RFC 0/2] TAP TSO Implementation Ophir Munk
2018-03-09 21:10 ` [dpdk-dev] [RFC 1/2] net/tap: calculate checksum for multi segs packets Ophir Munk
2018-04-09 22:33   ` [dpdk-dev] [PATCH v1 0/2] TAP TSO Ophir Munk
2018-04-09 22:33     ` [dpdk-dev] [PATCH v1 1/2] net/tap: calculate checksums of multi segs packets Ophir Munk
2018-04-09 22:33     ` [dpdk-dev] [PATCH v1 2/2] net/tap: support TSO (TCP Segment Offload) Ophir Munk
2018-04-22 11:30       ` [dpdk-dev] [PATCH v2 0/2] TAP TSO Ophir Munk
2018-04-22 11:30         ` [dpdk-dev] [PATCH v2 1/2] net/tap: calculate checksums of multi segs packets Ophir Munk
2018-05-07 21:54           ` [dpdk-dev] [PATCH v3 0/2] TAP TSO Ophir Munk
2018-05-07 21:54             ` [dpdk-dev] [PATCH v3 1/2] net/tap: calculate checksums of multi segs packets Ophir Munk
2018-05-07 21:54             ` [dpdk-dev] [PATCH v3 2/2] net/tap: support TSO (TCP Segment Offload) Ophir Munk
2018-05-31 13:52           ` [dpdk-dev] [PATCH v2 1/2] net/tap: calculate checksums of multi segs packets Ferruh Yigit
2018-05-31 13:54             ` Ferruh Yigit
2018-04-22 11:30         ` [dpdk-dev] [PATCH v2 2/2] net/tap: support TSO (TCP Segment Offload) Ophir Munk
2018-06-12 16:31   ` [dpdk-dev] [PATCH v4 0/2] TAP TSO Ophir Munk
2018-06-12 16:31     ` Ophir Munk [this message]
2018-06-12 17:17       ` [dpdk-dev] [PATCH v4 1/2] net/tap: calculate checksums of multi segs packets Wiles, Keith
2018-06-12 16:31     ` [dpdk-dev] [PATCH v4 2/2] net/tap: support TSO (TCP Segment Offload) Ophir Munk
2018-06-12 17:22       ` Wiles, Keith
2018-06-13 16:04       ` Wiles, Keith
2018-06-14  7:59         ` Ophir Munk
2018-06-14 12:58           ` Wiles, Keith
2018-06-23 23:17       ` [dpdk-dev] [PATCH v5 0/2] TAP TSO Ophir Munk
2018-06-23 23:17         ` [dpdk-dev] [PATCH v5 1/2] net/tap: calculate checksums of multi segs packets Ophir Munk
2018-06-24 13:45           ` Wiles, Keith
2018-06-27 13:11             ` Ferruh Yigit
2018-06-23 23:17         ` [dpdk-dev] [PATCH v5 2/2] net/tap: support TSO (TCP Segment Offload) Ophir Munk
2018-03-09 21:10 ` [dpdk-dev] [RFC 2/2] net/tap: implement TAP TSO Ophir Munk
2018-04-09 16:38 ` [dpdk-dev] [RFC 0/2] TAP TSO Implementation Ferruh Yigit
2018-04-09 22:37   ` Ophir Munk
2018-04-10 14:30     ` Ferruh Yigit
2018-04-10 15:31       ` Ophir Munk

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1528821108-12405-2-git-send-email-ophirmu@mellanox.com \
    --to=ophirmu@mellanox.com \
    --cc=dev@dpdk.org \
    --cc=keith.wiles@intel.com \
    --cc=olgas@mellanox.com \
    --cc=pascal.mazon@6wind.com \
    --cc=thomas@monjalon.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).