DPDK patches and discussions
 help / color / mirror / Atom feed
From: Olivier Matz <olivier.matz@6wind.com>
To: dev@dpdk.org, yuanhan.liu@linux.intel.com
Cc: maxime.coquelin@redhat.com, huawei.xie@intel.com,
	stephen@networkplumber.org
Subject: [dpdk-dev] [PATCH 5/5] net/virtio: fix Tso when mbuf is shared
Date: Thu, 24 Nov 2016 09:56:38 +0100	[thread overview]
Message-ID: <1479977798-13417-6-git-send-email-olivier.matz@6wind.com> (raw)
In-Reply-To: <1479977798-13417-1-git-send-email-olivier.matz@6wind.com>

With virtio, doing TSO requires modifying the network
packet data:
- the DPDK API requires the L4 checksum to be set to an
  Intel-NIC-like pseudo-header checksum that does
  not include the IP length
- the virtio peer expects the L4 checksum to be
  a standard pseudo-header checksum.

This is a problem with shared packets, because they
should not be modified.

This patch fixes this issue by copying the headers into
a linear buffer in that case. This buffer is located in
the virtio_tx_region, at the same place where the
virtio header is stored.

The size of this buffer is set to 256, which should
be enough in all cases:
  sizeof(ethernet) + sizeof(vlan) * 2 + sizeof(ip6) +
    sizeof(ip6-ext) + sizeof(tcp) + sizeof(tcp-opts)
  = 14 + 8 + 40 + sizeof(ip6-ext) + 40 + sizeof(tcp-opts)
  = 102 + sizeof(ip6-ext) + sizeof(tcp-opts)

Fixes: 696573046e9e ("net/virtio: support TSO")

Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
---
 drivers/net/virtio/virtio_rxtx.c | 119 +++++++++++++++++++++++++++------------
 drivers/net/virtio/virtqueue.h   |   2 +
 2 files changed, 85 insertions(+), 36 deletions(-)

diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 22d97a4..577c775 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -211,43 +211,73 @@ virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
 
 /* When doing TSO, the IP length is not included in the pseudo header
  * checksum of the packet given to the PMD, but for virtio it is
- * expected.
+ * expected. Fix the mbuf or a copy if the mbuf is shared.
  */
-static void
-virtio_tso_fix_cksum(struct rte_mbuf *m)
+static unsigned int
+virtio_tso_fix_cksum(struct rte_mbuf *m, char *hdr, size_t hdr_sz)
 {
-	/* common case: header is not fragmented */
-	if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
-			m->l4_len)) {
-		struct ipv4_hdr *iph;
-		struct ipv6_hdr *ip6h;
-		struct tcp_hdr *th;
-		uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
-		uint32_t tmp;
-
-		iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
-		th = RTE_PTR_ADD(iph, m->l3_len);
-		if ((iph->version_ihl >> 4) == 4) {
-			iph->hdr_checksum = 0;
-			iph->hdr_checksum = rte_ipv4_cksum(iph);
-			ip_len = iph->total_length;
-			ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
-				m->l3_len);
-		} else {
-			ip6h = (struct ipv6_hdr *)iph;
-			ip_paylen = ip6h->payload_len;
+	struct ipv4_hdr *iph, iph_copy;
+	struct ipv6_hdr *ip6h = NULL, ip6h_copy;
+	struct tcp_hdr *th, th_copy;
+	size_t hdrlen = m->l2_len + m->l3_len + m->l4_len;
+	uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
+	uint32_t tmp;
+	int shared = 0;
+
+	/* mbuf is read-only, we need to copy the headers in a linear buffer */
+	if (unlikely(rte_pktmbuf_data_is_shared(m, 0, hdrlen))) {
+		shared = 1;
+
+		/* network headers are too big, there's nothing we can do */
+		if (hdrlen > hdr_sz)
+			return 0;
+
+		rte_pktmbuf_read_copy(m, 0, hdrlen, hdr);
+		iph = (struct ipv4_hdr *)(hdr + m->l2_len);
+		ip6h = (struct ipv6_hdr *)(hdr + m->l2_len);
+		th = (struct tcp_hdr *)(hdr + m->l2_len + m->l3_len);
+	} else {
+		iph = rte_pktmbuf_read(m, m->l2_len, sizeof(*iph), &iph_copy);
+		th = rte_pktmbuf_read(m, m->l2_len + m->l3_len, sizeof(*th),
+			&th_copy);
+	}
+
+	if ((iph->version_ihl >> 4) == 4) {
+		iph->hdr_checksum = 0;
+		iph->hdr_checksum = rte_ipv4_cksum(iph);
+		ip_len = iph->total_length;
+		ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
+			m->l3_len);
+	} else {
+		if (!shared) {
+			ip6h = rte_pktmbuf_read(m, m->l2_len, sizeof(*ip6h),
+				&ip6h_copy);
 		}
+		ip_paylen = ip6h->payload_len;
+	}
 
-		/* calculate the new phdr checksum not including ip_paylen */
-		prev_cksum = th->cksum;
-		tmp = prev_cksum;
-		tmp += ip_paylen;
-		tmp = (tmp & 0xffff) + (tmp >> 16);
-		new_cksum = tmp;
+	/* calculate the new phdr checksum not including ip_paylen */
+	prev_cksum = th->cksum;
+	tmp = prev_cksum;
+	tmp += ip_paylen;
+	tmp = (tmp & 0xffff) + (tmp >> 16);
+	new_cksum = tmp;
 
-		/* replace it in the packet */
-		th->cksum = new_cksum;
-	}
+	/* replace it in the header */
+	th->cksum = new_cksum;
+
+	/* the update was done in the linear buffer, return */
+	if (shared)
+		return hdrlen;
+
+	/* copy from local buffer into mbuf if required */
+	if ((iph->version_ihl >> 4) == 4)
+		rte_pktmbuf_write(m, m->l2_len, sizeof(*iph), iph);
+	else
+		rte_pktmbuf_write(m, m->l2_len, sizeof(*ip6h), ip6h);
+	rte_pktmbuf_write(m, m->l2_len + m->l3_len, sizeof(*th), th);
+
+	return 0;
 }
 
 static inline int
@@ -268,7 +298,9 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
 	struct vring_desc *start_dp;
 	uint16_t seg_num = cookie->nb_segs;
 	uint16_t head_idx, idx;
+	uint16_t hdr_idx = 0;
 	uint16_t head_size = vq->hw->vtnet_hdr_size;
+	unsigned int offset = 0;
 	struct virtio_net_hdr *hdr;
 	int offload;
 
@@ -303,6 +335,8 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
 
 		/* loop below will fill in rest of the indirect elements */
 		start_dp = txr[idx].tx_indir;
+		hdr_idx = 0;
+		start_dp[hdr_idx].len = vq->hw->vtnet_hdr_size;
 		idx = 1;
 	} else {
 		/* setup first tx ring slot to point to header
@@ -313,7 +347,7 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
 		start_dp[idx].len   = vq->hw->vtnet_hdr_size;
 		start_dp[idx].flags = VRING_DESC_F_NEXT;
 		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
-
+		hdr_idx = idx;
 		idx = start_dp[idx].next;
 	}
 
@@ -345,7 +379,14 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
 
 		/* TCP Segmentation Offload */
 		if (cookie->ol_flags & PKT_TX_TCP_SEG) {
-			virtio_tso_fix_cksum(cookie);
+			offset = virtio_tso_fix_cksum(cookie,
+				RTE_PTR_ADD(hdr, start_dp[hdr_idx].len),
+				VIRTIO_MAX_HDR_SZ);
+			if (offset > 0) {
+				RTE_ASSERT(can_push != 0);
+				start_dp[hdr_idx].len += offset;
+			}
+
 			hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
 				VIRTIO_NET_HDR_GSO_TCPV6 :
 				VIRTIO_NET_HDR_GSO_TCPV4;
@@ -362,10 +403,16 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
 	}
 
 	do {
-		start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
-		start_dp[idx].len   = cookie->data_len;
+		if (offset > cookie->data_len) {
+			offset -= cookie->data_len;
+			continue;
+		}
+		start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq) +
+			offset;
+		start_dp[idx].len   = cookie->data_len - offset;
 		start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
 		idx = start_dp[idx].next;
+		offset = 0;
 	} while ((cookie = cookie->next) != NULL);
 
 	if (use_indirect)
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index f0bb089..edfe0dd 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -254,8 +254,10 @@ struct virtio_net_hdr_mrg_rxbuf {
 
 /* Region reserved to allow for transmit header and indirect ring */
 #define VIRTIO_MAX_TX_INDIRECT 8
+#define VIRTIO_MAX_HDR_SZ 256
 struct virtio_tx_region {
 	struct virtio_net_hdr_mrg_rxbuf tx_hdr;
+	char net_headers[VIRTIO_MAX_HDR_SZ]; /* for offload if mbuf is RO */
 	struct vring_desc tx_indir[VIRTIO_MAX_TX_INDIRECT]
 			   __attribute__((__aligned__(16)));
 };
-- 
2.8.1

  parent reply	other threads:[~2016-11-24  8:57 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-11-24  8:56 [dpdk-dev] [PATCH 0/5] virtio/mbuf: fix virtio tso with shared mbufs Olivier Matz
2016-11-24  8:56 ` [dpdk-dev] [PATCH 1/5] mbuf: remove const attribute in mbuf read function Olivier Matz
2016-11-24  8:56 ` [dpdk-dev] [PATCH 2/5] mbuf: new helper to check if a mbuf is shared Olivier Matz
2016-11-24  8:56 ` [dpdk-dev] [PATCH 3/5] mbuf: new helper to write data in a mbuf chain Olivier Matz
2016-11-24  8:56 ` [dpdk-dev] [PATCH 4/5] mbuf: new helper to copy data from a mbuf Olivier Matz
2016-11-24  8:56 ` Olivier Matz [this message]
2016-12-14  7:27   ` [dpdk-dev] [PATCH 5/5] net/virtio: fix Tso when mbuf is shared Yuanhan Liu
2017-01-09 17:46     ` Olivier Matz
2017-01-16  6:48       ` Yuanhan Liu
2017-01-17 11:18         ` Olivier Matz
2017-01-18  5:03           ` Yuanhan Liu
2017-01-24 10:51             ` Olivier MATZ
2017-01-28 12:32               ` Yuanhan Liu
2017-01-09 17:59   ` Stephen Hemminger

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1479977798-13417-6-git-send-email-olivier.matz@6wind.com \
    --to=olivier.matz@6wind.com \
    --cc=dev@dpdk.org \
    --cc=huawei.xie@intel.com \
    --cc=maxime.coquelin@redhat.com \
    --cc=stephen@networkplumber.org \
    --cc=yuanhan.liu@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).