From: Olivier Matz <olivier.matz@6wind.com>
To: dev@dpdk.org, yuanhan.liu@linux.intel.com
Cc: maxime.coquelin@redhat.com, huawei.xie@intel.com,
stephen@networkplumber.org
Subject: [dpdk-dev] [PATCH 5/5] net/virtio: fix Tso when mbuf is shared
Date: Thu, 24 Nov 2016 09:56:38 +0100 [thread overview]
Message-ID: <1479977798-13417-6-git-send-email-olivier.matz@6wind.com> (raw)
In-Reply-To: <1479977798-13417-1-git-send-email-olivier.matz@6wind.com>
With virtio, doing TSO requires modifying the network
packet data:
- the dpdk API requires setting the l4 checksum to an
Intel-NIC-like pseudo header checksum that does
not include the ip length
- the virtio peer expects that the l4 checksum is
a standard pseudo header checksum.
This is a problem with shared packets, because they
should not be modified.
This patch fixes this issue by copying the headers into
a linear buffer in that case. This buffer is located in
the virtio_tx_region, at the same place where the
virtio header is stored.
The size of this buffer is set to 256, which should
be enough in all cases:
sizeof(ethernet) + sizeof(vlan) * 2 + sizeof(ip6) +
sizeof(ip6-ext) + sizeof(tcp) + sizeof(tcp-opts)
= 14 + 8 + 40 + sizeof(ip6-ext) + 40 + sizeof(tcp-opts)
= 102 + sizeof(ip6-ext) + sizeof(tcp-opts)
Fixes: 696573046e9e ("net/virtio: support TSO")
Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
---
drivers/net/virtio/virtio_rxtx.c | 119 +++++++++++++++++++++++++++------------
drivers/net/virtio/virtqueue.h | 2 +
2 files changed, 85 insertions(+), 36 deletions(-)
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 22d97a4..577c775 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -211,43 +211,73 @@ virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
/* When doing TSO, the IP length is not included in the pseudo header
* checksum of the packet given to the PMD, but for virtio it is
- * expected.
+ * expected. Fix the mbuf or a copy if the mbuf is shared.
*/
-static void
-virtio_tso_fix_cksum(struct rte_mbuf *m)
+static unsigned int
+virtio_tso_fix_cksum(struct rte_mbuf *m, char *hdr, size_t hdr_sz)
{
- /* common case: header is not fragmented */
- if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
- m->l4_len)) {
- struct ipv4_hdr *iph;
- struct ipv6_hdr *ip6h;
- struct tcp_hdr *th;
- uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
- uint32_t tmp;
-
- iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
- th = RTE_PTR_ADD(iph, m->l3_len);
- if ((iph->version_ihl >> 4) == 4) {
- iph->hdr_checksum = 0;
- iph->hdr_checksum = rte_ipv4_cksum(iph);
- ip_len = iph->total_length;
- ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
- m->l3_len);
- } else {
- ip6h = (struct ipv6_hdr *)iph;
- ip_paylen = ip6h->payload_len;
+ struct ipv4_hdr *iph, iph_copy;
+ struct ipv6_hdr *ip6h = NULL, ip6h_copy;
+ struct tcp_hdr *th, th_copy;
+ size_t hdrlen = m->l2_len + m->l3_len + m->l4_len;
+ uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
+ uint32_t tmp;
+ int shared = 0;
+
+ /* mbuf is read-only, we need to copy the headers in a linear buffer */
+ if (unlikely(rte_pktmbuf_data_is_shared(m, 0, hdrlen))) {
+ shared = 1;
+
+ /* network headers are too big, there's nothing we can do */
+ if (hdrlen > hdr_sz)
+ return 0;
+
+ rte_pktmbuf_read_copy(m, 0, hdrlen, hdr);
+ iph = (struct ipv4_hdr *)(hdr + m->l2_len);
+ ip6h = (struct ipv6_hdr *)(hdr + m->l2_len);
+ th = (struct tcp_hdr *)(hdr + m->l2_len + m->l3_len);
+ } else {
+ iph = rte_pktmbuf_read(m, m->l2_len, sizeof(*iph), &iph_copy);
+ th = rte_pktmbuf_read(m, m->l2_len + m->l3_len, sizeof(*th),
+ &th_copy);
+ }
+
+ if ((iph->version_ihl >> 4) == 4) {
+ iph->hdr_checksum = 0;
+ iph->hdr_checksum = rte_ipv4_cksum(iph);
+ ip_len = iph->total_length;
+ ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
+ m->l3_len);
+ } else {
+ if (!shared) {
+ ip6h = rte_pktmbuf_read(m, m->l2_len, sizeof(*ip6h),
+ &ip6h_copy);
}
+ ip_paylen = ip6h->payload_len;
+ }
- /* calculate the new phdr checksum not including ip_paylen */
- prev_cksum = th->cksum;
- tmp = prev_cksum;
- tmp += ip_paylen;
- tmp = (tmp & 0xffff) + (tmp >> 16);
- new_cksum = tmp;
+ /* calculate the new phdr checksum not including ip_paylen */
+ prev_cksum = th->cksum;
+ tmp = prev_cksum;
+ tmp += ip_paylen;
+ tmp = (tmp & 0xffff) + (tmp >> 16);
+ new_cksum = tmp;
- /* replace it in the packet */
- th->cksum = new_cksum;
- }
+ /* replace it in the header */
+ th->cksum = new_cksum;
+
+ /* the update was done in the linear buffer, return */
+ if (shared)
+ return hdrlen;
+
+ /* copy from local buffer into mbuf if required */
+ if ((iph->version_ihl >> 4) == 4)
+ rte_pktmbuf_write(m, m->l2_len, sizeof(*iph), iph);
+ else
+ rte_pktmbuf_write(m, m->l2_len, sizeof(*ip6h), ip6h);
+ rte_pktmbuf_write(m, m->l2_len + m->l3_len, sizeof(*th), th);
+
+ return 0;
}
static inline int
@@ -268,7 +298,9 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
struct vring_desc *start_dp;
uint16_t seg_num = cookie->nb_segs;
uint16_t head_idx, idx;
+ uint16_t hdr_idx = 0;
uint16_t head_size = vq->hw->vtnet_hdr_size;
+ unsigned int offset = 0;
struct virtio_net_hdr *hdr;
int offload;
@@ -303,6 +335,8 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
/* loop below will fill in rest of the indirect elements */
start_dp = txr[idx].tx_indir;
+ hdr_idx = 0;
+ start_dp[hdr_idx].len = vq->hw->vtnet_hdr_size;
idx = 1;
} else {
/* setup first tx ring slot to point to header
@@ -313,7 +347,7 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
start_dp[idx].len = vq->hw->vtnet_hdr_size;
start_dp[idx].flags = VRING_DESC_F_NEXT;
hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
-
+ hdr_idx = idx;
idx = start_dp[idx].next;
}
@@ -345,7 +379,14 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
/* TCP Segmentation Offload */
if (cookie->ol_flags & PKT_TX_TCP_SEG) {
- virtio_tso_fix_cksum(cookie);
+ offset = virtio_tso_fix_cksum(cookie,
+ RTE_PTR_ADD(hdr, start_dp[hdr_idx].len),
+ VIRTIO_MAX_HDR_SZ);
+ if (offset > 0) {
+ RTE_ASSERT(can_push != 0);
+ start_dp[hdr_idx].len += offset;
+ }
+
hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
VIRTIO_NET_HDR_GSO_TCPV6 :
VIRTIO_NET_HDR_GSO_TCPV4;
@@ -362,10 +403,16 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
}
do {
- start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
- start_dp[idx].len = cookie->data_len;
+ if (offset > cookie->data_len) {
+ offset -= cookie->data_len;
+ continue;
+ }
+ start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq) +
+ offset;
+ start_dp[idx].len = cookie->data_len - offset;
start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
idx = start_dp[idx].next;
+ offset = 0;
} while ((cookie = cookie->next) != NULL);
if (use_indirect)
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index f0bb089..edfe0dd 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -254,8 +254,10 @@ struct virtio_net_hdr_mrg_rxbuf {
/* Region reserved to allow for transmit header and indirect ring */
#define VIRTIO_MAX_TX_INDIRECT 8
+#define VIRTIO_MAX_HDR_SZ 256
struct virtio_tx_region {
struct virtio_net_hdr_mrg_rxbuf tx_hdr;
+ char net_headers[VIRTIO_MAX_HDR_SZ]; /* for offload if mbuf is RO */
struct vring_desc tx_indir[VIRTIO_MAX_TX_INDIRECT]
__attribute__((__aligned__(16)));
};
--
2.8.1
next prev parent reply other threads:[~2016-11-24 8:57 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-11-24 8:56 [dpdk-dev] [PATCH 0/5] virtio/mbuf: fix virtio tso with shared mbufs Olivier Matz
2016-11-24 8:56 ` [dpdk-dev] [PATCH 1/5] mbuf: remove const attribute in mbuf read function Olivier Matz
2016-11-24 8:56 ` [dpdk-dev] [PATCH 2/5] mbuf: new helper to check if a mbuf is shared Olivier Matz
2016-11-24 8:56 ` [dpdk-dev] [PATCH 3/5] mbuf: new helper to write data in a mbuf chain Olivier Matz
2016-11-24 8:56 ` [dpdk-dev] [PATCH 4/5] mbuf: new helper to copy data from a mbuf Olivier Matz
2016-11-24 8:56 ` Olivier Matz [this message]
2016-12-14 7:27 ` [dpdk-dev] [PATCH 5/5] net/virtio: fix Tso when mbuf is shared Yuanhan Liu
2017-01-09 17:46 ` Olivier Matz
2017-01-16 6:48 ` Yuanhan Liu
2017-01-17 11:18 ` Olivier Matz
2017-01-18 5:03 ` Yuanhan Liu
2017-01-24 10:51 ` Olivier MATZ
2017-01-28 12:32 ` Yuanhan Liu
2017-01-09 17:59 ` Stephen Hemminger
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1479977798-13417-6-git-send-email-olivier.matz@6wind.com \
--to=olivier.matz@6wind.com \
--cc=dev@dpdk.org \
--cc=huawei.xie@intel.com \
--cc=maxime.coquelin@redhat.com \
--cc=stephen@networkplumber.org \
--cc=yuanhan.liu@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).