patches for DPDK stable branches
 help / color / mirror / Atom feed
From: Maxime Coquelin <maxime.coquelin@redhat.com>
To: dev@dpdk.org, jasowang@redhat.com, chenbo.xia@intel.com,
	david.marchand@redhat.com, olivier.matz@6wind.com
Cc: stable@dpdk.org, Maxime Coquelin <maxime.coquelin@redhat.com>
Subject: [PATCH 5/6] net/vhost: perform SW checksum in Rx path
Date: Thu,  5 May 2022 12:27:28 +0200
Message-ID: <20220505102729.821075-6-maxime.coquelin@redhat.com> (raw)
In-Reply-To: <20220505102729.821075-1-maxime.coquelin@redhat.com>

Virtio specification supports host checksum offloading
for L4, which is enabled with VIRTIO_NET_F_CSUM feature
negotiation. However, the Vhost PMD does not advertise
Rx checksum offload capabilities, so we can end up with
the VIRTIO_NET_F_CSUM feature being negotiated, implying
the Vhost library returns packets with checksum being
offloaded while the application did not request it.

Advertising these offload capabilities at the ethdev level
is not enough, because we could still end up with the
application not enabling these offloads while the guest
still negotiates them.

This patch advertises the Rx checksum offload capabilities,
and introduces a compatibility layer to cover the case where
VIRTIO_NET_F_CSUM has been negotiated but the application
does not configure the Rx checksum offloads. This layer
performs the L4 Rx checksum in SW for UDP and TCP. Note
that it is not needed to calculate the pseudo-header
checksum, because the Virtio specification requires that
the driver do it.

This patch does not advertise SCTP checksum offloading
capability for now, but it could be handled later if the
need arises.

Reported-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 doc/guides/nics/features/vhost.ini |  1 +
 drivers/net/vhost/rte_eth_vhost.c  | 83 ++++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+)

diff --git a/doc/guides/nics/features/vhost.ini b/doc/guides/nics/features/vhost.ini
index ef81abb439..15f4dfe5e8 100644
--- a/doc/guides/nics/features/vhost.ini
+++ b/doc/guides/nics/features/vhost.ini
@@ -7,6 +7,7 @@
 Link status          = Y
 Free Tx mbuf on demand = Y
 Queue status event   = Y
+L4 checksum offload  = P
 Basic stats          = Y
 Extended stats       = Y
 x86-32               = Y
diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index baa973ad6d..d5303f7368 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -12,6 +12,7 @@
 #include <ethdev_vdev.h>
 #include <rte_malloc.h>
 #include <rte_memcpy.h>
+#include <rte_net.h>
 #include <rte_bus_vdev.h>
 #include <rte_kvargs.h>
 #include <rte_vhost.h>
@@ -107,10 +108,12 @@ struct pmd_internal {
 	char *iface_name;
 	uint64_t flags;
 	uint64_t disable_flags;
+	uint64_t features;
 	uint16_t max_queues;
 	int vid;
 	rte_atomic32_t started;
 	bool vlan_strip;
+	bool rx_sw_csum;
 };
 
 struct internal_list {
@@ -362,6 +365,70 @@ vhost_update_single_packet_xstats(struct vhost_queue *vq, struct rte_mbuf *buf)
 	vhost_count_xcast_packets(vq, buf);
 }
 
+/* Fall back to SW Rx L4 checksum when VIRTIO_NET_F_CSUM has been negotiated
+ * but the application enabled neither UDP nor TCP Rx checksum offload. */
+static void
+vhost_dev_csum_configure(struct rte_eth_dev *eth_dev)
+{
+	struct pmd_internal *internal = eth_dev->data->dev_private;
+	const struct rte_eth_rxmode *rxmode = &eth_dev->data->dev_conf.rxmode;
+
+	internal->rx_sw_csum = false;
+
+	/* SW checksum is not compatible with legacy mode */
+	if (!(internal->flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS))
+		return;
+	if (!(internal->features & (1ULL << VIRTIO_NET_F_CSUM)))
+		return;
+	if (rxmode->offloads & (RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_TCP_CKSUM))
+		return;
+	VHOST_LOG(NOTICE, "Rx csum will be done in SW, may impact performance.\n");
+	internal->rx_sw_csum = true;
+}
+
+/*
+ * Compute in SW the TCP/UDP checksum the guest left offloaded and store it in the packet.
+ */
+static void
+vhost_dev_rx_sw_csum(struct rte_mbuf *mbuf)
+{
+	struct rte_net_hdr_lens hdr_lens;
+	uint32_t ptype, hdr_len;
+	uint16_t csum = 0, csum_offset;
+
+	/* Return early if the L4 checksum was not offloaded */
+	if ((mbuf->ol_flags & RTE_MBUF_F_RX_L4_CKSUM_MASK) != RTE_MBUF_F_RX_L4_CKSUM_NONE)
+		return;
+
+	ptype = rte_net_get_ptype(mbuf, &hdr_lens, RTE_PTYPE_ALL_MASK);
+	hdr_len = hdr_lens.l2_len + hdr_lens.l3_len;
+
+	switch (ptype & RTE_PTYPE_L4_MASK) {
+	case RTE_PTYPE_L4_TCP:
+		csum_offset = offsetof(struct rte_tcp_hdr, cksum) + hdr_len;
+		break;
+	case RTE_PTYPE_L4_UDP:
+		csum_offset = offsetof(struct rte_udp_hdr, dgram_cksum) + hdr_len;
+		break;
+	default:
+		/* Unsupported packet type */
+		return;
+	}
+
+	/* The pseudo-header checksum is already performed, as per Virtio spec */
+	if (rte_raw_cksum_mbuf(mbuf, hdr_len, rte_pktmbuf_pkt_len(mbuf) - hdr_len, &csum) < 0)
+		return;
+	csum = ~csum;
+	/* RFC 768: a computed UDP checksum of zero is sent as all ones */
+	if (unlikely((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP && csum == 0))
+		csum = 0xffff;
+	/* Store only if both checksum bytes fit in the first segment */
+	if (rte_pktmbuf_data_len(mbuf) >= csum_offset + sizeof(csum))
+		*rte_pktmbuf_mtod_offset(mbuf, uint16_t *, csum_offset) = csum;
+	mbuf->ol_flags &= ~RTE_MBUF_F_RX_L4_CKSUM_MASK;
+	mbuf->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
+}
+
 static uint16_t
 eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 {
@@ -402,6 +469,9 @@ eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 		if (r->internal->vlan_strip)
 			rte_vlan_strip(bufs[i]);
 
+		if (r->internal->rx_sw_csum)
+			vhost_dev_rx_sw_csum(bufs[i]);
+
 		r->stats.bytes += bufs[i]->pkt_len;
 		r->stats.xstats[VHOST_BYTE] += bufs[i]->pkt_len;
 
@@ -805,6 +875,11 @@ new_device(int vid)
 		eth_dev->data->numa_node = newnode;
 #endif
 
+	if (rte_vhost_get_negotiated_features(vid, &internal->features)) {
+		VHOST_LOG(ERR, "Failed to get device features\n");
+		return -1;
+	}
+
 	internal->vid = vid;
 	if (rte_atomic32_read(&internal->started) == 1) {
 		queue_setup(eth_dev, internal);
@@ -827,6 +902,8 @@ new_device(int vid)
 
 	eth_dev->data->dev_link.link_status = RTE_ETH_LINK_UP;
 
+	vhost_dev_csum_configure(eth_dev);
+
 	rte_atomic32_set(&internal->dev_attached, 1);
 	update_queuing_status(eth_dev);
 
@@ -1131,6 +1208,8 @@ eth_dev_configure(struct rte_eth_dev *dev)
 
 	internal->vlan_strip = !!(rxmode->offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP);
 
+	vhost_dev_csum_configure(dev);
+
 	return 0;
 }
 
@@ -1281,6 +1360,10 @@ eth_dev_info(struct rte_eth_dev *dev,
 	dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
 				RTE_ETH_TX_OFFLOAD_VLAN_INSERT;
 	dev_info->rx_offload_capa = RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
+	if (internal->flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS) {
+		dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
+			RTE_ETH_RX_OFFLOAD_TCP_CKSUM;
+	}
 
 	return 0;
 }
-- 
2.35.1


  parent reply	other threads:[~2022-05-05 10:27 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-05-05 10:27 [PATCH 0/6] Vhost checksum offload improvements Maxime Coquelin
2022-05-05 10:27 ` [PATCH 1/6] Revert "app/testpmd: modify mac in csum forwarding" Maxime Coquelin
2022-05-16 13:03   ` Xia, Chenbo
2022-05-17 15:24     ` Zhang, Yuying
2022-05-19 16:27   ` David Marchand
2022-05-05 10:27 ` [PATCH 2/6] vhost: fix missing enqueue pseudo-header calculation Maxime Coquelin
2022-05-16 13:24   ` Xia, Chenbo
2022-05-05 10:27 ` [PATCH 3/6] net/vhost: enable compliant offloading mode Maxime Coquelin
2022-05-16 13:26   ` Xia, Chenbo
2022-05-16 13:28     ` Maxime Coquelin
2022-05-16 13:39       ` Xia, Chenbo
2022-06-07  1:19         ` Ma, WenwuX
2022-06-08  8:19           ` Maxime Coquelin
2022-05-05 10:27 ` [PATCH 4/6] net/vhost: make VLAN stripping flag a boolean Maxime Coquelin
2022-05-16 13:27   ` Xia, Chenbo
2022-05-05 10:27 ` Maxime Coquelin [this message]
2022-05-05 10:27 ` [PATCH 6/6] net/vhost: perform SW checksum in Tx path Maxime Coquelin
2022-05-07  3:20   ` Ma, WenwuX
2022-06-02  9:07     ` Maxime Coquelin
2022-06-06  9:44       ` Ma, WenwuX
2022-06-08  8:14         ` Maxime Coquelin
2022-06-09  1:03           ` Ma, WenwuX

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220505102729.821075-6-maxime.coquelin@redhat.com \
    --to=maxime.coquelin@redhat.com \
    --cc=chenbo.xia@intel.com \
    --cc=david.marchand@redhat.com \
    --cc=dev@dpdk.org \
    --cc=jasowang@redhat.com \
    --cc=olivier.matz@6wind.com \
    --cc=stable@dpdk.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

patches for DPDK stable branches

This inbox may be cloned and mirrored by anyone:

	git clone --mirror http://inbox.dpdk.org/stable/0 stable/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 stable stable/ http://inbox.dpdk.org/stable \
		stable@dpdk.org
	public-inbox-index stable

Example config snippet for mirrors.
Newsgroup available over NNTP:
	nntp://inbox.dpdk.org/inbox.dpdk.stable


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git