DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH 0/8] net/vmxnet3: fix offload issues
@ 2018-03-28 15:43 Didier Pallard
  2018-03-28 15:43 ` [dpdk-dev] [PATCH 1/8] net: export IPv6 header extensions skip function Didier Pallard
                   ` (9 more replies)
  0 siblings, 10 replies; 17+ messages in thread
From: Didier Pallard @ 2018-03-28 15:43 UTC (permalink / raw)
  To: dev

This patchset fixes several issues found in vmxnet3 driver
when enabling LRO offload support:
- Rx offload information is not correctly gathered in
  multisegmented packets, leading to inconsistent
  packet type and Rx offload bits in the resulting mbuf
- MSS recovery from offload information is not done
  thus LRO mbufs do not contain a correct tso_segsz value.
- MSS value is not propagated by the host on some
  hypervisor versions (6.0 for example)
- If two small TCP segments are aggregated in a single
  mbuf, an empty segment that only contains offload
  information is appended to this segment, and is
  propagated as is to the application. But if the application
  sends back to the hypervisor a mbuf with an empty
  segment, this mbuf is dropped by the hypervisor.


Didier Pallard (8):
  net: export IPv6 header extensions skip function
  net/vmxnet3: return unknown IPv4 extension len ptype
  net/vmxnet3: gather offload data on first and last segment
  net/vmxnet3: fix Rx offload information in multiseg packets
  net/vmxnet3: complete Rx offloads support
  net/vmxnet3: guess mss if not provided in LRO mode
  net/vmxnet3: ignore empty segments in reception
  net/vmxnet3: skip empty segments in transmission

 drivers/net/vmxnet3/Makefile            |   1 +
 drivers/net/vmxnet3/base/vmxnet3_defs.h |  27 ++++-
 drivers/net/vmxnet3/vmxnet3_ethdev.c    |   2 +
 drivers/net/vmxnet3/vmxnet3_ethdev.h    |   1 +
 drivers/net/vmxnet3/vmxnet3_rxtx.c      | 200 ++++++++++++++++++++++++++------
 lib/librte_net/Makefile                 |   1 +
 lib/librte_net/rte_net.c                |  21 ++--
 lib/librte_net/rte_net.h                |  27 +++++
 lib/librte_net/rte_net_version.map      |   1 +
 9 files changed, 238 insertions(+), 43 deletions(-)

-- 
2.11.0

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [dpdk-dev] [PATCH 1/8] net: export IPv6 header extensions skip function
  2018-03-28 15:43 [dpdk-dev] [PATCH 0/8] net/vmxnet3: fix offload issues Didier Pallard
@ 2018-03-28 15:43 ` Didier Pallard
  2018-04-17 19:28   ` Ferruh Yigit
  2018-04-23  8:35   ` Olivier Matz
  2018-03-28 15:43 ` [dpdk-dev] [PATCH 2/8] net/vmxnet3: return unknown IPv4 extension len ptype Didier Pallard
                   ` (8 subsequent siblings)
  9 siblings, 2 replies; 17+ messages in thread
From: Didier Pallard @ 2018-03-28 15:43 UTC (permalink / raw)
  To: dev

The skip_ip6_ext function can be exported as a helper; it may be used
by some PMDs to skip IPv6 header extensions.

Signed-off-by: Didier Pallard <didier.pallard@6wind.com>
---
 lib/librte_net/Makefile            |  1 +
 lib/librte_net/rte_net.c           | 21 ++++++++++++++-------
 lib/librte_net/rte_net.h           | 27 +++++++++++++++++++++++++++
 lib/librte_net/rte_net_version.map |  1 +
 4 files changed, 43 insertions(+), 7 deletions(-)

diff --git a/lib/librte_net/Makefile b/lib/librte_net/Makefile
index 95ff54900..85e403f41 100644
--- a/lib/librte_net/Makefile
+++ b/lib/librte_net/Makefile
@@ -5,6 +5,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
 
 LIB = librte_net.a
 
+CFLAGS += -DALLOW_EXPERIMENTAL_API
 CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
 LDLIBS += -lrte_mbuf -lrte_eal -lrte_mempool
 
diff --git a/lib/librte_net/rte_net.c b/lib/librte_net/rte_net.c
index 56a13e3c4..9eb7c7438 100644
--- a/lib/librte_net/rte_net.c
+++ b/lib/librte_net/rte_net.c
@@ -178,8 +178,8 @@ ip4_hlen(const struct ipv4_hdr *hdr)
 }
 
 /* parse ipv6 extended headers, update offset and return next proto */
-static uint16_t
-skip_ip6_ext(uint16_t proto, const struct rte_mbuf *m, uint32_t *off,
+int __rte_experimental
+rte_net_skip_ip6_ext(uint16_t proto, const struct rte_mbuf *m, uint32_t *off,
 	int *frag)
 {
 	struct ext_hdr {
@@ -201,7 +201,7 @@ skip_ip6_ext(uint16_t proto, const struct rte_mbuf *m, uint32_t *off,
 			xh = rte_pktmbuf_read(m, *off, sizeof(*xh),
 				&xh_copy);
 			if (xh == NULL)
-				return 0;
+				return -1;
 			*off += (xh->len + 1) * 8;
 			proto = xh->next_hdr;
 			break;
@@ -209,7 +209,7 @@ skip_ip6_ext(uint16_t proto, const struct rte_mbuf *m, uint32_t *off,
 			xh = rte_pktmbuf_read(m, *off, sizeof(*xh),
 				&xh_copy);
 			if (xh == NULL)
-				return 0;
+				return -1;
 			*off += 8;
 			proto = xh->next_hdr;
 			*frag = 1;
@@ -220,7 +220,7 @@ skip_ip6_ext(uint16_t proto, const struct rte_mbuf *m, uint32_t *off,
 			return proto;
 		}
 	}
-	return 0;
+	return -1;
 }
 
 /* parse mbuf data to get packet type */
@@ -233,6 +233,7 @@ uint32_t rte_net_get_ptype(const struct rte_mbuf *m,
 	uint32_t pkt_type = RTE_PTYPE_L2_ETHER;
 	uint32_t off = 0;
 	uint16_t proto;
+	int ret;
 
 	if (hdr_lens == NULL)
 		hdr_lens = &local_hdr_lens;
@@ -316,7 +317,10 @@ uint32_t rte_net_get_ptype(const struct rte_mbuf *m,
 		off += hdr_lens->l3_len;
 		pkt_type |= ptype_l3_ip6(proto);
 		if ((pkt_type & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV6_EXT) {
-			proto = skip_ip6_ext(proto, m, &off, &frag);
+			ret = rte_net_skip_ip6_ext(proto, m, &off, &frag);
+			if (ret < 0)
+				return pkt_type;
+			proto = ret;
 			hdr_lens->l3_len = off - hdr_lens->l2_len;
 		}
 		if (proto == 0)
@@ -449,7 +453,10 @@ uint32_t rte_net_get_ptype(const struct rte_mbuf *m,
 			uint32_t prev_off;
 
 			prev_off = off;
-			proto = skip_ip6_ext(proto, m, &off, &frag);
+			ret = rte_net_skip_ip6_ext(proto, m, &off, &frag);
+			if (ret < 0)
+				return pkt_type;
+			proto = ret;
 			hdr_lens->inner_l3_len += off - prev_off;
 		}
 		if (proto == 0)
diff --git a/lib/librte_net/rte_net.h b/lib/librte_net/rte_net.h
index 0e97901f3..b6ab6e1d5 100644
--- a/lib/librte_net/rte_net.h
+++ b/lib/librte_net/rte_net.h
@@ -29,6 +29,33 @@ struct rte_net_hdr_lens {
 };
 
 /**
+ * Skip IPv6 header extensions.
+ *
+ * This function skips all IPv6 extensions, returning size of
+ * complete header including options and final protocol value.
+ *
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * @param proto
+ *   Protocol field of IPv6 header.
+ * @param m
+ *   The packet mbuf to be parsed.
+ * @param off
+ *   On input, must contain the offset to the first byte following
+ *   IPv6 header, on output, contains offset to the first byte
+ *   of next layer (after any IPv6 extension header)
+ * @param frag
+ *   Contains 1 in output if packet is an IPv6 fragment.
+ * @return
+ *   Protocol that follows IPv6 header.
+ *   -1 if an error occurs during mbuf parsing.
+ */
+int __rte_experimental
+rte_net_skip_ip6_ext(uint16_t proto, const struct rte_mbuf *m, uint32_t *off,
+	int *frag);
+
+/**
  * Parse an Ethernet packet to get its packet type.
  *
  * This function parses the network headers in mbuf data and return its
diff --git a/lib/librte_net/rte_net_version.map b/lib/librte_net/rte_net_version.map
index 213e6fd32..8bc57d51c 100644
--- a/lib/librte_net/rte_net_version.map
+++ b/lib/librte_net/rte_net_version.map
@@ -17,4 +17,5 @@ EXPERIMENTAL {
 	global:
 
 	rte_net_make_rarp_packet;
+	rte_net_skip_ip6_ext;
 } DPDK_17.05;
-- 
2.11.0

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [dpdk-dev] [PATCH 2/8] net/vmxnet3: return unknown IPv4 extension len ptype
  2018-03-28 15:43 [dpdk-dev] [PATCH 0/8] net/vmxnet3: fix offload issues Didier Pallard
  2018-03-28 15:43 ` [dpdk-dev] [PATCH 1/8] net: export IPv6 header extensions skip function Didier Pallard
@ 2018-03-28 15:43 ` Didier Pallard
  2018-04-16 19:46   ` Yong Wang
  2018-03-28 15:43 ` [dpdk-dev] [PATCH 3/8] net/vmxnet3: gather offload data on first and last segment Didier Pallard
                   ` (7 subsequent siblings)
  9 siblings, 1 reply; 17+ messages in thread
From: Didier Pallard @ 2018-03-28 15:43 UTC (permalink / raw)
  To: dev

Rather than parsing IP header to get proper ptype to return, just return
RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, that tells application that we have an IP
packet with unknown header length.

Signed-off-by: Didier Pallard <didier.pallard@6wind.com>
---
 drivers/net/vmxnet3/vmxnet3_rxtx.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/net/vmxnet3/vmxnet3_rxtx.c b/drivers/net/vmxnet3/vmxnet3_rxtx.c
index 3a8c62fc1..156dc8e52 100644
--- a/drivers/net/vmxnet3/vmxnet3_rxtx.c
+++ b/drivers/net/vmxnet3/vmxnet3_rxtx.c
@@ -659,13 +659,7 @@ vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
 
 	/* Check packet type, checksum errors, etc. Only support IPv4 for now. */
 	if (rcd->v4) {
-		struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
-		struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);
-
-		if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
-			rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
-		else
-			rxm->packet_type = RTE_PTYPE_L3_IPV4;
+		rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
 
 		if (!rcd->cnc) {
 			if (!rcd->ipc)
-- 
2.11.0

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [dpdk-dev] [PATCH 3/8] net/vmxnet3: gather offload data on first and last segment
  2018-03-28 15:43 [dpdk-dev] [PATCH 0/8] net/vmxnet3: fix offload issues Didier Pallard
  2018-03-28 15:43 ` [dpdk-dev] [PATCH 1/8] net: export IPv6 header extensions skip function Didier Pallard
  2018-03-28 15:43 ` [dpdk-dev] [PATCH 2/8] net/vmxnet3: return unknown IPv4 extension len ptype Didier Pallard
@ 2018-03-28 15:43 ` Didier Pallard
  2018-03-28 15:43 ` [dpdk-dev] [PATCH 4/8] net/vmxnet3: fix Rx offload information in multiseg packets Didier Pallard
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 17+ messages in thread
From: Didier Pallard @ 2018-03-28 15:43 UTC (permalink / raw)
  To: dev

Offloads are split between first and last segment of a packet.
Call a single vmxnet3_rx_offload function that will contain all
offload operations. This patch does not introduce any code modification.

Pass a vmxnet3_hw as parameter to the function, it is not presently
used in this patch, but will be later used for TSO offloads.

Signed-off-by: Didier Pallard <didier.pallard@6wind.com>
---
 drivers/net/vmxnet3/vmxnet3_rxtx.c | 56 +++++++++++++++++++++-----------------
 1 file changed, 31 insertions(+), 25 deletions(-)

diff --git a/drivers/net/vmxnet3/vmxnet3_rxtx.c b/drivers/net/vmxnet3/vmxnet3_rxtx.c
index 156dc8e52..27f17ef0a 100644
--- a/drivers/net/vmxnet3/vmxnet3_rxtx.c
+++ b/drivers/net/vmxnet3/vmxnet3_rxtx.c
@@ -648,28 +648,40 @@ vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
 
 
 /* Receive side checksum and other offloads */
-static void
-vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
+static inline void
+vmxnet3_rx_offload(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
+		struct rte_mbuf *rxm, const uint8_t sop)
 {
-	/* Check for RSS */
-	if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
-		rxm->ol_flags |= PKT_RX_RSS_HASH;
-		rxm->hash.rss = rcd->rssHash;
-	}
+	(void)hw;
+
+	/* Offloads set in sop */
+	if (sop) {
+		/* Check for RSS */
+		if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
+			rxm->ol_flags |= PKT_RX_RSS_HASH;
+			rxm->hash.rss = rcd->rssHash;
+		}
 
-	/* Check packet type, checksum errors, etc. Only support IPv4 for now. */
-	if (rcd->v4) {
-		rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
+		/* Check packet type, checksum errors. Only IPv4 for now. */
+		if (rcd->v4) {
+			rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
 
-		if (!rcd->cnc) {
-			if (!rcd->ipc)
-				rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;
+			if (!rcd->cnc) {
+				if (!rcd->ipc)
+					rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;
 
-			if ((rcd->tcp || rcd->udp) && !rcd->tuc)
-				rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
+				if ((rcd->tcp || rcd->udp) && !rcd->tuc)
+					rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
+			}
+		} else {
+			rxm->packet_type = RTE_PTYPE_UNKNOWN;
+		}
+	} else { /* Offloads set in eop */
+		/* Check for hardware stripped VLAN tag */
+		if (rcd->ts) {
+			rxm->ol_flags |= (PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED);
+			rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
 		}
-	} else {
-		rxm->packet_type = RTE_PTYPE_UNKNOWN;
 	}
 }
 
@@ -801,7 +813,7 @@ vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 			}
 
 			rxq->start_seg = rxm;
-			vmxnet3_rx_offload(rcd, rxm);
+			vmxnet3_rx_offload(hw, rcd, rxm, 1);
 		} else {
 			struct rte_mbuf *start = rxq->start_seg;
 
@@ -817,13 +829,7 @@ vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		if (rcd->eop) {
 			struct rte_mbuf *start = rxq->start_seg;
 
-			/* Check for hardware stripped VLAN tag */
-			if (rcd->ts) {
-				start->ol_flags |= (PKT_RX_VLAN |
-						PKT_RX_VLAN_STRIPPED);
-				start->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
-			}
-
+			vmxnet3_rx_offload(hw, rcd, start, 0);
 			rx_pkts[nb_rx++] = start;
 			rxq->start_seg = NULL;
 		}
-- 
2.11.0

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [dpdk-dev] [PATCH 4/8] net/vmxnet3: fix Rx offload information in multiseg packets
  2018-03-28 15:43 [dpdk-dev] [PATCH 0/8] net/vmxnet3: fix offload issues Didier Pallard
                   ` (2 preceding siblings ...)
  2018-03-28 15:43 ` [dpdk-dev] [PATCH 3/8] net/vmxnet3: gather offload data on first and last segment Didier Pallard
@ 2018-03-28 15:43 ` Didier Pallard
  2018-03-28 15:43 ` [dpdk-dev] [PATCH 5/8] net/vmxnet3: complete Rx offloads support Didier Pallard
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 17+ messages in thread
From: Didier Pallard @ 2018-03-28 15:43 UTC (permalink / raw)
  To: dev

In case we are working on a multisegment buffer, most bits are set
in the last segment of the buffer. Correctly look at those bits in the
eop part of the rx_offload function.

Fixes: 2fdd835f992c ("vmxnet3: support jumbo frames")
Signed-off-by: Didier Pallard <didier.pallard@6wind.com>
---
 drivers/net/vmxnet3/vmxnet3_rxtx.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/net/vmxnet3/vmxnet3_rxtx.c b/drivers/net/vmxnet3/vmxnet3_rxtx.c
index 27f17ef0a..1acd6c19e 100644
--- a/drivers/net/vmxnet3/vmxnet3_rxtx.c
+++ b/drivers/net/vmxnet3/vmxnet3_rxtx.c
@@ -656,12 +656,19 @@ vmxnet3_rx_offload(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
 
 	/* Offloads set in sop */
 	if (sop) {
+	} else { /* Offloads set in eop */
 		/* Check for RSS */
 		if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
 			rxm->ol_flags |= PKT_RX_RSS_HASH;
 			rxm->hash.rss = rcd->rssHash;
 		}
 
+		/* Check for hardware stripped VLAN tag */
+		if (rcd->ts) {
+			rxm->ol_flags |= (PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED);
+			rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
+		}
+
 		/* Check packet type, checksum errors. Only IPv4 for now. */
 		if (rcd->v4) {
 			rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
@@ -676,12 +683,6 @@ vmxnet3_rx_offload(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
 		} else {
 			rxm->packet_type = RTE_PTYPE_UNKNOWN;
 		}
-	} else { /* Offloads set in eop */
-		/* Check for hardware stripped VLAN tag */
-		if (rcd->ts) {
-			rxm->ol_flags |= (PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED);
-			rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
-		}
 	}
 }
 
-- 
2.11.0

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [dpdk-dev] [PATCH 5/8] net/vmxnet3: complete Rx offloads support
  2018-03-28 15:43 [dpdk-dev] [PATCH 0/8] net/vmxnet3: fix offload issues Didier Pallard
                   ` (3 preceding siblings ...)
  2018-03-28 15:43 ` [dpdk-dev] [PATCH 4/8] net/vmxnet3: fix Rx offload information in multiseg packets Didier Pallard
@ 2018-03-28 15:43 ` Didier Pallard
  2018-03-28 15:43 ` [dpdk-dev] [PATCH 6/8] net/vmxnet3: guess mss if not provided in LRO mode Didier Pallard
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 17+ messages in thread
From: Didier Pallard @ 2018-03-28 15:43 UTC (permalink / raw)
  To: dev

Add support for IPv6, LRO and properly set packet type in all
supported cases.

Signed-off-by: Didier Pallard <didier.pallard@6wind.com>
---
 drivers/net/vmxnet3/base/vmxnet3_defs.h | 27 ++++++++++-
 drivers/net/vmxnet3/vmxnet3_rxtx.c      | 82 +++++++++++++++++++++++++++------
 2 files changed, 93 insertions(+), 16 deletions(-)

diff --git a/drivers/net/vmxnet3/base/vmxnet3_defs.h b/drivers/net/vmxnet3/base/vmxnet3_defs.h
index a455e2706..7a6f87dc2 100644
--- a/drivers/net/vmxnet3/base/vmxnet3_defs.h
+++ b/drivers/net/vmxnet3/base/vmxnet3_defs.h
@@ -327,7 +327,32 @@ struct Vmxnet3_RxCompDescExt {
    uint8  segCnt;       /* Number of aggregated packets */
    uint8  dupAckCnt;    /* Number of duplicate Acks */
    __le16 tsDelta;      /* TCP timestamp difference */
-   __le32 dword2[2];
+	__le32 dword2;
+#ifdef __BIG_ENDIAN_BITFIELD
+	uint32 gen : 1;     /* generation bit */
+	uint32 type : 7;    /* completion type */
+	uint32 fcs : 1;     /* Frame CRC correct */
+	uint32 frg : 1;     /* IP Fragment */
+	uint32 v4 : 1;      /* IPv4 */
+	uint32 v6 : 1;      /* IPv6 */
+	uint32 ipc : 1;     /* IP Checksum Correct */
+	uint32 tcp : 1;     /* TCP packet */
+	uint32 udp : 1;     /* UDP packet */
+	uint32 tuc : 1;     /* TCP/UDP Checksum Correct */
+	uint32 mss : 16;
+#else
+	uint32 mss : 16;
+	uint32 tuc : 1;     /* TCP/UDP Checksum Correct */
+	uint32 udp : 1;     /* UDP packet */
+	uint32 tcp : 1;     /* TCP packet */
+	uint32 ipc : 1;     /* IP Checksum Correct */
+	uint32 v6 : 1;      /* IPv6 */
+	uint32 v4 : 1;      /* IPv4 */
+	uint32 frg : 1;     /* IP Fragment */
+	uint32 fcs : 1;     /* Frame CRC correct */
+	uint32 type : 7;    /* completion type */
+	uint32 gen : 1;     /* generation bit */
+#endif  /* __BIG_ENDIAN_BITFIELD */
 }
 #include "vmware_pack_end.h"
 Vmxnet3_RxCompDescExt;
diff --git a/drivers/net/vmxnet3/vmxnet3_rxtx.c b/drivers/net/vmxnet3/vmxnet3_rxtx.c
index 1acd6c19e..8ed7bd403 100644
--- a/drivers/net/vmxnet3/vmxnet3_rxtx.c
+++ b/drivers/net/vmxnet3/vmxnet3_rxtx.c
@@ -652,38 +652,89 @@ static inline void
 vmxnet3_rx_offload(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
 		struct rte_mbuf *rxm, const uint8_t sop)
 {
-	(void)hw;
+	uint64_t ol_flags = rxm->ol_flags;
+	uint32_t packet_type = rxm->packet_type;
 
 	/* Offloads set in sop */
 	if (sop) {
+		/* Set packet type */
+		packet_type |= RTE_PTYPE_L2_ETHER;
+
+		/* Check large packet receive */
+		if (VMXNET3_VERSION_GE_2(hw) &&
+		    rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) {
+			const Vmxnet3_RxCompDescExt *rcde =
+					(const Vmxnet3_RxCompDescExt *)rcd;
+
+			rxm->tso_segsz = rcde->mss;
+			ol_flags |= PKT_RX_LRO;
+		}
 	} else { /* Offloads set in eop */
 		/* Check for RSS */
 		if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
-			rxm->ol_flags |= PKT_RX_RSS_HASH;
+			ol_flags |= PKT_RX_RSS_HASH;
 			rxm->hash.rss = rcd->rssHash;
 		}
 
 		/* Check for hardware stripped VLAN tag */
 		if (rcd->ts) {
-			rxm->ol_flags |= (PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED);
+			ol_flags |= (PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED);
 			rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
 		}
 
-		/* Check packet type, checksum errors. Only IPv4 for now. */
-		if (rcd->v4) {
-			rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
-
-			if (!rcd->cnc) {
-				if (!rcd->ipc)
-					rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;
-
-				if ((rcd->tcp || rcd->udp) && !rcd->tuc)
-					rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
-			}
+		/* Check packet type, checksum errors, etc. */
+		if (rcd->cnc) {
+			ol_flags |= PKT_RX_L4_CKSUM_UNKNOWN;
 		} else {
-			rxm->packet_type = RTE_PTYPE_UNKNOWN;
+			if (rcd->v4) {
+				packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
+
+				if (rcd->ipc)
+					ol_flags |= PKT_RX_IP_CKSUM_GOOD;
+				else
+					ol_flags |= PKT_RX_IP_CKSUM_BAD;
+
+				if (rcd->tuc) {
+					ol_flags |= PKT_RX_L4_CKSUM_GOOD;
+					if (rcd->tcp)
+						packet_type |= RTE_PTYPE_L4_TCP;
+					else
+						packet_type |= RTE_PTYPE_L4_UDP;
+				} else {
+					if (rcd->tcp) {
+						packet_type |= RTE_PTYPE_L4_TCP;
+						ol_flags |= PKT_RX_L4_CKSUM_BAD;
+					} else if (rcd->udp) {
+						packet_type |= RTE_PTYPE_L4_UDP;
+						ol_flags |= PKT_RX_L4_CKSUM_BAD;
+					}
+				}
+			} else if (rcd->v6) {
+				packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
+
+				if (rcd->tuc) {
+					ol_flags |= PKT_RX_L4_CKSUM_GOOD;
+					if (rcd->tcp)
+						packet_type |= RTE_PTYPE_L4_TCP;
+					else
+						packet_type |= RTE_PTYPE_L4_UDP;
+				} else {
+					if (rcd->tcp) {
+						packet_type |= RTE_PTYPE_L4_TCP;
+						ol_flags |= PKT_RX_L4_CKSUM_BAD;
+					} else if (rcd->udp) {
+						packet_type |= RTE_PTYPE_L4_UDP;
+						ol_flags |= PKT_RX_L4_CKSUM_BAD;
+					}
+				}
+			} else {
+				packet_type |= RTE_PTYPE_UNKNOWN;
+			}
 		}
 	}
+
+	rxm->ol_flags = ol_flags;
+	rxm->packet_type = packet_type;
 }
 
 /*
@@ -783,6 +834,7 @@ vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
 		rxm->ol_flags = 0;
 		rxm->vlan_tci = 0;
+		rxm->packet_type = 0;
 
 		/*
 		 * If this is the first buffer of the received packet,
-- 
2.11.0

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [dpdk-dev] [PATCH 6/8] net/vmxnet3: guess mss if not provided in LRO mode
  2018-03-28 15:43 [dpdk-dev] [PATCH 0/8] net/vmxnet3: fix offload issues Didier Pallard
                   ` (4 preceding siblings ...)
  2018-03-28 15:43 ` [dpdk-dev] [PATCH 5/8] net/vmxnet3: complete Rx offloads support Didier Pallard
@ 2018-03-28 15:43 ` Didier Pallard
  2018-03-28 15:43 ` [dpdk-dev] [PATCH 7/8] net/vmxnet3: ignore empty segments in reception Didier Pallard
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 17+ messages in thread
From: Didier Pallard @ 2018-03-28 15:43 UTC (permalink / raw)
  To: dev

Some not-so-old variants of vmxnet3 do not provide the MSS value along
with the LRO packet. When this case happens, try to guess the MSS value
with the information at hand.

Signed-off-by: Didier Pallard <didier.pallard@6wind.com>
---
 drivers/net/vmxnet3/Makefile         |  1 +
 drivers/net/vmxnet3/vmxnet3_ethdev.c |  2 ++
 drivers/net/vmxnet3/vmxnet3_ethdev.h |  1 +
 drivers/net/vmxnet3/vmxnet3_rxtx.c   | 59 ++++++++++++++++++++++++++++++++++++
 4 files changed, 63 insertions(+)

diff --git a/drivers/net/vmxnet3/Makefile b/drivers/net/vmxnet3/Makefile
index 6bfbf0195..8cd007d3b 100644
--- a/drivers/net/vmxnet3/Makefile
+++ b/drivers/net/vmxnet3/Makefile
@@ -8,6 +8,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
 #
 LIB = librte_pmd_vmxnet3_uio.a
 
+CFLAGS += -DALLOW_EXPERIMENTAL_API
 CFLAGS += -O3
 CFLAGS += $(WERROR_FLAGS)
 
diff --git a/drivers/net/vmxnet3/vmxnet3_ethdev.c b/drivers/net/vmxnet3/vmxnet3_ethdev.c
index 4e68aae6b..32a68262e 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethdev.c
+++ b/drivers/net/vmxnet3/vmxnet3_ethdev.c
@@ -615,6 +615,8 @@ vmxnet3_setup_driver_shared(struct rte_eth_dev *dev)
 	uint32_t i;
 	int ret;
 
+	hw->mtu = mtu;
+
 	shared->magic = VMXNET3_REV1_MAGIC;
 	devRead->misc.driverInfo.version = VMXNET3_DRIVER_VERSION_NUM;
 
diff --git a/drivers/net/vmxnet3/vmxnet3_ethdev.h b/drivers/net/vmxnet3/vmxnet3_ethdev.h
index b2a8cf35b..d3f2b3529 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethdev.h
+++ b/drivers/net/vmxnet3/vmxnet3_ethdev.h
@@ -87,6 +87,7 @@ struct vmxnet3_hw {
 
 	uint64_t              queueDescPA;
 	uint16_t              queue_desc_len;
+	uint16_t              mtu;
 
 	VMXNET3_RSSConf       *rss_conf;
 	uint64_t              rss_confPA;
diff --git a/drivers/net/vmxnet3/vmxnet3_rxtx.c b/drivers/net/vmxnet3/vmxnet3_rxtx.c
index 8ed7bd403..1f273f88e 100644
--- a/drivers/net/vmxnet3/vmxnet3_rxtx.c
+++ b/drivers/net/vmxnet3/vmxnet3_rxtx.c
@@ -646,6 +646,59 @@ vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
 		return i;
 }
 
+/* MSS not provided by vmxnet3, guess one with available information */
+static uint16_t
+vmxnet3_guess_mss(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
+		struct rte_mbuf *rxm)
+{
+	uint32_t hlen, slen;
+	struct ipv4_hdr *ipv4_hdr;
+	struct ipv6_hdr *ipv6_hdr;
+	struct tcp_hdr *tcp_hdr;
+	char *ptr;
+
+	RTE_ASSERT(rcd->tcp);
+
+	ptr = rte_pktmbuf_mtod(rxm, char *);
+	slen = rte_pktmbuf_data_len(rxm);
+	hlen = sizeof(struct ether_hdr);
+
+	if (rcd->v4) {
+		if (unlikely(slen < hlen + sizeof(struct ipv4_hdr)))
+			return hw->mtu - sizeof(struct ipv4_hdr)
+					- sizeof(struct tcp_hdr);
+
+		ipv4_hdr = (struct ipv4_hdr *)(ptr + hlen);
+		hlen += (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
+				IPV4_IHL_MULTIPLIER;
+	} else if (rcd->v6) {
+		if (unlikely(slen < hlen + sizeof(struct ipv6_hdr)))
+			return hw->mtu - sizeof(struct ipv6_hdr) -
+					sizeof(struct tcp_hdr);
+
+		ipv6_hdr = (struct ipv6_hdr *)(ptr + hlen);
+		hlen += sizeof(struct ipv6_hdr);
+		if (unlikely(ipv6_hdr->proto != IPPROTO_TCP)) {
+			int frag;
+
+			rte_net_skip_ip6_ext(ipv6_hdr->proto, rxm,
+					&hlen, &frag);
+		}
+	}
+
+	if (unlikely(slen < hlen + sizeof(struct tcp_hdr)))
+		return hw->mtu - hlen - sizeof(struct tcp_hdr) +
+				sizeof(struct ether_hdr);
+
+	tcp_hdr = (struct tcp_hdr *)(ptr + hlen);
+	hlen += (tcp_hdr->data_off & 0xf0) >> 2;
+
+	if (rxm->udata64 > 1)
+		return (rte_pktmbuf_pkt_len(rxm) - hlen +
+				rxm->udata64 - 1) / rxm->udata64;
+	else
+		return hw->mtu - hlen + sizeof(struct ether_hdr);
+}
 
 /* Receive side checksum and other offloads */
 static inline void
@@ -667,6 +720,7 @@ vmxnet3_rx_offload(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
 					(const Vmxnet3_RxCompDescExt *)rcd;
 
 			rxm->tso_segsz = rcde->mss;
+			rxm->udata64 = rcde->segCnt;
 			ol_flags |= PKT_RX_LRO;
 		}
 	} else { /* Offloads set in eop */
@@ -730,6 +784,11 @@ vmxnet3_rx_offload(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
 			} else {
 				packet_type |= RTE_PTYPE_UNKNOWN;
 			}
+
+			/* Old variants of vmxnet3 do not provide MSS */
+			if ((ol_flags & PKT_RX_LRO) && rxm->tso_segsz == 0)
+				rxm->tso_segsz = vmxnet3_guess_mss(hw,
+						rcd, rxm);
 		}
 	}
 
-- 
2.11.0

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [dpdk-dev] [PATCH 7/8] net/vmxnet3: ignore empty segments in reception
  2018-03-28 15:43 [dpdk-dev] [PATCH 0/8] net/vmxnet3: fix offload issues Didier Pallard
                   ` (5 preceding siblings ...)
  2018-03-28 15:43 ` [dpdk-dev] [PATCH 6/8] net/vmxnet3: guess mss if not provided in LRO mode Didier Pallard
@ 2018-03-28 15:43 ` Didier Pallard
  2018-03-28 15:43 ` [dpdk-dev] [PATCH 8/8] net/vmxnet3: skip empty segments in transmission Didier Pallard
                   ` (2 subsequent siblings)
  9 siblings, 0 replies; 17+ messages in thread
From: Didier Pallard @ 2018-03-28 15:43 UTC (permalink / raw)
  To: dev

When several TCP fragments are contained in a packet that is only one mbuf
segment long, vmxnet3 receives an empty segment following the first one, that
contains offload information. In the current version, this segment is
propagated as is to the upper application.
Remove those empty segments directly when receiving buffers, as they may
generate unneeded extra processing in the upper application.

Signed-off-by: Didier Pallard <didier.pallard@6wind.com>
---
 drivers/net/vmxnet3/vmxnet3_rxtx.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/net/vmxnet3/vmxnet3_rxtx.c b/drivers/net/vmxnet3/vmxnet3_rxtx.c
index 1f273f88e..1d344b26e 100644
--- a/drivers/net/vmxnet3/vmxnet3_rxtx.c
+++ b/drivers/net/vmxnet3/vmxnet3_rxtx.c
@@ -925,18 +925,23 @@ vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 			}
 
 			rxq->start_seg = rxm;
+			rxq->last_seg = rxm;
 			vmxnet3_rx_offload(hw, rcd, rxm, 1);
 		} else {
 			struct rte_mbuf *start = rxq->start_seg;
 
 			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);
 
-			start->pkt_len += rxm->data_len;
-			start->nb_segs++;
+			if (rxm->data_len) {
+				start->pkt_len += rxm->data_len;
+				start->nb_segs++;
 
-			rxq->last_seg->next = rxm;
+				rxq->last_seg->next = rxm;
+				rxq->last_seg = rxm;
+			} else {
+				rte_pktmbuf_free_seg(rxm);
+			}
 		}
-		rxq->last_seg = rxm;
 
 		if (rcd->eop) {
 			struct rte_mbuf *start = rxq->start_seg;
-- 
2.11.0

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [dpdk-dev] [PATCH 8/8] net/vmxnet3: skip empty segments in transmission
  2018-03-28 15:43 [dpdk-dev] [PATCH 0/8] net/vmxnet3: fix offload issues Didier Pallard
                   ` (6 preceding siblings ...)
  2018-03-28 15:43 ` [dpdk-dev] [PATCH 7/8] net/vmxnet3: ignore empty segments in reception Didier Pallard
@ 2018-03-28 15:43 ` Didier Pallard
  2018-04-13  4:44 ` [dpdk-dev] [PATCH 0/8] net/vmxnet3: fix offload issues Yong Wang
  2018-04-20 22:02 ` Yong Wang
  9 siblings, 0 replies; 17+ messages in thread
From: Didier Pallard @ 2018-03-28 15:43 UTC (permalink / raw)
  To: dev

Packets containing empty segments are dropped by the hypervisor; prevent
this case by skipping empty segments in transmission.
Also drop empty mbufs to be sure that at least one segment is transmitted
for each mbuf.

Signed-off-by: Didier Pallard <didier.pallard@6wind.com>
---
 drivers/net/vmxnet3/vmxnet3_rxtx.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/net/vmxnet3/vmxnet3_rxtx.c b/drivers/net/vmxnet3/vmxnet3_rxtx.c
index 1d344b26e..32991f4e6 100644
--- a/drivers/net/vmxnet3/vmxnet3_rxtx.c
+++ b/drivers/net/vmxnet3/vmxnet3_rxtx.c
@@ -457,6 +457,14 @@ vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		    rte_pktmbuf_pkt_len(txm) <= txq->txdata_desc_size) {
 			struct Vmxnet3_TxDataDesc *tdd;
 
+			/* Skip empty packets */
+			if (unlikely(rte_pktmbuf_pkt_len(txm) == 0)) {
+				txq->stats.drop_total++;
+				rte_pktmbuf_free(txm);
+				nb_tx++;
+				continue;
+			}
+
 			tdd = (struct Vmxnet3_TxDataDesc *)
 				((uint8 *)txq->data_ring.base +
 				 txq->cmd_ring.next2fill *
@@ -477,6 +485,11 @@ vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 			 * maximum size of mbuf segment size.
 			 */
 			gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
+
+			/* Skip empty segments */
+			if (unlikely(m_seg->data_len == 0))
+				continue;
+
 			if (copy_size) {
 				uint64 offset =
 					(uint64)txq->cmd_ring.next2fill *
-- 
2.11.0

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [dpdk-dev] [PATCH 0/8] net/vmxnet3: fix offload issues
  2018-03-28 15:43 [dpdk-dev] [PATCH 0/8] net/vmxnet3: fix offload issues Didier Pallard
                   ` (7 preceding siblings ...)
  2018-03-28 15:43 ` [dpdk-dev] [PATCH 8/8] net/vmxnet3: skip empty segments in transmission Didier Pallard
@ 2018-04-13  4:44 ` Yong Wang
  2018-04-13 14:33   ` Didier Pallard
  2018-04-20 22:02 ` Yong Wang
  9 siblings, 1 reply; 17+ messages in thread
From: Yong Wang @ 2018-04-13  4:44 UTC (permalink / raw)
  To: Didier Pallard, dev

On 3/28/18, 8:44 AM, "dev on behalf of Didier Pallard" <dev-bounces@dpdk.org on behalf of didier.pallard@6wind.com> wrote:

    This patchset fixes several issues found in vmxnet3 driver
    when enabling LRO offload support:
    - Rx offload information are not correctly gathered in
      multisegmented packets, leading to inconsistent
      packet type and Rx offload bits in resulting mbuf
    - MSS recovery from offload information is not done
      thus LRO mbufs do not contain a correct tso_segsz value.
    - MSS value is not propagated by the host on some
      hypervisor versions (6.0 for example)
    - If two small TCP segments are aggregated in a single
      mbuf, an empty segment that only contains offload
      information is appended to this segment, and is
      propagated as is to the application. But if the application
      sends back to the hypervisor a mbuf with an empty
      segment, this mbuf is dropped by the hypervisor.
    
    Didier Pallard (8):
      net: export IPv6 header extensions skip function
      net/vmxnet3: return unknown IPv4 extension len ptype
      net/vmxnet3: gather offload data on first and last segment
      net/vmxnet3: fix Rx offload information in multiseg packets
      net/vmxnet3: complete Rx offloads support
      net/vmxnet3: guess mss if not provided in LRO mode
      net/vmxnet3: ignore empty segments in reception
      net/vmxnet3: skip empty segments in transmission
    
     drivers/net/vmxnet3/Makefile            |   1 +
     drivers/net/vmxnet3/base/vmxnet3_defs.h |  27 ++++-
     drivers/net/vmxnet3/vmxnet3_ethdev.c    |   2 +
     drivers/net/vmxnet3/vmxnet3_ethdev.h    |   1 +
     drivers/net/vmxnet3/vmxnet3_rxtx.c      | 200 ++++++++++++++++++++++++++------
     lib/librte_net/Makefile                 |   1 +
     lib/librte_net/rte_net.c                |  21 ++--
     lib/librte_net/rte_net.h                |  27 +++++
     lib/librte_net/rte_net_version.map      |   1 +
     9 files changed, 238 insertions(+), 43 deletions(-)
    
    -- 
    2.11.0
    
Didier, the changes look good overall.  Can you describe how you tested this patch set, as well as how you made sure there is no regression for the non-LRO case? 


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [dpdk-dev] [PATCH 0/8] net/vmxnet3: fix offload issues
  2018-04-13  4:44 ` [dpdk-dev] [PATCH 0/8] net/vmxnet3: fix offload issues Yong Wang
@ 2018-04-13 14:33   ` Didier Pallard
  0 siblings, 0 replies; 17+ messages in thread
From: Didier Pallard @ 2018-04-13 14:33 UTC (permalink / raw)
  To: Yong Wang, dev

Hi Wang,

We didn't run unit tests with testpmd tools; the validation tests
have been done
using our DPDK application in the following topology:


+------------------------------+
| +-----------+  +-----------+ |
| | Linux VM1 |  | Linux VM2 | |
| +------+----+  +----+------+ |
|       VMware DvSwitch        |
|     +--+------------+--+     |
|     |  +---OVSbr0---+  |     |
|     |                  |     |
|     |  6WIND DPDK app  |     |
|     +------------------+     |
|      VMware ESXi 6.0/6.5     |
+------------------------------+



All the available offloads are enabled in Linux VM 1 and 2.
Iperf TCP traffic is started from Linux VM1 to Linux VM2.


With ESXi 6.0 (vHW 11), we got the following numbers using 2 cores for 
our DPDK app:
- with LRO enabled on the DPDK app ports: 21 Gbps
- with LRO disabled on the DPDK app ports: 9 Gbps


With ESXi 6.5 (vHW 13), we got the following numbers using 2 cores for 
our DPDK app:
- with LRO enabled on the DPDK app ports: 40 Gbps
- with LRO disabled on the DPDK app ports: 20 Gbps


Didier

/*
*/
On 04/13/2018 06:44 AM, Yong Wang wrote:
> On 3/28/18, 8:44 AM, "dev on behalf of Didier Pallard" <dev-bounces@dpdk.org on behalf of didier.pallard@6wind.com> wrote:
>
>      This patchset fixes several issues found in vmxnet3 driver
>      when enabling LRO offload support:
>      - Rx offload information are not correctly gathered in
>        multisegmented packets, leading to inconsistent
>        packet type and Rx offload bits in resulting mbuf
>      - MSS recovery from offload information is not done
>        thus LRO mbufs do not contain a correct tso_segsz value.
>      - MSS value is not propagated by the host on some
>        hypervisor versions (6.0 for example)
>      - If two small TCP segments are aggregated in a single
>        mbuf, an empty segment that only contains offload
>        information is appended to this segment, and is
>        propagated as is to the application. But if the application
>        sends back to the hypervisor a mbuf with an empty
>        segment, this mbuf is dropped by the hypervisor.
>      
>      Didier Pallard (8):
>        net: export IPv6 header extensions skip function
>        net/vmxnet3: return unknown IPv4 extension len ptype
>        net/vmxnet3: gather offload data on first and last segment
>        net/vmxnet3: fix Rx offload information in multiseg packets
>        net/vmxnet3: complete Rx offloads support
>        net/vmxnet3: guess mss if not provided in LRO mode
>        net/vmxnet3: ignore empty segments in reception
>        net/vmxnet3: skip empty segments in transmission
>      
>       drivers/net/vmxnet3/Makefile            |   1 +
>       drivers/net/vmxnet3/base/vmxnet3_defs.h |  27 ++++-
>       drivers/net/vmxnet3/vmxnet3_ethdev.c    |   2 +
>       drivers/net/vmxnet3/vmxnet3_ethdev.h    |   1 +
>       drivers/net/vmxnet3/vmxnet3_rxtx.c      | 200 ++++++++++++++++++++++++++------
>       lib/librte_net/Makefile                 |   1 +
>       lib/librte_net/rte_net.c                |  21 ++--
>       lib/librte_net/rte_net.h                |  27 +++++
>       lib/librte_net/rte_net_version.map      |   1 +
>       9 files changed, 238 insertions(+), 43 deletions(-)
>      
>      --
>      2.11.0
>      
> Didier, the changes look good overall.  Can you describe how did you test this patch set as well as making sure no regression for non-lro case?
>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [dpdk-dev] [PATCH 2/8] net/vmxnet3: return unknown IPv4 extension len ptype
  2018-03-28 15:43 ` [dpdk-dev] [PATCH 2/8] net/vmxnet3: return unknown IPv4 extension len ptype Didier Pallard
@ 2018-04-16 19:46   ` Yong Wang
  2018-04-17  9:09     ` Didier Pallard
  0 siblings, 1 reply; 17+ messages in thread
From: Yong Wang @ 2018-04-16 19:46 UTC (permalink / raw)
  To: Didier Pallard, dev

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Didier Pallard
> Sent: Wednesday, March 28, 2018 8:44 AM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH 2/8] net/vmxnet3: return unknown IPv4
> extension len ptype
> 
> Rather than parsing IP header to get proper ptype to return, just return
> RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, that tells application that we have an
> IP
> packet with unknown header length.

Any specific reason of doing this? I can image there are applications that depend on this and the cost of parsing is simply shifted to the app or via rte_eth_add_rx_callback().

> 
> Signed-off-by: Didier Pallard <didier.pallard@6wind.com>
> ---
>  drivers/net/vmxnet3/vmxnet3_rxtx.c | 8 +-------
>  1 file changed, 1 insertion(+), 7 deletions(-)
> 
> diff --git a/drivers/net/vmxnet3/vmxnet3_rxtx.c
> b/drivers/net/vmxnet3/vmxnet3_rxtx.c
> index 3a8c62fc1..156dc8e52 100644
> --- a/drivers/net/vmxnet3/vmxnet3_rxtx.c
> +++ b/drivers/net/vmxnet3/vmxnet3_rxtx.c
> @@ -659,13 +659,7 @@ vmxnet3_rx_offload(const Vmxnet3_RxCompDesc
> *rcd, struct rte_mbuf *rxm)
> 
>  	/* Check packet type, checksum errors, etc. Only support IPv4 for
> now. */
>  	if (rcd->v4) {
> -		struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct
> ether_hdr *);
> -		struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);
> -
> -		if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
> -			rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
> -		else
> -			rxm->packet_type = RTE_PTYPE_L3_IPV4;
> +		rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
> 
>  		if (!rcd->cnc) {
>  			if (!rcd->ipc)
> --
> 2.11.0


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [dpdk-dev] [PATCH 2/8] net/vmxnet3: return unknown IPv4 extension len ptype
  2018-04-16 19:46   ` Yong Wang
@ 2018-04-17  9:09     ` Didier Pallard
  0 siblings, 0 replies; 17+ messages in thread
From: Didier Pallard @ 2018-04-17  9:09 UTC (permalink / raw)
  To: Yong Wang, dev

Hi Wang,

Indeed, this one is not strictly needed and does not fix anything, anyway:

- If application does not need the information (ethernet bridge, for 
example),
this access is not needed and it will never be done, so application 
performance
will be improved.

- If the application needs the information, you're right, the parsing will 
just be shifted
to the application procedure, but I think that data access locality will 
be increased
since the application will certainly do other stuff around the same 
memory location.
And in this case, final performance figures should be at worst the same 
than before the
patch.

Didier


On 04/16/2018 09:46 PM, Yong Wang wrote:
>> -----Original Message-----
>> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Didier Pallard
>> Sent: Wednesday, March 28, 2018 8:44 AM
>> To: dev@dpdk.org
>> Subject: [dpdk-dev] [PATCH 2/8] net/vmxnet3: return unknown IPv4
>> extension len ptype
>>
>> Rather than parsing IP header to get proper ptype to return, just return
>> RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, that tells application that we have an
>> IP
>> packet with unknown header length.
> Any specific reason of doing this? I can image there are applications that depend on this and the cost of parsing is simply shifted to the app or via rte_eth_add_rx_callback().
>
>> Signed-off-by: Didier Pallard <didier.pallard@6wind.com>
>> ---
>>   drivers/net/vmxnet3/vmxnet3_rxtx.c | 8 +-------
>>   1 file changed, 1 insertion(+), 7 deletions(-)
>>
>> diff --git a/drivers/net/vmxnet3/vmxnet3_rxtx.c
>> b/drivers/net/vmxnet3/vmxnet3_rxtx.c
>> index 3a8c62fc1..156dc8e52 100644
>> --- a/drivers/net/vmxnet3/vmxnet3_rxtx.c
>> +++ b/drivers/net/vmxnet3/vmxnet3_rxtx.c
>> @@ -659,13 +659,7 @@ vmxnet3_rx_offload(const Vmxnet3_RxCompDesc
>> *rcd, struct rte_mbuf *rxm)
>>
>>   	/* Check packet type, checksum errors, etc. Only support IPv4 for
>> now. */
>>   	if (rcd->v4) {
>> -		struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct
>> ether_hdr *);
>> -		struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);
>> -
>> -		if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
>> -			rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
>> -		else
>> -			rxm->packet_type = RTE_PTYPE_L3_IPV4;
>> +		rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
>>
>>   		if (!rcd->cnc) {
>>   			if (!rcd->ipc)
>> --
>> 2.11.0

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [dpdk-dev] [PATCH 1/8] net: export IPv6 header extensions skip function
  2018-03-28 15:43 ` [dpdk-dev] [PATCH 1/8] net: export IPv6 header extensions skip function Didier Pallard
@ 2018-04-17 19:28   ` Ferruh Yigit
  2018-04-23  8:35   ` Olivier Matz
  1 sibling, 0 replies; 17+ messages in thread
From: Ferruh Yigit @ 2018-04-17 19:28 UTC (permalink / raw)
  To: Didier Pallard, dev, Olivier MATZ

On 3/28/2018 4:43 PM, Didier Pallard wrote:
> skip_ip6_ext function can be exported as a helper, it may be used
> by some PMD to skip IPv6 header extensions.
> 
> Signed-off-by: Didier Pallard <didier.pallard@6wind.com>
> ---
>  lib/librte_net/Makefile            |  1 +
>  lib/librte_net/rte_net.c           | 21 ++++++++++++++-------
>  lib/librte_net/rte_net.h           | 27 +++++++++++++++++++++++++++
>  lib/librte_net/rte_net_version.map |  1 +
>  4 files changed, 43 insertions(+), 7 deletions(-)

cc'ed librte_net maintainer.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [dpdk-dev] [PATCH 0/8] net/vmxnet3: fix offload issues
  2018-03-28 15:43 [dpdk-dev] [PATCH 0/8] net/vmxnet3: fix offload issues Didier Pallard
                   ` (8 preceding siblings ...)
  2018-04-13  4:44 ` [dpdk-dev] [PATCH 0/8] net/vmxnet3: fix offload issues Yong Wang
@ 2018-04-20 22:02 ` Yong Wang
  2018-04-23 14:46   ` Ferruh Yigit
  9 siblings, 1 reply; 17+ messages in thread
From: Yong Wang @ 2018-04-20 22:02 UTC (permalink / raw)
  To: Didier Pallard, dev

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Didier Pallard
> Sent: Wednesday, March 28, 2018 8:44 AM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH 0/8] net/vmxnet3: fix offload issues
> 
> This patchset fixes several issues found in vmxnet3 driver
> when enabling LRO offload support:
> - Rx offload information are not correctly gathered in
>   multisegmented packets, leading to inconsistent
>   packet type and Rx offload bits in resulting mbuf
> - MSS recovery from offload information is not done
>   thus LRO mbufs do not contain a correct tso_segsz value.
> - MSS value is not propagated by the host on some
>   hypervisor versions (6.0 for example)
> - If two small TCP segments are aggregated in a single
>   mbuf, an empty segment that only contains offload
>   information is appended to this segment, and is
>   propagated as is to the application. But if the application
>   sends back to the hypervisor a mbuf with an empty
>   segment, this mbuf is dropped by the hypervisor.
> 
> 
> Didier Pallard (8):
>   net: export IPv6 header extensions skip function
>   net/vmxnet3: return unknown IPv4 extension len ptype
>   net/vmxnet3: gather offload data on first and last segment
>   net/vmxnet3: fix Rx offload information in multiseg packets
>   net/vmxnet3: complete Rx offloads support
>   net/vmxnet3: guess mss if not provided in LRO mode
>   net/vmxnet3: ignore empty segments in reception
>   net/vmxnet3: skip empty segments in transmission
> 
>  drivers/net/vmxnet3/Makefile            |   1 +
>  drivers/net/vmxnet3/base/vmxnet3_defs.h |  27 ++++-
>  drivers/net/vmxnet3/vmxnet3_ethdev.c    |   2 +
>  drivers/net/vmxnet3/vmxnet3_ethdev.h    |   1 +
>  drivers/net/vmxnet3/vmxnet3_rxtx.c      | 200
> ++++++++++++++++++++++++++------
>  lib/librte_net/Makefile                 |   1 +
>  lib/librte_net/rte_net.c                |  21 ++--
>  lib/librte_net/rte_net.h                |  27 +++++
>  lib/librte_net/rte_net_version.map      |   1 +
>  9 files changed, 238 insertions(+), 43 deletions(-)
> 
> --
> 2.11.0

For this series,
Acked-by: Yong Wang <yongwang@vmware.com>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [dpdk-dev] [PATCH 1/8] net: export IPv6 header extensions skip function
  2018-03-28 15:43 ` [dpdk-dev] [PATCH 1/8] net: export IPv6 header extensions skip function Didier Pallard
  2018-04-17 19:28   ` Ferruh Yigit
@ 2018-04-23  8:35   ` Olivier Matz
  1 sibling, 0 replies; 17+ messages in thread
From: Olivier Matz @ 2018-04-23  8:35 UTC (permalink / raw)
  To: Didier Pallard; +Cc: dev

On Wed, Mar 28, 2018 at 05:43:42PM +0200, Didier Pallard wrote:
> skip_ip6_ext function can be exported as a helper, it may be used
> by some PMD to skip IPv6 header extensions.
> 
> Signed-off-by: Didier Pallard <didier.pallard@6wind.com>

Acked-by: Olivier Matz <olivier.matz@6wind.com>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [dpdk-dev] [PATCH 0/8] net/vmxnet3: fix offload issues
  2018-04-20 22:02 ` Yong Wang
@ 2018-04-23 14:46   ` Ferruh Yigit
  0 siblings, 0 replies; 17+ messages in thread
From: Ferruh Yigit @ 2018-04-23 14:46 UTC (permalink / raw)
  To: Yong Wang, Didier Pallard, dev

On 4/20/2018 11:02 PM, Yong Wang wrote:
>> -----Original Message-----
>> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Didier Pallard
>> Sent: Wednesday, March 28, 2018 8:44 AM
>> To: dev@dpdk.org
>> Subject: [dpdk-dev] [PATCH 0/8] net/vmxnet3: fix offload issues
>>
>> This patchset fixes several issues found in vmxnet3 driver
>> when enabling LRO offload support:
>> - Rx offload information are not correctly gathered in
>>   multisegmented packets, leading to inconsistent
>>   packet type and Rx offload bits in resulting mbuf
>> - MSS recovery from offload information is not done
>>   thus LRO mbufs do not contain a correct tso_segsz value.
>> - MSS value is not propagated by the host on some
>>   hypervisor versions (6.0 for example)
>> - If two small TCP segments are aggregated in a single
>>   mbuf, an empty segment that only contains offload
>>   information is appended to this segment, and is
>>   propagated as is to the application. But if the application
>>   sends back to the hypervisor a mbuf with an empty
>>   segment, this mbuf is dropped by the hypervisor.
>>
>>
>> Didier Pallard (8):
>>   net: export IPv6 header extensions skip function
>>   net/vmxnet3: return unknown IPv4 extension len ptype
>>   net/vmxnet3: gather offload data on first and last segment
>>   net/vmxnet3: fix Rx offload information in multiseg packets
>>   net/vmxnet3: complete Rx offloads support
>>   net/vmxnet3: guess mss if not provided in LRO mode
>>   net/vmxnet3: ignore empty segments in reception
>>   net/vmxnet3: skip empty segments in transmission

<...>

> For this series,
> Acked-by: Yong Wang <yongwang@vmware.com>

Series applied to dpdk-next-net/master, thanks.

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2018-04-23 14:46 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-03-28 15:43 [dpdk-dev] [PATCH 0/8] net/vmxnet3: fix offload issues Didier Pallard
2018-03-28 15:43 ` [dpdk-dev] [PATCH 1/8] net: export IPv6 header extensions skip function Didier Pallard
2018-04-17 19:28   ` Ferruh Yigit
2018-04-23  8:35   ` Olivier Matz
2018-03-28 15:43 ` [dpdk-dev] [PATCH 2/8] net/vmxnet3: return unknown IPv4 extension len ptype Didier Pallard
2018-04-16 19:46   ` Yong Wang
2018-04-17  9:09     ` Didier Pallard
2018-03-28 15:43 ` [dpdk-dev] [PATCH 3/8] net/vmxnet3: gather offload data on first and last segment Didier Pallard
2018-03-28 15:43 ` [dpdk-dev] [PATCH 4/8] net/vmxnet3: fix Rx offload information in multiseg packets Didier Pallard
2018-03-28 15:43 ` [dpdk-dev] [PATCH 5/8] net/vmxnet3: complete Rx offloads support Didier Pallard
2018-03-28 15:43 ` [dpdk-dev] [PATCH 6/8] net/vmxnet3: guess mss if not provided in LRO mode Didier Pallard
2018-03-28 15:43 ` [dpdk-dev] [PATCH 7/8] net/vmxnet3: ignore empty segments in reception Didier Pallard
2018-03-28 15:43 ` [dpdk-dev] [PATCH 8/8] net/vmxnet3: skip empty segments in transmission Didier Pallard
2018-04-13  4:44 ` [dpdk-dev] [PATCH 0/8] net/vmxnet3: fix offload issues Yong Wang
2018-04-13 14:33   ` Didier Pallard
2018-04-20 22:02 ` Yong Wang
2018-04-23 14:46   ` Ferruh Yigit

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).