DPDK patches and discussions
 help / color / mirror / Atom feed
From: Olivier Matz <olivier.matz@6wind.com>
To: dev@dpdk.org
Cc: jigsaw@gmail.com
Subject: [dpdk-dev] [PATCH 07/12] mbuf: generic support for TCP segmentation offload
Date: Mon, 10 Nov 2014 16:59:21 +0100	[thread overview]
Message-ID: <1415635166-1364-8-git-send-email-olivier.matz@6wind.com> (raw)
In-Reply-To: <1415635166-1364-1-git-send-email-olivier.matz@6wind.com>

Some of the NICs supported by DPDK are able to accelerate TCP
traffic by using segmentation offload. The application prepares a packet
with a valid TCP header and a size of up to 64K, and delegates the
segmentation to the NIC.

Implement the generic part of TCP segmentation offload in rte_mbuf. It
introduces 2 new fields in rte_mbuf: l4_len (length of L4 header in bytes)
and tso_segsz (MSS of packets).

To delegate the TCP segmentation to the hardware, the user has to:

- set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag implies
  PKT_TX_TCP_CKSUM)
- set PKT_TX_IP_CKSUM if it's IPv4, and set the IP checksum to 0 in
  the packet
- fill the mbuf offload information: l2_len, l3_len, l4_len, tso_segsz
- calculate the pseudo header checksum and set it in the TCP header,
  as required when doing hardware TCP checksum offload

The API is inspired by ixgbe hardware (the next commit adds the
support for ixgbe), but it seems generic enough to be used for other
hw/drivers in the future.

This commit also reworks the way l2_len and l3_len are used in igb
and ixgbe drivers as the l2_l3_len is not available anymore in mbuf.

Signed-off-by: Mirek Walukiewicz <miroslaw.walukiewicz@intel.com>
Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
---
 app/test-pmd/testpmd.c            |  3 ++-
 examples/ipv4_multicast/main.c    |  3 ++-
 lib/librte_mbuf/rte_mbuf.h        | 44 +++++++++++++++++++++++----------------
 lib/librte_pmd_e1000/igb_rxtx.c   | 11 +++++++++-
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 11 +++++++++-
 5 files changed, 50 insertions(+), 22 deletions(-)

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 12adafa..a831e31 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -408,7 +408,8 @@ testpmd_mbuf_ctor(struct rte_mempool *mp,
 	mb->ol_flags     = 0;
 	mb->data_off     = RTE_PKTMBUF_HEADROOM;
 	mb->nb_segs      = 1;
-	mb->l2_l3_len       = 0;
+	mb->l2_len       = 0;
+	mb->l3_len       = 0;
 	mb->vlan_tci     = 0;
 	mb->hash.rss     = 0;
 }
diff --git a/examples/ipv4_multicast/main.c b/examples/ipv4_multicast/main.c
index de5e6be..a31d43d 100644
--- a/examples/ipv4_multicast/main.c
+++ b/examples/ipv4_multicast/main.c
@@ -302,7 +302,8 @@ mcast_out_pkt(struct rte_mbuf *pkt, int use_clone)
 	/* copy metadata from source packet*/
 	hdr->port = pkt->port;
 	hdr->vlan_tci = pkt->vlan_tci;
-	hdr->l2_l3_len = pkt->l2_l3_len;
+	hdr->l2_len = pkt->l2_len;
+	hdr->l3_len = pkt->l3_len;
 	hdr->hash = pkt->hash;
 
 	hdr->ol_flags = pkt->ol_flags;
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index bcd8996..f76b768 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -126,6 +126,19 @@ extern "C" {
 
 #define PKT_TX_VXLAN_CKSUM   (1ULL << 50) /**< TX checksum of VXLAN computed by NIC */
 
+/**
+ * TCP segmentation offload. To enable this offload feature for a
+ * packet to be transmitted on hardware supporting TSO:
+ *  - set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag implies
+ *    PKT_TX_TCP_CKSUM)
+ *  - if it's IPv4, set the PKT_TX_IP_CKSUM flag and write the IP checksum
+ *    to 0 in the packet
+ *  - fill the mbuf offload information: l2_len, l3_len, l4_len, tso_segsz
+ *  - calculate the pseudo header checksum and set it in the TCP header,
+ *    as required when doing hardware TCP checksum offload
+ */
+#define PKT_TX_TCP_SEG       (1ULL << 49)
+
 /* Use final bit of flags to indicate a control mbuf */
 #define CTRL_MBUF_FLAG       (1ULL << 63) /**< Mbuf contains control data */
 
@@ -185,6 +198,7 @@ static inline const char *rte_get_tx_ol_flag_name(uint64_t mask)
 	case PKT_TX_UDP_CKSUM: return "PKT_TX_UDP_CKSUM";
 	case PKT_TX_IEEE1588_TMST: return "PKT_TX_IEEE1588_TMST";
 	case PKT_TX_VXLAN_CKSUM: return "PKT_TX_VXLAN_CKSUM";
+	case PKT_TX_TCP_SEG: return "PKT_TX_TCP_SEG";
 	default: return NULL;
 	}
 }
@@ -264,22 +278,18 @@ struct rte_mbuf {
 
 	/* fields to support TX offloads */
 	union {
-		uint16_t l2_l3_len; /**< combined l2/l3 lengths as single var */
+		uint64_t tx_offload;       /**< combined for easy fetch */
 		struct {
-			uint16_t l3_len:9;      /**< L3 (IP) Header Length. */
-			uint16_t l2_len:7;      /**< L2 (MAC) Header Length. */
-		};
-	};
+			uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
+			uint64_t l3_len:9; /**< L3 (IP) Header Length. */
+			uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
+			uint64_t tso_segsz:16; /**< TCP TSO segment size */
 
-	/* fields for TX offloading of tunnels */
-	union {
-		uint16_t inner_l2_l3_len;
-		/**< combined inner l2/l3 lengths as single var */
-		struct {
-			uint16_t inner_l3_len:9;
-			/**< inner L3 (IP) Header Length. */
-			uint16_t inner_l2_len:7;
-			/**< inner L2 (MAC) Header Length. */
+			/* fields for TX offloading of tunnels */
+			uint16_t inner_l3_len:9; /**< inner L3 (IP) Hdr Length. */
+			uint16_t inner_l2_len:7; /**< inner L2 (MAC) Hdr Length. */
+
+			/* uint64_t unused:8; */
 		};
 	};
 } __rte_cache_aligned;
@@ -631,8 +641,7 @@ static inline void rte_pktmbuf_reset(struct rte_mbuf *m)
 {
 	m->next = NULL;
 	m->pkt_len = 0;
-	m->l2_l3_len = 0;
-	m->inner_l2_l3_len = 0;
+	m->tx_offload = 0;
 	m->vlan_tci = 0;
 	m->nb_segs = 1;
 	m->port = 0xff;
@@ -701,8 +710,7 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *md)
 	mi->data_len = md->data_len;
 	mi->port = md->port;
 	mi->vlan_tci = md->vlan_tci;
-	mi->l2_l3_len = md->l2_l3_len;
-	mi->inner_l2_l3_len = md->inner_l2_l3_len;
+	mi->tx_offload = md->tx_offload;
 	mi->hash = md->hash;
 
 	mi->next = NULL;
diff --git a/lib/librte_pmd_e1000/igb_rxtx.c b/lib/librte_pmd_e1000/igb_rxtx.c
index dbf5074..0a9447e 100644
--- a/lib/librte_pmd_e1000/igb_rxtx.c
+++ b/lib/librte_pmd_e1000/igb_rxtx.c
@@ -361,6 +361,13 @@ eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 	struct rte_mbuf     *tx_pkt;
 	struct rte_mbuf     *m_seg;
 	union igb_vlan_macip vlan_macip_lens;
+	union {
+		uint16_t u16;
+		struct {
+			uint16_t l3_len:9;
+			uint16_t l2_len:7;
+		};
+	} l2_l3_len;
 	uint64_t buf_dma_addr;
 	uint32_t olinfo_status;
 	uint32_t cmd_type_len;
@@ -398,8 +405,10 @@ eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		tx_last = (uint16_t) (tx_id + tx_pkt->nb_segs - 1);
 
 		ol_flags = tx_pkt->ol_flags;
+		l2_l3_len.l2_len = tx_pkt->l2_len;
+		l2_l3_len.l3_len = tx_pkt->l3_len;
 		vlan_macip_lens.f.vlan_tci = tx_pkt->vlan_tci;
-		vlan_macip_lens.f.l2_l3_len = tx_pkt->l2_l3_len;
+		vlan_macip_lens.f.l2_l3_len = l2_l3_len.u16;
 		tx_ol_req = ol_flags & (PKT_TX_VLAN_PKT | PKT_TX_IP_CKSUM |
 			PKT_TX_L4_MASK);
 
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index 70ca254..54a0fc1 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -540,6 +540,13 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 	struct rte_mbuf     *tx_pkt;
 	struct rte_mbuf     *m_seg;
 	union ixgbe_vlan_macip vlan_macip_lens;
+	union {
+		uint16_t u16;
+		struct {
+			uint16_t l3_len:9;
+			uint16_t l2_len:7;
+		};
+	} l2_l3_len;
 	uint64_t buf_dma_addr;
 	uint32_t olinfo_status;
 	uint32_t cmd_type_len;
@@ -583,8 +590,10 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		tx_ol_req = ol_flags & (PKT_TX_VLAN_PKT | PKT_TX_IP_CKSUM |
 			PKT_TX_L4_MASK);
 		if (tx_ol_req) {
+			l2_l3_len.l2_len = tx_pkt->l2_len;
+			l2_l3_len.l3_len = tx_pkt->l3_len;
 			vlan_macip_lens.f.vlan_tci = tx_pkt->vlan_tci;
-			vlan_macip_lens.f.l2_l3_len = tx_pkt->l2_l3_len;
+			vlan_macip_lens.f.l2_l3_len = l2_l3_len.u16;
 
 			/* If new context need be built or reuse the exist ctx. */
 			ctx = what_advctx_update(txq, tx_ol_req,
-- 
2.1.0

  parent reply	other threads:[~2014-11-10 15:58 UTC|newest]

Thread overview: 112+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-11-10 15:59 [dpdk-dev] [PATCH 00/12] add TSO support Olivier Matz
2014-11-10 15:59 ` [dpdk-dev] [PATCH 01/12] igb/ixgbe: fix IP checksum calculation Olivier Matz
2014-11-10 15:59 ` [dpdk-dev] [PATCH 02/12] ixgbe: fix remaining pkt_flags variable size to 64 bits Olivier Matz
2014-11-10 16:59   ` Bruce Richardson
2014-11-10 15:59 ` [dpdk-dev] [PATCH 03/12] mbuf: move vxlan_cksum flag definition at the proper place Olivier Matz
2014-11-10 17:09   ` Bruce Richardson
2014-11-10 15:59 ` [dpdk-dev] [PATCH 04/12] mbuf: add help about TX checksum flags Olivier Matz
2014-11-10 17:10   ` Bruce Richardson
2014-11-10 15:59 ` [dpdk-dev] [PATCH 05/12] mbuf: remove too specific PKT_TX_OFFLOAD_MASK definition Olivier Matz
2014-11-10 17:14   ` Bruce Richardson
2014-11-10 20:59     ` Olivier MATZ
2014-11-10 15:59 ` [dpdk-dev] [PATCH 06/12] mbuf: add functions to get the name of an ol_flag Olivier Matz
2014-11-10 17:29   ` Bruce Richardson
2014-11-10 20:54     ` Olivier MATZ
2014-11-12 17:21     ` Ananyev, Konstantin
2014-11-12 17:44       ` Olivier MATZ
2014-11-10 15:59 ` Olivier Matz [this message]
2014-11-11  3:17   ` [dpdk-dev] [PATCH 07/12] mbuf: generic support for TCP segmentation offload Liu, Jijiang
2014-11-12 13:09   ` Ananyev, Konstantin
2014-11-10 15:59 ` [dpdk-dev] [PATCH 08/12] ixgbe: support " Olivier Matz
2014-11-10 15:59 ` [dpdk-dev] [PATCH 09/12] testpmd: fix use of offload flags in testpmd Olivier Matz
2014-11-10 15:59 ` [dpdk-dev] [PATCH 10/12] testpmd: rework csum forward engine Olivier Matz
2014-11-11  8:35   ` Liu, Jijiang
2014-11-11  9:55     ` Olivier MATZ
2014-11-10 15:59 ` [dpdk-dev] [PATCH 11/12] testpmd: support TSO in " Olivier Matz
2014-11-10 15:59 ` [dpdk-dev] [PATCH 12/12] testpmd: add a verbose mode " Olivier Matz
2014-11-11  9:21 ` [dpdk-dev] [PATCH 00/12] add TSO support Olivier MATZ
2014-11-11  9:48   ` Olivier MATZ
2014-11-14 17:03 ` [dpdk-dev] [PATCH v2 00/13] " Olivier Matz
2014-11-14 17:03   ` [dpdk-dev] [PATCH v2 01/13] igb/ixgbe: fix IP checksum calculation Olivier Matz
2014-11-14 17:03   ` [dpdk-dev] [PATCH v2 02/13] ixgbe: fix remaining pkt_flags variable size to 64 bits Olivier Matz
2014-11-17 16:47     ` Walukiewicz, Miroslaw
2014-11-17 17:03       ` Olivier MATZ
2014-11-17 17:40         ` Thomas Monjalon
2014-11-14 17:03   ` [dpdk-dev] [PATCH v2 03/13] mbuf: move vxlan_cksum flag definition at the proper place Olivier Matz
2014-11-17 22:05     ` Thomas Monjalon
2014-11-18 14:10       ` Olivier MATZ
2014-11-14 17:03   ` [dpdk-dev] [PATCH v2 04/13] mbuf: add help about TX checksum flags Olivier Matz
2014-11-14 17:03   ` [dpdk-dev] [PATCH v2 05/13] mbuf: remove too specific PKT_TX_OFFLOAD_MASK definition Olivier Matz
2014-11-17 10:35     ` Bruce Richardson
2014-11-14 17:03   ` [dpdk-dev] [PATCH v2 06/13] mbuf: add functions to get the name of an ol_flag Olivier Matz
2014-11-17 10:39     ` Bruce Richardson
2014-11-17 12:51       ` Olivier MATZ
2014-11-17 19:00     ` Ananyev, Konstantin
2014-11-18  9:29       ` Olivier MATZ
2014-11-19 11:06         ` Ananyev, Konstantin
2014-11-25 10:37           ` Ananyev, Konstantin
2014-11-25 12:15             ` Zhang, Helin
2014-11-25 12:37               ` Olivier MATZ
2014-11-25 13:31                 ` Zhang, Helin
2014-11-25 13:49               ` Ananyev, Konstantin
2014-11-26  0:58                 ` Zhang, Helin
2014-11-14 17:03   ` [dpdk-dev] [PATCH v2 07/13] testpmd: fix use of offload flags in testpmd Olivier Matz
2014-11-14 17:03   ` [dpdk-dev] [PATCH v2 08/13] testpmd: rework csum forward engine Olivier Matz
2014-11-17  8:11     ` Liu, Jijiang
2014-11-17 13:00       ` Olivier MATZ
2014-11-14 17:03   ` [dpdk-dev] [PATCH v2 09/13] mbuf: introduce new checksum API Olivier Matz
2014-11-17 18:15     ` Ananyev, Konstantin
2014-11-18  9:10       ` Olivier MATZ
2014-11-14 17:03   ` [dpdk-dev] [PATCH v2 10/13] mbuf: generic support for TCP segmentation offload Olivier Matz
2014-11-17 23:33     ` Ananyev, Konstantin
2014-11-14 17:03   ` [dpdk-dev] [PATCH v2 11/13] ixgbe: support " Olivier Matz
2014-11-17 18:26     ` Ananyev, Konstantin
2014-11-18  9:11       ` Olivier MATZ
2014-11-14 17:03   ` [dpdk-dev] [PATCH v2 12/13] testpmd: support TSO in csum forward engine Olivier Matz
2014-11-14 17:03   ` [dpdk-dev] [PATCH v2 13/13] testpmd: add a verbose mode " Olivier Matz
2014-11-20 22:58   ` [dpdk-dev] [PATCH v3 00/13] add TSO support Olivier Matz
2014-11-20 22:58     ` [dpdk-dev] [PATCH v3 01/13] igb/ixgbe: fix IP checksum calculation Olivier Matz
2014-11-20 22:58     ` [dpdk-dev] [PATCH v3 02/13] ixgbe: fix remaining pkt_flags variable size to 64 bits Olivier Matz
2014-11-20 22:58     ` [dpdk-dev] [PATCH v3 03/13] mbuf: reorder tx ol_flags Olivier Matz
2014-11-25 10:22       ` Thomas Monjalon
2014-11-20 22:58     ` [dpdk-dev] [PATCH v3 04/13] mbuf: add help about TX checksum flags Olivier Matz
2014-11-20 22:58     ` [dpdk-dev] [PATCH v3 05/13] mbuf: remove too specific PKT_TX_OFFLOAD_MASK definition Olivier Matz
2014-11-20 22:58     ` [dpdk-dev] [PATCH v3 06/13] mbuf: add functions to get the name of an ol_flag Olivier Matz
2014-11-25 10:23       ` Thomas Monjalon
2014-11-20 22:58     ` [dpdk-dev] [PATCH v3 07/13] testpmd: fix use of offload flags in testpmd Olivier Matz
2014-11-25 11:52       ` Ananyev, Konstantin
2014-11-20 22:58     ` [dpdk-dev] [PATCH v3 08/13] testpmd: rework csum forward engine Olivier Matz
2014-11-26 10:10       ` Ananyev, Konstantin
2014-11-26 11:14         ` Olivier MATZ
2014-11-26 12:25           ` Ananyev, Konstantin
2014-11-26 14:55             ` Olivier MATZ
2014-11-26 16:34               ` Ananyev, Konstantin
2014-11-27  8:34                 ` Liu, Jijiang
2014-11-26 13:59           ` Liu, Jijiang
2014-11-20 22:58     ` [dpdk-dev] [PATCH v3 09/13] mbuf: introduce new checksum API Olivier Matz
2014-11-20 22:58     ` [dpdk-dev] [PATCH v3 10/13] mbuf: generic support for TCP segmentation offload Olivier Matz
2014-11-20 22:58     ` [dpdk-dev] [PATCH v3 11/13] ixgbe: support " Olivier Matz
2014-11-20 22:58     ` [dpdk-dev] [PATCH v3 12/13] testpmd: support TSO in csum forward engine Olivier Matz
2014-11-20 22:58     ` [dpdk-dev] [PATCH v3 13/13] testpmd: add a verbose mode " Olivier Matz
2014-11-26 15:04     ` [dpdk-dev] [PATCH v4 00/13] add TSO support Olivier Matz
2014-11-26 15:04       ` [dpdk-dev] [PATCH v4 01/13] igb/ixgbe: fix IP checksum calculation Olivier Matz
2014-11-26 15:04       ` [dpdk-dev] [PATCH v4 02/13] ixgbe: fix remaining pkt_flags variable size to 64 bits Olivier Matz
2014-11-26 15:04       ` [dpdk-dev] [PATCH v4 03/13] mbuf: reorder tx ol_flags Olivier Matz
2014-11-26 15:04       ` [dpdk-dev] [PATCH v4 04/13] mbuf: add help about TX checksum flags Olivier Matz
2014-11-26 15:04       ` [dpdk-dev] [PATCH v4 05/13] mbuf: remove too specific PKT_TX_OFFLOAD_MASK definition Olivier Matz
2014-11-26 15:04       ` [dpdk-dev] [PATCH v4 06/13] mbuf: add functions to get the name of an ol_flag Olivier Matz
2014-11-26 15:04       ` [dpdk-dev] [PATCH v4 07/13] testpmd: fix use of offload flags in testpmd Olivier Matz
2014-11-26 15:04       ` [dpdk-dev] [PATCH v4 08/13] testpmd: rework csum forward engine Olivier Matz
2014-11-26 20:02         ` Ananyev, Konstantin
2014-11-27  8:26           ` Liu, Jijiang
2014-11-27  9:10           ` Olivier MATZ
2014-11-27 11:02             ` Ananyev, Konstantin
2014-11-28  8:54               ` Liu, Jijiang
2014-11-28  9:54                 ` Olivier MATZ
2014-11-26 15:04       ` [dpdk-dev] [PATCH v4 09/13] mbuf: introduce new checksum API Olivier Matz
2014-11-26 15:04       ` [dpdk-dev] [PATCH v4 10/13] mbuf: generic support for TCP segmentation offload Olivier Matz
2014-11-26 15:04       ` [dpdk-dev] [PATCH v4 11/13] ixgbe: support " Olivier Matz
2014-11-26 15:04       ` [dpdk-dev] [PATCH v4 12/13] testpmd: support TSO in csum forward engine Olivier Matz
2014-11-26 21:23         ` Ananyev, Konstantin
2014-11-26 15:04       ` [dpdk-dev] [PATCH v4 13/13] testpmd: add a verbose mode " Olivier Matz
2014-11-26 18:30       ` [dpdk-dev] [PATCH v4 00/13] add TSO support Thomas Monjalon

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1415635166-1364-8-git-send-email-olivier.matz@6wind.com \
    --to=olivier.matz@6wind.com \
    --cc=dev@dpdk.org \
    --cc=jigsaw@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).