DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH] Request for comments on ixgbe TSO support
@ 2013-10-04 17:06 Qinglai Xiao
  2013-10-04 17:06 ` [dpdk-dev] [PATCH] ixgbe: TCP/UDP segment offload support on 82599 Qinglai Xiao
  2013-10-04 17:40 ` [dpdk-dev] [PATCH] Request for comments on ixgbe TSO support Stephen Hemminger
  0 siblings, 2 replies; 10+ messages in thread
From: Qinglai Xiao @ 2013-10-04 17:06 UTC (permalink / raw)
  To: dev

This patch is a draft of TSO on 82599. That is, it is not expected to be
accepted as is.
The problem is where to put the mss field. In this patch, the mss is put in
the union of hash in rte_pktmbuf. It is not the best place, but it is quite
convenient, since hash is not used in TX procedure.
The idea is to avoid increasing sizeof(struct rte_pktmbuf), while keeping mss
easy to access.

However, the hash is also misleading, coz mss has nothing to do with Rx hash.
A more formal way could be rename hash as below:

	union {
		uint32_t data;
		struct rx_hash hash;
		uint32_t tx_mss;
	} misc;	

It is gonna be a major change coz it affects the core data structure.

Any comments will be appreciated.

Qinglai Xiao (1):
  ixgbe: TCP/UDP segment offload support on 82599.

 lib/librte_mbuf/rte_mbuf.h        |    6 +++++-
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c |   32 +++++++++++++++++++++++++++++---
 2 files changed, 34 insertions(+), 4 deletions(-)

-- 
1.7.10.4

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [dpdk-dev] [PATCH] ixgbe: TCP/UDP segment offload support on 82599.
  2013-10-04 17:06 [dpdk-dev] [PATCH] Request for comments on ixgbe TSO support Qinglai Xiao
@ 2013-10-04 17:06 ` Qinglai Xiao
  2013-10-04 17:40 ` [dpdk-dev] [PATCH] Request for comments on ixgbe TSO support Stephen Hemminger
  1 sibling, 0 replies; 10+ messages in thread
From: Qinglai Xiao @ 2013-10-04 17:06 UTC (permalink / raw)
  To: dev

Add support for TCP/UDP segment offload on 82599.
User can turn on TSO by setting MSS in the first frame.
Meantime, the L2 and L3 len, together with offload flags must be set in the
first frame accordingly. Otherwise the driver will cease the sending.
---
 lib/librte_mbuf/rte_mbuf.h        |    6 +++++-
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c |   32 +++++++++++++++++++++++++++++---
 2 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index d914562..ea4bb88 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -159,6 +159,10 @@ struct rte_pktmbuf {
 			uint16_t id;
 		} fdir;             /**< Filter identifier if FDIR enabled */
 		uint32_t sched;     /**< Hierarchical scheduler */
+		uint16_t mss;       /**< Maximum Segment Size. If more than zero,
+					 then TSO is enabled. User is responsible
+					 for setting vlan_macip and TCP/IP cksum
+					 accordingly. */
 	} hash;                 /**< hash information */
 };
 
@@ -195,7 +199,7 @@ struct rte_mbuf {
 	uint16_t refcnt_reserved;     /**< Do not use this field */
 #endif
 	uint8_t type;                 /**< Type of mbuf. */
-	uint8_t reserved;             /**< Unused field. Required for padding. */
+	uint8_t reserved;             /**< Unused field. Required for padding. */ 
 	uint16_t ol_flags;            /**< Offload features. */
 
 	union {
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index 5c8668e..63d7f8a 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -498,7 +498,7 @@ ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
 static inline void
 ixgbe_set_xmit_ctx(struct igb_tx_queue* txq,
 		volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
-		uint16_t ol_flags, uint32_t vlan_macip_lens)
+		uint16_t ol_flags, uint32_t vlan_macip_lens, uint16_t mss)
 {
 	uint32_t type_tucmd_mlhl;
 	uint32_t mss_l4len_idx;
@@ -520,6 +520,10 @@ ixgbe_set_xmit_ctx(struct igb_tx_queue* txq,
 
 	/* Specify which HW CTX to upload. */
 	mss_l4len_idx = (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
+
+	/* MSS is reqired for TSO. The user must set mss accordingly */
+	mss_l4len_idx |= mss << IXGBE_ADVTXD_MSS_SHIFT;
+
 	switch (ol_flags & PKT_TX_L4_MASK) {
 	case PKT_TX_UDP_CKSUM:
 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
@@ -694,6 +698,7 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 	uint32_t vlan_macip_lens;
 	uint32_t ctx = 0;
 	uint32_t new_ctx;
+	uint16_t mss;
 
 	txq = tx_queue;
 	sw_ring = txq->sw_ring;
@@ -719,10 +724,25 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		 * are needed for offload functionality.
 		 */
 		ol_flags = tx_pkt->ol_flags;
+
 		vlan_macip_lens = tx_pkt->pkt.vlan_macip.data;
+		mss = tx_pkt->pkt.hash.mss;
 
 		/* If hardware offload required */
 		tx_ol_req = (uint16_t)(ol_flags & PKT_TX_OFFLOAD_MASK);
+
+		/*
+		 * If mss is set, we assume TSO is required.
+		 *
+		 * If TSO is turned on, the caller must set the offload bits
+		 * accordingly, otherwise we have to drop the packet, because
+		 * we have no knowledge of L2 or L3.
+		 */
+		if (!tx_ol_req && mss) {
+			PMD_TX_LOG(DEBUG, "TSO set without offload bits. Abort sending.");
+			goto end_of_tx;
+		}
+
 		if (tx_ol_req) {
 			/* If new context need be built or reuse the exist ctx. */
 			ctx = what_advctx_update(txq, tx_ol_req,
@@ -841,6 +861,11 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		 */
 		cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
 			IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
+
+		/* Enable TSE bit for TSO */
+		if (mss)
+			cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
+
 		olinfo_status = (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
 #ifdef RTE_LIBRTE_IEEE1588
 		if (ol_flags & PKT_TX_IEEE1588_TMST)
@@ -868,7 +893,7 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 				}
 
 				ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
-				    vlan_macip_lens);
+				    vlan_macip_lens, mss);
 
 				txe->last_id = tx_last;
 				tx_id = txe->next_id;
@@ -3392,7 +3417,8 @@ ixgbe_dev_tx_init(struct rte_eth_dev *dev)
 
 	/* Enable TX CRC (checksum offload requirement) */
 	hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
-	hlreg0 |= IXGBE_HLREG0_TXCRCEN;
+	/* IXGBE_HLREG0_TXPADEN is required for TCP segmentation offload */
+	hlreg0 |= IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN;
 	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
 
 	/* Setup the Base and Length of the Tx Descriptor Rings */
-- 
1.7.10.4

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [dpdk-dev] [PATCH] Request for comments on ixgbe TSO support
  2013-10-04 17:06 [dpdk-dev] [PATCH] Request for comments on ixgbe TSO support Qinglai Xiao
  2013-10-04 17:06 ` [dpdk-dev] [PATCH] ixgbe: TCP/UDP segment offload support on 82599 Qinglai Xiao
@ 2013-10-04 17:40 ` Stephen Hemminger
  2013-10-04 17:54   ` jigsaw
  1 sibling, 1 reply; 10+ messages in thread
From: Stephen Hemminger @ 2013-10-04 17:40 UTC (permalink / raw)
  To: Qinglai Xiao; +Cc: dev

On Fri,  4 Oct 2013 20:06:52 +0300
Qinglai Xiao <jigsaw@gmail.com> wrote:

> This patch is a draft of TSO on 82599. That is, it is not expected to be
> accepted as is.
> The problem is where to put the mss field. In this patch, the mss is put in
> the union of hash in rte_pktmbuf. It is not the best place, but it is quite
> convenient, since hash is not used in TX procedure.
> The idea is to avoid increasing sizeof(struct rte_pktmbuf), while keeping mss
> easy to access.
> 
> However, the hash is also misleading, coz mss has nothing to do with Rx hash.
> A more formal way could be rename hash as below:
> 
> 	union {
> 		uint32_t data;
> 		struct rx_hash hash;
> 		uint32_t tx_mss;
> 	} misc;	
> 
> It is gonna be a major change coz it affects the core data structure.
> 
> Any comments will be appreciated.
> 
> Qinglai Xiao (1):
>   ixgbe: TCP/UDP segment offload support on 82599.
> 
>  lib/librte_mbuf/rte_mbuf.h        |    6 +++++-
>  lib/librte_pmd_ixgbe/ixgbe_rxtx.c |   32 +++++++++++++++++++++++++++++---
>  2 files changed, 34 insertions(+), 4 deletions(-)
> 

This will work for local generated packets but overlapping existing
field won't work well for forwarding.

What we want to be able to do is to take offload (jumbo) packets in
with from virtio (need better driver support which I am doing), and then
send them through to network devices.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [dpdk-dev] [PATCH] Request for comments on ixgbe TSO support
  2013-10-04 17:40 ` [dpdk-dev] [PATCH] Request for comments on ixgbe TSO support Stephen Hemminger
@ 2013-10-04 17:54   ` jigsaw
  2013-10-04 18:23     ` Stephen Hemminger
  0 siblings, 1 reply; 10+ messages in thread
From: jigsaw @ 2013-10-04 17:54 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev

Hi Stephen,


>>This will work for local generated packets but overlapping existing field won't work well for forwarding.
So adding a new mss field in mbuf could be the way out? or I
misunderstand something.

>> What we want to be able to do is to take offload (jumbo) packets in with from virtio
Sorry I don't understand why TSO is connected to virtio. Could you
give more details here?
Are you suggesting this TSO patch overlaps your work, or it should be
based on your work?


thx &
rgds,
-Qinglai

On Fri, Oct 4, 2013 at 8:40 PM, Stephen Hemminger
<stephen@networkplumber.org> wrote:
> On Fri,  4 Oct 2013 20:06:52 +0300
> Qinglai Xiao <jigsaw@gmail.com> wrote:
>
>> This patch is a draft of TSO on 82599. That is, it is not expected to be
>> accepted as is.
>> The problem is where to put the mss field. In this patch, the mss is put in
>> the union of hash in rte_pktmbuf. It is not the best place, but it is quite
>> convenient, since hash is not used in TX procedure.
>> The idea is to avoid increasing sizeof(struct rte_pktmbuf), while keeping mss
>> easy to access.
>>
>> However, the hash is also misleading, coz mss has nothing to do with Rx hash.
>> A more formal way could be rename hash as below:
>>
>>       union {
>>               uint32_t data;
>>               struct rx_hash hash;
>>               uint32_t tx_mss;
>>       } misc;
>>
>> It is gonna be a major change coz it affects the core data structure.
>>
>> Any comments will be appreciated.
>>
>> Qinglai Xiao (1):
>>   ixgbe: TCP/UDP segment offload support on 82599.
>>
>>  lib/librte_mbuf/rte_mbuf.h        |    6 +++++-
>>  lib/librte_pmd_ixgbe/ixgbe_rxtx.c |   32 +++++++++++++++++++++++++++++---
>>  2 files changed, 34 insertions(+), 4 deletions(-)
>>
>
> This will work for local generated packets but overlapping existing
> field won't work well for forwarding.
>
> What we want to be able to do is to take offload (jumbo) packets in
> with from virtio (need better driver support which I am doing), and then
> send them through to network devices.
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [dpdk-dev] [PATCH] Request for comments on ixgbe TSO support
  2013-10-04 17:54   ` jigsaw
@ 2013-10-04 18:23     ` Stephen Hemminger
  2013-10-04 18:38       ` Venkatesan, Venky
  0 siblings, 1 reply; 10+ messages in thread
From: Stephen Hemminger @ 2013-10-04 18:23 UTC (permalink / raw)
  To: jigsaw; +Cc: dev

On Fri, 4 Oct 2013 20:54:31 +0300
jigsaw <jigsaw@gmail.com> wrote:

> Hi Stephen,
> 
> 
> >>This will work for local generated packets but overlapping existing field won't work well for forwarding.
> So adding a new mss field in mbuf could be the way out? or I
> misunderstand something.
> 
> >> What we want to be able to do is to take offload (jumbo) packets in with from virtio
> Sorry I don't understand why TSO is connected to virtio. Could you
> give more details here?
> Are you suggesting this TSO patch overlaps your work, or it should be
> based on your work?

I am working on a better virtio driver. Already have lots more features working,
and doing better offload support is planned.

TSO is a subset of the more generic segment offload (GSO) on Linux.
With virtio is possible to receive GSO packets as well as send them.
This feature is negotiated between guest and host.

The idea is that between guests they can exchange jumbo (64K) packets even with
a smaller MTU. This helps in many ways. One example is only a single
route lookup is needed.

Another issue is that the current DPDK model of offload flags for checksum is problematic.
It matches what is available in Intel hardware and is not easily generalizable to other
devices.

Current DPDK flag is checksum bad. I would like to change it to checksum known
good. Then drivers which dont' do checksum would leave it 0, but if receive
checksum is known good set it to 1.  Basically 1 means known good, and
0 means unknown (or bad).  Higher level software can then do sw checksum
if necessary.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [dpdk-dev] [PATCH] Request for comments on ixgbe TSO support
  2013-10-04 18:23     ` Stephen Hemminger
@ 2013-10-04 18:38       ` Venkatesan, Venky
  2013-10-04 19:10         ` jigsaw
  0 siblings, 1 reply; 10+ messages in thread
From: Venkatesan, Venky @ 2013-10-04 18:38 UTC (permalink / raw)
  To: Stephen Hemminger, jigsaw; +Cc: dev

Stephen, 

Agree on the checksum flag definition. I'm presuming that we should do this on the L3 and L4 checksums separately (that ol_flags field is another one that needs extension in the mbuf). 

Regards, 
-Venky


-----Original Message-----
From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Stephen Hemminger
Sent: Friday, October 04, 2013 11:23 AM
To: jigsaw
Cc: dev@dpdk.org
Subject: Re: [dpdk-dev] [PATCH] Request for comments on ixgbe TSO support

On Fri, 4 Oct 2013 20:54:31 +0300
jigsaw <jigsaw@gmail.com> wrote:

> Hi Stephen,
> 
> 
> >>This will work for local generated packets but overlapping existing field won't work well for forwarding.
> So adding a new mss field in mbuf could be the way out? or I 
> misunderstand something.
> 
> >> What we want to be able to do is to take offload (jumbo) packets in 
> >> with from virtio
> Sorry I don't understand why TSO is connected to virtio. Could you 
> give more details here?
> Are you suggesting this TSO patch overlaps your work, or it should be 
> based on your work?

I am working on a better virtio driver. Already have lots more features working, and doing better offload support is planned.

TSO is a subset of the more generic segment offload (GSO) on Linux.
With virtio is possible to receive GSO packets as well as send them.
This feature is negotiated between guest and host.

The idea is that between guests they can exchange jumbo (64K) packets even with a smaller MTU. This helps in many ways. One example is only a single route lookup is needed.

Another issue is that the current DPDK model of offload flags for checksum is problematic.
It matches what is available in Intel hardware and is not easily generalizable to other devices.

Current DPDK flag is checksum bad. I would like to change it to checksum known good. Then drivers which dont' do checksum would leave it 0, but if receive checksum is known good set it to 1.  Basically 1 means known good, and
0 means unknown (or bad).  Higher level software can then do sw checksum if necessary.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [dpdk-dev] [PATCH] Request for comments on ixgbe TSO support
  2013-10-04 18:38       ` Venkatesan, Venky
@ 2013-10-04 19:10         ` jigsaw
  2013-10-04 21:19           ` Stephen Hemminger
  2013-10-08  6:59           ` Zhu, Heqing
  0 siblings, 2 replies; 10+ messages in thread
From: jigsaw @ 2013-10-04 19:10 UTC (permalink / raw)
  To: Venkatesan, Venky; +Cc: dev

Hi Stephen,

Thanks for showing a bigger picture.

GSO is quite big implementation, that I think it won't be easily
ported to DPDK. The mbuf needs to be equipped with many fields from
skb to be able to deal with GSO.
Do you have the plan to port GSO to DPDK, or you would like to keep
GSO in scope of virtio?

Regarding checksum flags, actually I was also thinking of extending
ol_flags but then I gave it up coz I was worried about the size of
mbuf.
My current patch has to push some work to user, due to the fact that
mbuf delivers too few info (such as L2 and L3 protocol details).

Besides, as you mentioned, the ixgbe driver doesn't leverage the
hardware receive checksum offloading at all. And if this is to be
supported, the checksum flag need further extension.
(On the other hand, TSO doesn't care about receive checksum offloading).
Again, do you have plans to extend cksum flags so that virio feels
more comfortable with DPDK?

Hi Venky,

I can either make the commit now as is, or wait till the cksum flags
extension is in place. If Stephen (or somebody else) has the plan for
better support for cksum offloading or GSO, it is perhaps better to
implement TSO on top of that.

BTW, I have another small question. Current TSO patch offloads the
TCP/IP pseudo cksum work to user. Do you think DPDK could provide some
utility functions for TCP/IPv4/IPv6 pseudo cksum calculation and
updating?

thx &
rgds,
-Qinglai


On Fri, Oct 4, 2013 at 9:38 PM, Venkatesan, Venky
<venky.venkatesan@intel.com> wrote:
> Stephen,
>
> Agree on the checksum flag definition. I'm presuming that we should do this on the L3 and L4 checksums separately (that ol_flags field is another one that needs extension in the mbuf).
>
> Regards,
> -Venky
>
>
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Stephen Hemminger
> Sent: Friday, October 04, 2013 11:23 AM
> To: jigsaw
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] Request for comments on ixgbe TSO support
>
> On Fri, 4 Oct 2013 20:54:31 +0300
> jigsaw <jigsaw@gmail.com> wrote:
>
>> Hi Stephen,
>>
>>
>> >>This will work for local generated packets but overlapping existing field won't work well for forwarding.
>> So adding a new mss field in mbuf could be the way out? or I
>> misunderstand something.
>>
>> >> What we want to be able to do is to take offload (jumbo) packets in
>> >> with from virtio
>> Sorry I don't understand why TSO is connected to virtio. Could you
>> give more details here?
>> Are you suggesting this TSO patch overlaps your work, or it should be
>> based on your work?
>
> I am working on a better virtio driver. Already have lots more features working, and doing better offload support is planned.
>
> TSO is a subset of the more generic segment offload (GSO) on Linux.
> With virtio is possible to receive GSO packets as well as send them.
> This feature is negotiated between guest and host.
>
> The idea is that between guests they can exchange jumbo (64K) packets even with a smaller MTU. This helps in many ways. One example is only a single route lookup is needed.
>
> Another issue is that the current DPDK model of offload flags for checksum is problematic.
> It matches what is available in Intel hardware and is not easily generalizable to other devices.
>
> Current DPDK flag is checksum bad. I would like to change it to checksum known good. Then drivers which dont' do checksum would leave it 0, but if receive checksum is known good set it to 1.  Basically 1 means known good, and
> 0 means unknown (or bad).  Higher level software can then do sw checksum if necessary.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [dpdk-dev] [PATCH] Request for comments on ixgbe TSO support
  2013-10-04 19:10         ` jigsaw
@ 2013-10-04 21:19           ` Stephen Hemminger
  2013-10-08  6:59           ` Zhu, Heqing
  1 sibling, 0 replies; 10+ messages in thread
From: Stephen Hemminger @ 2013-10-04 21:19 UTC (permalink / raw)
  To: jigsaw; +Cc: dev

On Fri, 4 Oct 2013 22:10:33 +0300
jigsaw <jigsaw@gmail.com> wrote:

> Hi Stephen,
> 
> Thanks for showing a bigger picture.
> 
> GSO is quite big implementation, that I think it won't be easily
> ported to DPDK. The mbuf needs to be equipped with many fields from
> skb to be able to deal with GSO.
> Do you have the plan to port GSO to DPDK, or you would like to keep
> GSO in scope of virtio?
> 
> Regarding checksum flags, actually I was also thinking of extending
> ol_flags but then I gave it up coz I was worried about the size of
> mbuf.
> My current patch has to push some work to user, due to the fact that
> mbuf delivers too few info (such as L2 and L3 protocol details).
> 
> Besides, as you mentioned, the ixgbe driver doesn't leverage the
> hardware receive checksum offloading at all. And if this is to be
> supported, the checksum flag need further extension.
> (On the other hand, TSO doesn't care about receive checksum offloading).
> Again, do you have plans to extend cksum flags so that virio feels
> more comfortable with DPDK?
> 
> Hi Venky,
> 
> I can either make the commit now as is, or wait till the cksum flags
> extension is in place. If Stephen (or somebody else) has the plan for
> better support for cksum offloading or GSO, it is perhaps better to
> implement TSO on top of that.
> 
> BTW, I have another small question. Current TSO patch offloads the
> TCP/IP pseudo cksum work to user. Do you think DPDK could provide some
> utility functions for TCP/IPv4/IPv6 pseudo cksum calculation and
> updating?
> 
> thx &
> rgds,
> -Qinglai

I want to get Tx checksum offload in virtio working first.
Just looking ahead to Rx.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [dpdk-dev] [PATCH] Request for comments on ixgbe TSO support
  2013-10-04 19:10         ` jigsaw
  2013-10-04 21:19           ` Stephen Hemminger
@ 2013-10-08  6:59           ` Zhu, Heqing
  2013-10-08  7:56             ` jigsaw
  1 sibling, 1 reply; 10+ messages in thread
From: Zhu, Heqing @ 2013-10-08  6:59 UTC (permalink / raw)
  To: jigsaw, Venkatesan, Venky; +Cc: dev

Hi Qinglai, 

>> Besides, as you mentioned, the ixgbe driver doesn't leverage the hardware receive checksum offloading at all.

On the Rx side, ixgbe driver support the Rx checksum validation via hardware offload. There is a simple example @ DPDK\app\test-pmd\csumonly.c to check IPv4/L4 checksum error~ 

		/* Update the L3/L4 checksum error packet count  */
		rx_bad_ip_csum += (uint16_t) ((pkt_ol_flags & PKT_RX_IP_CKSUM_BAD) != 0);
		rx_bad_l4_csum += (uint16_t) ((pkt_ol_flags & PKT_RX_L4_CKSUM_BAD) != 0);

-----Original Message-----
From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of jigsaw
Sent: Saturday, October 05, 2013 3:11 AM
To: Venkatesan, Venky
Cc: dev@dpdk.org
Subject: Re: [dpdk-dev] [PATCH] Request for comments on ixgbe TSO support

Hi Stephen,

Thanks for showing a bigger picture.

GSO is quite big implementation, that I think it won't be easily ported to DPDK. The mbuf needs to be equipped with many fields from skb to be able to deal with GSO.
Do you have the plan to port GSO to DPDK, or you would like to keep GSO in scope of virtio?

Regarding checksum flags, actually I was also thinking of extending ol_flags but then I gave it up coz I was worried about the size of mbuf.
My current patch has to push some work to user, due to the fact that mbuf delivers too few info (such as L2 and L3 protocol details).

Besides, as you mentioned, the ixgbe driver doesn't leverage the hardware receive checksum offloading at all. And if this is to be supported, the checksum flag need further extension.
(On the other hand, TSO doesn't care about receive checksum offloading).
Again, do you have plans to extend cksum flags so that virio feels more comfortable with DPDK?

Hi Venky,

I can either make the commit now as is, or wait till the cksum flags extension is in place. If Stephen (or somebody else) has the plan for better support for cksum offloading or GSO, it is perhaps better to implement TSO on top of that.

BTW, I have another small question. Current TSO patch offloads the TCP/IP pseudo cksum work to user. Do you think DPDK could provide some utility functions for TCP/IPv4/IPv6 pseudo cksum calculation and updating?

thx &
rgds,
-Qinglai


On Fri, Oct 4, 2013 at 9:38 PM, Venkatesan, Venky <venky.venkatesan@intel.com> wrote:
> Stephen,
>
> Agree on the checksum flag definition. I'm presuming that we should do this on the L3 and L4 checksums separately (that ol_flags field is another one that needs extension in the mbuf).
>
> Regards,
> -Venky
>
>
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Stephen Hemminger
> Sent: Friday, October 04, 2013 11:23 AM
> To: jigsaw
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] Request for comments on ixgbe TSO 
> support
>
> On Fri, 4 Oct 2013 20:54:31 +0300
> jigsaw <jigsaw@gmail.com> wrote:
>
>> Hi Stephen,
>>
>>
>> >>This will work for local generated packets but overlapping existing field won't work well for forwarding.
>> So adding a new mss field in mbuf could be the way out? or I 
>> misunderstand something.
>>
>> >> What we want to be able to do is to take offload (jumbo) packets 
>> >> in with from virtio
>> Sorry I don't understand why TSO is connected to virtio. Could you 
>> give more details here?
>> Are you suggesting this TSO patch overlaps your work, or it should be 
>> based on your work?
>
> I am working on a better virtio driver. Already have lots more features working, and doing better offload support is planned.
>
> TSO is a subset of the more generic segment offload (GSO) on Linux.
> With virtio is possible to receive GSO packets as well as send them.
> This feature is negotiated between guest and host.
>
> The idea is that between guests they can exchange jumbo (64K) packets even with a smaller MTU. This helps in many ways. One example is only a single route lookup is needed.
>
> Another issue is that the current DPDK model of offload flags for checksum is problematic.
> It matches what is available in Intel hardware and is not easily generalizable to other devices.
>
> Current DPDK flag is checksum bad. I would like to change it to 
> checksum known good. Then drivers which dont' do checksum would leave 
> it 0, but if receive checksum is known good set it to 1.  Basically 1 
> means known good, and
> 0 means unknown (or bad).  Higher level software can then do sw checksum if necessary.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [dpdk-dev] [PATCH] Request for comments on ixgbe TSO support
  2013-10-08  6:59           ` Zhu, Heqing
@ 2013-10-08  7:56             ` jigsaw
  0 siblings, 0 replies; 10+ messages in thread
From: jigsaw @ 2013-10-08  7:56 UTC (permalink / raw)
  To: Zhu, Heqing; +Cc: dev

Hi Heqing,

Thanks for pointing out the Rx cksum test case.
I was looking for usage of IXGBE_RXDADV_ERR_SHIFT and
IXGBE_RXDADV_ERR_MASK without any result, therefore I assumed these 2
fields are not checked.

So it turns out that the implementation of Rx cksum is in routine
rx_desc_error_to_pkt_flags of ixgbe_rxtx.c. (Correct me if I'm wrong).
This routine, however, doesn't comply with the datasheet.

See end of page 324 of 82599 datasheet:

>>L4E (10) — L4 integrity error is valid only when the L4I bit in the Status field is set. It is
>>active if L4 processing fails (TCP checksum or UDP checksum or SCTP CRC).

Also beginning of section 7.1.11 confirms that the error bits are
valid when status bits (IPCS/L4CS) are turned on.
To be compliant with datasheet, the driver must check respective
status bits before checking error bits in receive descriptor.

thx &
rgds,
-Qinglai


On Tue, Oct 8, 2013 at 9:59 AM, Zhu, Heqing <heqing.zhu@intel.com> wrote:
> Hi Qinglai,
>
>>> Besides, as you mentioned, the ixgbe driver doesn't leverage the hardware receive checksum offloading at all.
>
> On the Rx side, ixgbe driver support the Rx checksum validation via hardware offload. There is a simple example @ DPDK\app\test-pmd\csumonly.c to check IPv4/L4 checksum error~
>
>                 /* Update the L3/L4 checksum error packet count  */
>                 rx_bad_ip_csum += (uint16_t) ((pkt_ol_flags & PKT_RX_IP_CKSUM_BAD) != 0);
>                 rx_bad_l4_csum += (uint16_t) ((pkt_ol_flags & PKT_RX_L4_CKSUM_BAD) != 0);
>
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of jigsaw
> Sent: Saturday, October 05, 2013 3:11 AM
> To: Venkatesan, Venky
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] Request for comments on ixgbe TSO support
>
> Hi Stephen,
>
> Thanks for showing a bigger picture.
>
> GSO is quite big implementation, that I think it won't be easily ported to DPDK. The mbuf needs to be equipped with many fields from skb to be able to deal with GSO.
> Do you have the plan to port GSO to DPDK, or you would like to keep GSO in scope of virtio?
>
> Regarding checksum flags, actually I was also thinking of extending ol_flags but then I gave it up coz I was worried about the size of mbuf.
> My current patch has to push some work to user, due to the fact that mbuf delivers too few info (such as L2 and L3 protocol details).
>
> Besides, as you mentioned, the ixgbe driver doesn't leverage the hardware receive checksum offloading at all. And if this is to be supported, the checksum flag need further extension.
> (On the other hand, TSO doesn't care about receive checksum offloading).
> Again, do you have plans to extend cksum flags so that virio feels more comfortable with DPDK?
>
> Hi Venky,
>
> I can either make the commit now as is, or wait till the cksum flags extension is in place. If Stephen (or somebody else) has the plan for better support for cksum offloading or GSO, it is perhaps better to implement TSO on top of that.
>
> BTW, I have another small question. Current TSO patch offloads the TCP/IP pseudo cksum work to user. Do you think DPDK could provide some utility functions for TCP/IPv4/IPv6 pseudo cksum calculation and updating?
>
> thx &
> rgds,
> -Qinglai
>
>
> On Fri, Oct 4, 2013 at 9:38 PM, Venkatesan, Venky <venky.venkatesan@intel.com> wrote:
>> Stephen,
>>
>> Agree on the checksum flag definition. I'm presuming that we should do this on the L3 and L4 checksums separately (that ol_flags field is another one that needs extension in the mbuf).
>>
>> Regards,
>> -Venky
>>
>>
>> -----Original Message-----
>> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Stephen Hemminger
>> Sent: Friday, October 04, 2013 11:23 AM
>> To: jigsaw
>> Cc: dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH] Request for comments on ixgbe TSO
>> support
>>
>> On Fri, 4 Oct 2013 20:54:31 +0300
>> jigsaw <jigsaw@gmail.com> wrote:
>>
>>> Hi Stephen,
>>>
>>>
>>> >>This will work for local generated packets but overlapping existing field won't work well for forwarding.
>>> So adding a new mss field in mbuf could be the way out? or I
>>> misunderstand something.
>>>
>>> >> What we want to be able to do is to take offload (jumbo) packets
>>> >> in with from virtio
>>> Sorry I don't understand why TSO is connected to virtio. Could you
>>> give more details here?
>>> Are you suggesting this TSO patch overlaps your work, or it should be
>>> based on your work?
>>
>> I am working on a better virtio driver. Already have lots more features working, and doing better offload support is planned.
>>
>> TSO is a subset of the more generic segment offload (GSO) on Linux.
>> With virtio is possible to receive GSO packets as well as send them.
>> This feature is negotiated between guest and host.
>>
>> The idea is that between guests they can exchange jumbo (64K) packets even with a smaller MTU. This helps in many ways. One example is only a single route lookup is needed.
>>
>> Another issue is that the current DPDK model of offload flags for checksum is problematic.
>> It matches what is available in Intel hardware and is not easily generalizable to other devices.
>>
>> Current DPDK flag is checksum bad. I would like to change it to
>> checksum known good. Then drivers which dont' do checksum would leave
>> it 0, but if receive checksum is known good set it to 1.  Basically 1
>> means known good, and
>> 0 means unknown (or bad).  Higher level software can then do sw checksum if necessary.

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2013-10-08  7:55 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-10-04 17:06 [dpdk-dev] [PATCH] Request for comments on ixgbe TSO support Qinglai Xiao
2013-10-04 17:06 ` [dpdk-dev] [PATCH] ixgbe: TCP/UDP segment offload support on 82599 Qinglai Xiao
2013-10-04 17:40 ` [dpdk-dev] [PATCH] Request for comments on ixgbe TSO support Stephen Hemminger
2013-10-04 17:54   ` jigsaw
2013-10-04 18:23     ` Stephen Hemminger
2013-10-04 18:38       ` Venkatesan, Venky
2013-10-04 19:10         ` jigsaw
2013-10-04 21:19           ` Stephen Hemminger
2013-10-08  6:59           ` Zhu, Heqing
2013-10-08  7:56             ` jigsaw

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).