From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <olivier.matz@6wind.com>
Received: from mail.droids-corp.org (zoll.droids-corp.org [94.23.50.67])
 by dpdk.org (Postfix) with ESMTP id 3427B1B05
 for <dev@dpdk.org>; Fri, 29 Mar 2019 13:54:31 +0100 (CET)
Received: from lfbn-1-5920-128.w90-110.abo.wanadoo.fr ([90.110.126.128]
 helo=droids-corp.org)
 by mail.droids-corp.org with esmtpsa (TLS1.0:RSA_AES_256_CBC_SHA1:256)
 (Exim 4.89) (envelope-from <olivier.matz@6wind.com>)
 id 1h9r43-000281-Hz; Fri, 29 Mar 2019 13:56:57 +0100
Received: by droids-corp.org (sSMTP sendmail emulation);
 Fri, 29 Mar 2019 13:54:27 +0100
Date: Fri, 29 Mar 2019 13:54:27 +0100
From: Olivier Matz <olivier.matz@6wind.com>
To: Konstantin Ananyev <konstantin.ananyev@intel.com>
Cc: dev@dpdk.org, akhil.goyal@nxp.com
Message-ID: <20190329125427.hdwevmm4wwl73tlj@platinum>
References: <20190326154320.29913-1-konstantin.ananyev@intel.com>
 <20190329102726.27716-1-konstantin.ananyev@intel.com>
 <20190329102726.27716-2-konstantin.ananyev@intel.com>
MIME-Version: 1.0
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
In-Reply-To: <20190329102726.27716-2-konstantin.ananyev@intel.com>
User-Agent: NeoMutt/20170113 (1.7.2)
Subject: Re: [dpdk-dev] [PATCH v4 1/9] mbuf: new function to generate raw Tx
 offload value
X-BeenThere: dev@dpdk.org
X-Mailman-Version: 2.1.15
Precedence: list
List-Id: DPDK patches and discussions <dev.dpdk.org>
List-Unsubscribe: <https://mails.dpdk.org/options/dev>,
 <mailto:dev-request@dpdk.org?subject=unsubscribe>
List-Archive: <http://mails.dpdk.org/archives/dev/>
List-Post: <mailto:dev@dpdk.org>
List-Help: <mailto:dev-request@dpdk.org?subject=help>
List-Subscribe: <https://mails.dpdk.org/listinfo/dev>,
 <mailto:dev-request@dpdk.org?subject=subscribe>
X-List-Received-Date: Fri, 29 Mar 2019 12:54:31 -0000

Hi Konstantin,

On Fri, Mar 29, 2019 at 10:27:18AM +0000, Konstantin Ananyev wrote:
> Operations to set/update bit-fields often cause compilers
> to generate suboptimal code.
> To help avoid such situations for tx_offload fields:
> introduce new enum for tx_offload bit-fields lengths and offsets,
> and new function to generate raw tx_offload value.
> 
> Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
> Acked-by: Akhil Goyal <akhil.goyal@nxp.com>

I understand the need. Out of curiosity, do you have any performance
numbers to share?

A few cosmetic questions below.

> ---
>  lib/librte_mbuf/rte_mbuf.h | 79 ++++++++++++++++++++++++++++++++++----
>  1 file changed, 72 insertions(+), 7 deletions(-)
> 
> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> index d961ccaf6..0b197e8ce 100644
> --- a/lib/librte_mbuf/rte_mbuf.h
> +++ b/lib/librte_mbuf/rte_mbuf.h
> @@ -479,6 +479,31 @@ struct rte_mbuf_sched {
>  	uint16_t reserved;   /**< Reserved. */
>  }; /**< Hierarchical scheduler */
>  
> +/**
> + * Enum for the tx_offload bit-field lengths and offsets.
> + * Defines the layout of the rte_mbuf tx_offload field.
> + */
> +enum {
> +	RTE_MBUF_L2_LEN_BITS = 7,
> +	RTE_MBUF_L3_LEN_BITS = 9,
> +	RTE_MBUF_L4_LEN_BITS = 8,
> +	RTE_MBUF_TSO_SEGSZ_BITS = 16,
> +	RTE_MBUF_OUTL3_LEN_BITS = 9,
> +	RTE_MBUF_OUTL2_LEN_BITS = 7,
> +	RTE_MBUF_L2_LEN_OFS = 0,
> +	RTE_MBUF_L3_LEN_OFS = RTE_MBUF_L2_LEN_OFS + RTE_MBUF_L2_LEN_BITS,
> +	RTE_MBUF_L4_LEN_OFS = RTE_MBUF_L3_LEN_OFS + RTE_MBUF_L3_LEN_BITS,
> +	RTE_MBUF_TSO_SEGSZ_OFS = RTE_MBUF_L4_LEN_OFS + RTE_MBUF_L4_LEN_BITS,
> +	RTE_MBUF_OUTL3_LEN_OFS =
> +		RTE_MBUF_TSO_SEGSZ_OFS + RTE_MBUF_TSO_SEGSZ_BITS,
> +	RTE_MBUF_OUTL2_LEN_OFS =
> +		RTE_MBUF_OUTL3_LEN_OFS + RTE_MBUF_OUTL3_LEN_BITS,
> +	RTE_MBUF_TXOFLD_UNUSED_OFS =
> +		RTE_MBUF_OUTL2_LEN_OFS + RTE_MBUF_OUTL2_LEN_BITS,
> +	RTE_MBUF_TXOFLD_UNUSED_BITS =
> +		sizeof(uint64_t) * CHAR_BIT - RTE_MBUF_TXOFLD_UNUSED_OFS,
> +};
> +

What is the advantage of defining an enum instead of #defines?
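
For instance, I would have expected the equivalent with #defines
(same values as in your enum):

	#define RTE_MBUF_L2_LEN_BITS	7
	#define RTE_MBUF_L2_LEN_OFS	0
	#define RTE_MBUF_L3_LEN_BITS	9
	#define RTE_MBUF_L3_LEN_OFS	\
		(RTE_MBUF_L2_LEN_OFS + RTE_MBUF_L2_LEN_BITS)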

In any case, I wonder if it wouldn't be clearer to change the order like
this:

enum {
	RTE_MBUF_L2_LEN_OFS = 0,
	RTE_MBUF_L2_LEN_BITS = 7,
	RTE_MBUF_L3_LEN_OFS = RTE_MBUF_L2_LEN_OFS + RTE_MBUF_L2_LEN_BITS,
	RTE_MBUF_L3_LEN_BITS = 9,
	RTE_MBUF_L4_LEN_OFS = RTE_MBUF_L3_LEN_OFS + RTE_MBUF_L3_LEN_BITS,
	RTE_MBUF_L4_LEN_BITS = 8,
...


>  /**
>   * The generic rte_mbuf, containing a packet mbuf.
>   */
> @@ -640,19 +665,24 @@ struct rte_mbuf {
>  		uint64_t tx_offload;       /**< combined for easy fetch */
>  		__extension__
>  		struct {
> -			uint64_t l2_len:7;
> +			uint64_t l2_len:RTE_MBUF_L2_LEN_BITS;
>  			/**< L2 (MAC) Header Length for non-tunneling pkt.
>  			 * Outer_L4_len + ... + Inner_L2_len for tunneling pkt.
>  			 */
> -			uint64_t l3_len:9; /**< L3 (IP) Header Length. */
> -			uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
> -			uint64_t tso_segsz:16; /**< TCP TSO segment size */
> +			uint64_t l3_len:RTE_MBUF_L3_LEN_BITS;
> +			/**< L3 (IP) Header Length. */
> +			uint64_t l4_len:RTE_MBUF_L4_LEN_BITS;
> +			/**< L4 (TCP/UDP) Header Length. */
> +			uint64_t tso_segsz:RTE_MBUF_TSO_SEGSZ_BITS;
> +			/**< TCP TSO segment size */
>  
>  			/* fields for TX offloading of tunnels */
> -			uint64_t outer_l3_len:9; /**< Outer L3 (IP) Hdr Length. */
> -			uint64_t outer_l2_len:7; /**< Outer L2 (MAC) Hdr Length. */
> +			uint64_t outer_l3_len:RTE_MBUF_OUTL3_LEN_BITS;
> +			/**< Outer L3 (IP) Hdr Length. */
> +			uint64_t outer_l2_len:RTE_MBUF_OUTL2_LEN_BITS;
> +			/**< Outer L2 (MAC) Hdr Length. */
>  
> -			/* uint64_t unused:8; */
> +			/* uint64_t unused:RTE_MBUF_TXOFLD_UNUSED_BITS; */
>  		};
>  	};
>  
> @@ -2243,6 +2273,41 @@ static inline int rte_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *tail
>  	return 0;
>  }
>  
> +/**
> + * @warning
> + * @b EXPERIMENTAL: This API may change without prior notice.
> + *
> + * For given input values generate raw tx_offload value.
> + * @param il2
> + *   l2_len value.
> + * @param il3
> + *   l3_len value.
> + * @param il4
> + *   l4_len value.
> + * @param tso
> + *   tso_segsz value.
> + * @param ol3
> + *   outer_l3_len value.
> + * @param ol2
> + *   outer_l2_len value.
> + * @param unused
> + *   unused value.
> + * @return
> + *   raw tx_offload value.
> + */
> +static __rte_always_inline uint64_t
> +rte_mbuf_tx_offload(uint64_t il2, uint64_t il3, uint64_t il4, uint64_t tso,
> +	uint64_t ol3, uint64_t ol2, uint64_t unused)
> +{
> +	return il2 << RTE_MBUF_L2_LEN_OFS |
> +		il3 << RTE_MBUF_L3_LEN_OFS |
> +		il4 << RTE_MBUF_L4_LEN_OFS |
> +		tso << RTE_MBUF_TSO_SEGSZ_OFS |
> +		ol3 << RTE_MBUF_OUTL3_LEN_OFS |
> +		ol2 << RTE_MBUF_OUTL2_LEN_OFS |
> +		unused << RTE_MBUF_TXOFLD_UNUSED_OFS;
> +}
> +
>  /**


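For reference, here is how I imagine a caller would use it. This is an
untested sketch; the wrapper name and the lengths are made up (a plain
IPv4/TCP packet with TSO, no tunnel):

	static inline void
	set_tso_ipv4_tcp(struct rte_mbuf *m)
	{
		/* one 64-bit store instead of several bit-field updates:
		 * 14B Ethernet, 20B IPv4, 20B TCP, 1460B TSO segment size */
		m->tx_offload = rte_mbuf_tx_offload(14, 20, 20, 1460, 0, 0, 0);
	}
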
From what I see, the problem is quite similar to what was done with
rte_mbuf_sched_set() recently. So I wondered if it was possible to
declare a structure like this:

	struct rte_mbuf_ol_len {
	        uint64_t l2_len:7;
	        uint64_t l3_len:9; /**< L3 (IP) Header Length. */
	        uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
		...
	};

And have the set function like this:

        m->l = (struct rte_mbuf_ol_len) {
                .l2_len = l2_len,
                .l3_len = l3_len,
                .l4_len = l4_len,
		...
        };

This would avoid the definition of the offsets and bits, but I didn't
find any way to declare these fields as anonymous in the mbuf structure.
Did you try that way too?
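
To be explicit, the union member holding these fields would have to be
named, something like this (untested sketch):

	union {
		uint64_t tx_offload;       /**< combined for easy fetch */
		struct rte_mbuf_ol_len l;  /* cannot be anonymous: users
					    * would have to write
					    * m->l.l2_len, not m->l2_len */
	};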


Thanks,
Olivier
