From: Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>
To: Huichao Cai <chcchc88@163.com>, dev@dpdk.org
Cc: konstantin.ananyev@intel.com
Subject: Re: [PATCH] ip_frag: add IPv4 fast fragment switch and test data
Date: Sat, 4 Jun 2022 00:50:22 +0100 [thread overview]
Message-ID: <a35a980e-61db-7e71-ded9-8b0b1ac08b92@yandex.ru> (raw)
In-Reply-To: <1654161183-5391-1-git-send-email-chcchc88@163.com>
> Some NIC drivers support the DEV_TX_OFFLOAD_MBUF_FAST_FREE offload
> (the device supports optimization for fast release of mbufs; when set,
> the application must guarantee that per-queue all mbufs come from the
> same mempool and have refcnt = 1). In order to adapt to this offload,
> we need to modify the existing fragment logic (which attaches mbufs,
> so it is fast; we can call it fast fragment mode) and add fragment
> logic in a non-attach-mbuf mode (slow fragment mode). Add some test
> data for this modification.
That doesn't look like a good idea to me.
Yes, drivers configured with MBUF_FAST_FREE would not work
correctly with indirect mbufs.
So it is the application's responsibility to choose what it wants:
either indirect mbufs enabled or MBUF_FAST_FREE enabled.
Inside the lib we shouldn't try to guess what the particular
driver configuration was.
Plus it is a change (and regression) of existing behavior -
right now it is perfectly fine to use the same mempool for both
direct and indirect mbufs.
If you really have a use-case for doing fragmentation via
full copying of all packet data, then probably the easiest and
safest way would be to introduce a new function:
rte_ipv4_fragment_clone_packet(...) or so.
>
> Signed-off-by: Huichao Cai <chcchc88@163.com>
> ---
> app/test/test_ipfrag.c | 14 +++++++--
> lib/ip_frag/rte_ipv4_fragmentation.c | 56 +++++++++++++++++++++++++-----------
> 2 files changed, 51 insertions(+), 19 deletions(-)
>
> diff --git a/app/test/test_ipfrag.c b/app/test/test_ipfrag.c
> index 610a86b..f5fe4b8 100644
> --- a/app/test/test_ipfrag.c
> +++ b/app/test/test_ipfrag.c
> @@ -407,12 +407,20 @@ static void ut_teardown(void)
> pktid);
> }
>
> - if (tests[i].ipv == 4)
> - len = rte_ipv4_fragment_packet(b, pkts_out, BURST,
> + if (tests[i].ipv == 4) {
> + if (i % 2)
> + len = rte_ipv4_fragment_packet(b,
> + pkts_out, BURST,
> tests[i].mtu_size,
> direct_pool,
> indirect_pool);
> - else if (tests[i].ipv == 6)
> + else
> + len = rte_ipv4_fragment_packet(b,
> + pkts_out, BURST,
> + tests[i].mtu_size,
> + direct_pool,
> + direct_pool);
> + } else if (tests[i].ipv == 6)
> len = rte_ipv6_fragment_packet(b, pkts_out, BURST,
> tests[i].mtu_size,
> direct_pool,
> diff --git a/lib/ip_frag/rte_ipv4_fragmentation.c b/lib/ip_frag/rte_ipv4_fragmentation.c
> index a562424..65bfad7 100644
> --- a/lib/ip_frag/rte_ipv4_fragmentation.c
> +++ b/lib/ip_frag/rte_ipv4_fragmentation.c
> @@ -102,6 +102,11 @@ static inline uint16_t __create_ipopt_frag_hdr(uint8_t *iph,
> * MBUF pool used for allocating direct buffers for the output fragments.
> * @param pool_indirect
> * MBUF pool used for allocating indirect buffers for the output fragments.
> + * If pool_indirect == pool_direct,this means that the fragment will adapt
> + * to DEV_TX_OFFLOAD_MBUF_FAST_FREE offload.
> + * DEV_TX_OFFLOAD_MBUF_FAST_FREE: Device supports optimization
> + * for fast release of mbufs. When set application must guarantee that
> + * per-queue all mbufs comes from the same mempool and has refcnt = 1.
> * @return
> * Upon successful completion - number of output fragments placed
> * in the pkts_out array.
> @@ -123,6 +128,7 @@ static inline uint16_t __create_ipopt_frag_hdr(uint8_t *iph,
> uint16_t frag_bytes_remaining;
> uint8_t ipopt_frag_hdr[IPV4_HDR_MAX_LEN];
> uint16_t ipopt_len;
> + bool is_fast_frag_mode = true;
>
> /*
> * Formal parameter checking.
> @@ -133,6 +139,9 @@ static inline uint16_t __create_ipopt_frag_hdr(uint8_t *iph,
> unlikely(mtu_size < RTE_ETHER_MIN_MTU))
> return -EINVAL;
>
> + if (pool_indirect == pool_direct)
> + is_fast_frag_mode = false;
> +
> in_hdr = rte_pktmbuf_mtod(pkt_in, struct rte_ipv4_hdr *);
> header_len = (in_hdr->version_ihl & RTE_IPV4_HDR_IHL_MASK) *
> RTE_IPV4_IHL_MULTIPLIER;
> @@ -190,30 +199,45 @@ static inline uint16_t __create_ipopt_frag_hdr(uint8_t *iph,
> out_seg_prev = out_pkt;
> more_out_segs = 1;
> while (likely(more_out_segs && more_in_segs)) {
> - struct rte_mbuf *out_seg = NULL;
> uint32_t len;
>
> - /* Allocate indirect buffer */
> - out_seg = rte_pktmbuf_alloc(pool_indirect);
> - if (unlikely(out_seg == NULL)) {
> - rte_pktmbuf_free(out_pkt);
> - __free_fragments(pkts_out, out_pkt_pos);
> - return -ENOMEM;
> - }
> - out_seg_prev->next = out_seg;
> - out_seg_prev = out_seg;
> -
> - /* Prepare indirect buffer */
> - rte_pktmbuf_attach(out_seg, in_seg);
> len = frag_bytes_remaining;
> if (len > (in_seg->data_len - in_seg_data_pos)) {
> len = in_seg->data_len - in_seg_data_pos;
> }
> - out_seg->data_off = in_seg->data_off + in_seg_data_pos;
> - out_seg->data_len = (uint16_t)len;
> +
> + if (is_fast_frag_mode) {
> + struct rte_mbuf *out_seg = NULL;
> + /* Allocate indirect buffer */
> + out_seg = rte_pktmbuf_alloc(pool_indirect);
> + if (unlikely(out_seg == NULL)) {
> + rte_pktmbuf_free(out_pkt);
> + __free_fragments(pkts_out, out_pkt_pos);
> + return -ENOMEM;
> + }
> + out_seg_prev->next = out_seg;
> + out_seg_prev = out_seg;
> +
> + /* Prepare indirect buffer */
> + rte_pktmbuf_attach(out_seg, in_seg);
> +
> + out_seg->data_off = in_seg->data_off +
> + in_seg_data_pos;
> + out_seg->data_len = (uint16_t)len;
> + out_pkt->nb_segs += 1;
> + } else {
> + rte_memcpy(
> + rte_pktmbuf_mtod_offset(out_pkt, char *,
> + out_pkt->pkt_len),
> + rte_pktmbuf_mtod_offset(in_seg, char *,
> + in_seg_data_pos),
> + len);
> + out_pkt->data_len = (uint16_t)(len +
> + out_pkt->data_len);
> + }
> +
> out_pkt->pkt_len = (uint16_t)(len +
> out_pkt->pkt_len);
> - out_pkt->nb_segs += 1;
> in_seg_data_pos += len;
> frag_bytes_remaining -= len;
>
Thread overview: 5+ messages
2022-06-02 9:13 Huichao Cai
2022-06-03 23:50 ` Konstantin Ananyev [this message]
2022-06-04 2:13 ` Huichao Cai
2022-06-04 2:19 ` Huichao Cai
2022-06-04 11:36 ` Konstantin Ananyev