DPDK patches and discussions
 help / color / mirror / Atom feed
From: Maryam Tahhan <mtahhan@redhat.com>
To: Ariel Otilibili <ariel.otilibili@6wind.com>, dev@dpdk.org
Cc: stable@dpdk.org, Thomas Monjalon <thomas@monjalon.net>,
	David Marchand <david.marchand@redhat.com>,
	Stephen Hemminger <stephen@networkplumber.org>,
	Ciara Loftus <ciara.loftus@intel.com>
Subject: Re: [PATCH v3 2/2] net/af_xdp: Refactor af_xdp_tx_zc()
Date: Wed, 29 Jan 2025 17:58:10 +0000	[thread overview]
Message-ID: <f067b5ea-ade9-4b2d-8a66-9f8446138bb5@redhat.com> (raw)
In-Reply-To: <20250128231152.249497-3-ariel.otilibili@6wind.com>


On 28/01/2025 23:11, Ariel Otilibili wrote:
> Both legs of the loop share the same logic: either to go out of the
> function, or to fall through to the next instructions.
>
> For that, maybe_cpy_pkt() is introduced.
>
> Bugzilla ID: 1440
> Signed-off-by: Ariel Otilibili <ariel.otilibili@6wind.com>
> ---
>   drivers/net/af_xdp/rte_eth_af_xdp.c | 131 ++++++++++++++++++----------
>   1 file changed, 84 insertions(+), 47 deletions(-)
>
> diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
> index 092bcb73aa0a..efe23283fb52 100644
> --- a/drivers/net/af_xdp/rte_eth_af_xdp.c
> +++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
> @@ -536,13 +536,80 @@ kick_tx(struct pkt_tx_queue *txq, struct xsk_ring_cons *cq)
>   		}
>   }
>   
> +static inline uint64_t
> +update_addr(struct rte_mbuf *mbuf, struct xsk_umem_info *umem)
> +{
> +	return (uint64_t)mbuf - (uint64_t)umem->buffer
> +		- umem->mb_pool->header_size;
> +}
> +
> +static inline uint64_t
> +update_offset(struct rte_mbuf *mbuf, struct xsk_umem_info *umem)
> +{
> +	uint64_t offset;
> +
> +	offset = rte_pktmbuf_mtod(mbuf, uint64_t) - (uint64_t)mbuf
> +		+ umem->mb_pool->header_size;
> +	return offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
> +}
> +
> +static struct rte_mbuf *
> +maybe_kick_tx(struct pkt_tx_queue *txq, uint32_t *idx_tx, struct rte_mbuf *mbuf)
> +{
> +	struct rte_mbuf *ret = mbuf;
> +
> +	if (!xsk_ring_prod__reserve(&txq->tx, 1, idx_tx)) {
> +		kick_tx(txq, &txq->pair->cq);
> +		if (!xsk_ring_prod__reserve(&txq->tx, 1, idx_tx))
> +			ret = NULL;
> +	}
> +
> +	return ret;
> +}


[MT] I don't see why we are passing in mbuf here?


> +
> +static struct rte_mbuf *
> +maybe_create_mbuf(struct pkt_tx_queue *txq, uint32_t *idx_tx,
> +		  struct xsk_umem_info *umem) {
> +	struct rte_mbuf *local_mbuf = rte_pktmbuf_alloc(umem->mb_pool);
> +
> +	if (local_mbuf == NULL)
> +		goto out;
> +
> +	if (!xsk_ring_prod__reserve(&txq->tx, 1, idx_tx)) {
> +		rte_pktmbuf_free(local_mbuf);
> +		local_mbuf = NULL;
> +		goto out;
> +	}
> +
> +out:
> +	return local_mbuf;
> +}
> +
> +static void
> +maybe_cpy_pkt(bool is_mbuf_equal, struct xsk_umem_info *umem,
> +	      uint64_t addr_plus_offset, struct rte_mbuf *mbuf,
> +	      struct xdp_desc *desc)
> +{
> +	void *pkt;
> +
> +	if(is_mbuf_equal)
> +		goto out;
> +
> +	pkt = xsk_umem__get_data(umem->buffer, addr_plus_offset);
> +	rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *), desc->len);
> +	rte_pktmbuf_free(mbuf);
> +
> +out:
> +	return;
> +}


[MT] does this really need to be an inline function? it wasn't common 
code between the blocks?


> +
>   #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
>   static uint16_t
>   af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>   {
>   	struct pkt_tx_queue *txq = queue;
>   	struct xsk_umem_info *umem = txq->umem;
> -	struct rte_mbuf *mbuf;
> +	struct rte_mbuf *mbuf, *local_mbuf;
>   	unsigned long tx_bytes = 0;
>   	int i;
>   	uint32_t idx_tx;
> @@ -551,61 +618,31 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>   	uint64_t addr, offset;
>   	struct xsk_ring_cons *cq = &txq->pair->cq;
>   	uint32_t free_thresh = cq->size >> 1;
> +	bool is_true;
>   
>   	if (xsk_cons_nb_avail(cq, free_thresh) >= free_thresh)
>   		pull_umem_cq(umem, XSK_RING_CONS__DEFAULT_NUM_DESCS, cq);
>   
>   	for (i = 0; i < nb_pkts; i++) {
>   		mbuf = bufs[i];
> +		is_true = mbuf->pool == umem->mb_pool;
>   
> -		if (mbuf->pool == umem->mb_pool) {
> -			if (!xsk_ring_prod__reserve(&txq->tx, 1, &idx_tx)) {
> -				kick_tx(txq, cq);
> -				if (!xsk_ring_prod__reserve(&txq->tx, 1,
> -							    &idx_tx))
> -					goto out;
> -			}
> -			desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx);
> -			desc->len = mbuf->pkt_len;
> -			addr = (uint64_t)mbuf - (uint64_t)umem->buffer -
> -					umem->mb_pool->header_size;
> -			offset = rte_pktmbuf_mtod(mbuf, uint64_t) -
> -					(uint64_t)mbuf +
> -					umem->mb_pool->header_size;
> -			offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
> -			desc->addr = addr | offset;
> -			tx_bytes += desc->len;
> -			count++;
> -		} else {
> -			struct rte_mbuf *local_mbuf =
> -					rte_pktmbuf_alloc(umem->mb_pool);
> -			void *pkt;
> -
> -			if (local_mbuf == NULL)
> -				goto out;
> +		if (is_true)
> +			local_mbuf = maybe_kick_tx(txq, &idx_tx, mbuf);
> +		else
> +			local_mbuf = maybe_create_mbuf(txq, &idx_tx, umem);
>   
> -			if (!xsk_ring_prod__reserve(&txq->tx, 1, &idx_tx)) {
> -				rte_pktmbuf_free(local_mbuf);
> -				goto out;
> -			}
> +		if (!local_mbuf)
> +		  goto out;
>   
> -			desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx);
> -			desc->len = mbuf->pkt_len;
> -
> -			addr = (uint64_t)local_mbuf - (uint64_t)umem->buffer -
> -					umem->mb_pool->header_size;
> -			offset = rte_pktmbuf_mtod(local_mbuf, uint64_t) -
> -					(uint64_t)local_mbuf +
> -					umem->mb_pool->header_size;
> -			pkt = xsk_umem__get_data(umem->buffer, addr + offset);
> -			offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
> -			desc->addr = addr | offset;
> -			rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
> -					desc->len);
> -			tx_bytes += desc->len;
> -			rte_pktmbuf_free(mbuf);
> -			count++;
> -		}
> +		desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx);
> +		desc->len = mbuf->pkt_len;
> +		addr = update_addr(local_mbuf, umem);
> +		offset = update_offset(local_mbuf, umem);
> +		desc->addr = addr | offset;
> +		maybe_cpy_pkt(is_true, umem, addr + offset, mbuf, desc);
> +		tx_bytes += desc->len;
> +		count++;
>   	}
>   
>   out:


Firstly thank you for your efforts to clean up the code. I believe a 
simpler cleanup approach would better balance readability + 
maintainability. This approach would focus on eliminating the repeated 
code in both branches of the conditional block. For me the main 
repetition between the branches is the code that reserves and fills the 
descriptor info, Please find an example below...

Note: The following is all untested code (don't even know if it 
compiles) it's just to give an idea around the cleanup I had in mind:

static inline int
reserve_and_fill_desc(struct pkt_tx_queue *txq, struct xdp_desc **desc,
                       struct rte_mbuf *mbuf, struct xsk_umem_info *umem, uint32_t *idx_tx)
{
     uint64_t addr, offset;

     if (!xsk_ring_prod__reserve(&txq->tx, 1, idx_tx))
         return -1;

     *desc = xsk_ring_prod__tx_desc(&txq->tx, *idx_tx);
     (*desc)->len = mbuf->pkt_len;

     addr = (uint64_t)mbuf - (uint64_t)umem->buffer - umem->mb_pool->header_size;
     offset = rte_pktmbuf_mtod(mbuf, uint64_t) - (uint64_t)mbuf +
              umem->mb_pool->header_size;
     offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
     (*desc)->addr = addr | offset;

     return 0;
}


Then the main function would look something like:


static uint16_t
af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
     struct pkt_tx_queue *txq = queue;
     struct xsk_umem_info *umem = txq->umem;
     struct rte_mbuf *mbuf;
     unsigned long tx_bytes = 0;
     int i;
     uint32_t idx_tx;
     uint16_t count = 0;
     struct xdp_desc *desc;
     struct xsk_ring_cons *cq = &txq->pair->cq;
     uint32_t free_thresh = cq->size >> 1;

     if (xsk_cons_nb_avail(cq, free_thresh) >= free_thresh)
         pull_umem_cq(umem, XSK_RING_CONS__DEFAULT_NUM_DESCS, cq);

     for (i = 0; i < nb_pkts; i++) {
         mbuf = bufs[i];

         if (mbuf->pool == umem->mb_pool) {
             if (reserve_and_fill_desc(txq, &desc, mbuf, umem, &idx_tx) < 0) {
                 kick_tx(txq, cq);
                 if (reserve_and_fill_desc(txq, &desc, mbuf, umem, &idx_tx) < 0)
                     goto out;
             }
             
             tx_bytes += desc->len;
             count++;
         } else {
             struct rte_mbuf *local_mbuf = rte_pktmbuf_alloc(umem->mb_pool);
             void *pkt;

             if (local_mbuf == NULL)
                 goto out;

             if (reserve_and_fill_desc(txq, &desc, local_mbuf, umem, &idx_tx) < 0) {
                 rte_pktmbuf_free(local_mbuf);
                 goto out;
             }

             pkt = xsk_umem__get_data(umem->buffer, (desc->addr & ~0xFFF) +
                                      (desc->addr & 0xFFF));
             rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *), desc->len);
             rte_pktmbuf_free(mbuf);
             tx_bytes += desc->len;
             count++;
         }


     }

out:
     xsk_ring_prod__submit(&txq->tx, count);
     kick_tx(txq, cq);

     txq->stats.tx_pkts += count;
     txq->stats.tx_bytes += tx_bytes;
     txq->stats.tx_dropped += nb_pkts - count;

     return count;
}


Please let me know your thoughts, and I’d be happy to discuss further


      reply	other threads:[~2025-01-29 17:58 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-01-16 19:56 [PATCH 0/2] Fix use after free, and refactor af_xdp_tx_zc() Ariel Otilibili
2025-01-16 19:56 ` [PATCH 1/2] net/af_xdp: fix use after free in af_xdp_tx_zc() Ariel Otilibili
2025-01-30 18:24   ` Stephen Hemminger
2025-01-16 19:56 ` [PATCH 2/2] net/af_xdp: Refactor af_xdp_tx_zc() Ariel Otilibili
2025-01-16 21:47   ` Stephen Hemminger
2025-01-16 22:20     ` Ariel Otilibili
2025-01-16 22:26       ` Stephen Hemminger
2025-01-16 22:36         ` Ariel Otilibili
2025-01-16 22:51 ` [PATCH v2 0/2] Fix use after free, and refactor af_xdp_tx_zc() Ariel Otilibili
2025-01-16 22:51   ` [PATCH v2 1/2] net/af_xdp: fix use after free in af_xdp_tx_zc() Ariel Otilibili
2025-01-20 14:54     ` Maryam Tahhan
2025-01-21  8:54       ` Ariel Otilibili
2025-01-16 22:51   ` [PATCH v2 2/2] net/af_xdp: Refactor af_xdp_tx_zc() Ariel Otilibili
2025-01-20 15:28     ` Maryam Tahhan
2025-01-21  8:57       ` Ariel Otilibili
2025-01-28 23:11 ` [PATCH v3 0/2] Fix use after free, and refactor af_xdp_tx_zc() Ariel Otilibili
2025-01-28 23:11   ` [PATCH v3 1/2] net/af_xdp: fix use after free in af_xdp_tx_zc() Ariel Otilibili
2025-01-28 23:11   ` [PATCH v3 2/2] net/af_xdp: Refactor af_xdp_tx_zc() Ariel Otilibili
2025-01-29 17:58     ` Maryam Tahhan [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f067b5ea-ade9-4b2d-8a66-9f8446138bb5@redhat.com \
    --to=mtahhan@redhat.com \
    --cc=ariel.otilibili@6wind.com \
    --cc=ciara.loftus@intel.com \
    --cc=david.marchand@redhat.com \
    --cc=dev@dpdk.org \
    --cc=stable@dpdk.org \
    --cc=stephen@networkplumber.org \
    --cc=thomas@monjalon.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).