DPDK patches and discussions
 help / color / mirror / Atom feed
* [PATCH 1/2] vhost: fix memory leak in Virtio Tx split path
@ 2024-01-31  9:31 Maxime Coquelin
  2024-01-31  9:31 ` [PATCH 2/2] vhost: add new mbuf allocation failure statistic Maxime Coquelin
  2024-01-31 13:19 ` [PATCH 1/2] vhost: fix memory leak in Virtio Tx split path David Marchand
  0 siblings, 2 replies; 4+ messages in thread
From: Maxime Coquelin @ 2024-01-31  9:31 UTC (permalink / raw)
  To: dev, chenbox, david.marchand, bnemeth, echaudro; +Cc: Maxime Coquelin, stable

When vIOMMU is enabled and Virtio device is bound to kernel
driver in guest, rte_vhost_dequeue_burst() will often return
early because of IOTLB misses.

This patch fixes a mbuf leak occurring in this case.

Fixes: 242695f6122a ("vhost: allocate and free packets in bulk in Tx split")
Cc: stable@dpdk.org

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/vhost/virtio_net.c | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index 280d4845f8..db9985c9b9 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -3120,11 +3120,8 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
 						VHOST_ACCESS_RO) < 0))
 			break;
 
-		update_shadow_used_ring_split(vq, head_idx, 0);
-
 		if (unlikely(buf_len <= dev->vhost_hlen)) {
-			dropped += 1;
-			i++;
+			dropped = 1;
 			break;
 		}
 
@@ -3143,8 +3140,7 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
 					buf_len, mbuf_pool->name);
 				allocerr_warned = true;
 			}
-			dropped += 1;
-			i++;
+			dropped = 1;
 			break;
 		}
 
@@ -3155,17 +3151,17 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
 				VHOST_DATA_LOG(dev->ifname, ERR, "failed to copy desc to mbuf.");
 				allocerr_warned = true;
 			}
-			dropped += 1;
-			i++;
+			dropped = 1;
 			break;
 		}
 
+		update_shadow_used_ring_split(vq, head_idx, 0);
 	}
 
-	if (dropped)
-		rte_pktmbuf_free_bulk(&pkts[i - 1], count - i + 1);
+	if (unlikely(count != i))
+		rte_pktmbuf_free_bulk(&pkts[i], count - i);
 
-	vq->last_avail_idx += i;
+	vq->last_avail_idx += i + dropped;
 
 	do_data_copy_dequeue(vq);
 	if (unlikely(i < count))
@@ -3175,7 +3171,7 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
 		vhost_vring_call_split(dev, vq);
 	}
 
-	return (i - dropped);
+	return i;
 }
 
 __rte_noinline
-- 
2.43.0


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 2/2] vhost: add new mbuf allocation failure statistic
  2024-01-31  9:31 [PATCH 1/2] vhost: fix memory leak in Virtio Tx split path Maxime Coquelin
@ 2024-01-31  9:31 ` Maxime Coquelin
  2024-01-31 13:19 ` [PATCH 1/2] vhost: fix memory leak in Virtio Tx split path David Marchand
  1 sibling, 0 replies; 4+ messages in thread
From: Maxime Coquelin @ 2024-01-31  9:31 UTC (permalink / raw)
  To: dev, chenbox, david.marchand, bnemeth, echaudro; +Cc: Maxime Coquelin

This patch introduces a new, per virtqueue, mbuf allocation
failure statistic. It can be useful to troubleshoot packets
drops due to insufficient mempool size or memory leaks.

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/vhost/vhost.c      |  1 +
 lib/vhost/vhost.h      |  1 +
 lib/vhost/virtio_net.c | 17 +++++++++++++----
 3 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c
index 5912a42979..ac71d17784 100644
--- a/lib/vhost/vhost.c
+++ b/lib/vhost/vhost.c
@@ -55,6 +55,7 @@ static const struct vhost_vq_stats_name_off vhost_vq_stat_strings[] = {
 	{"iotlb_misses",           offsetof(struct vhost_virtqueue, stats.iotlb_misses)},
 	{"inflight_submitted",     offsetof(struct vhost_virtqueue, stats.inflight_submitted)},
 	{"inflight_completed",     offsetof(struct vhost_virtqueue, stats.inflight_completed)},
+	{"mbuf_alloc_failed",      offsetof(struct vhost_virtqueue, stats.mbuf_alloc_failed)},
 };
 
 #define VHOST_NB_VQ_STATS RTE_DIM(vhost_vq_stat_strings)
diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
index 470dadbba6..371c3e3858 100644
--- a/lib/vhost/vhost.h
+++ b/lib/vhost/vhost.h
@@ -156,6 +156,7 @@ struct virtqueue_stats {
 	uint64_t iotlb_misses;
 	uint64_t inflight_submitted;
 	uint64_t inflight_completed;
+	uint64_t mbuf_alloc_failed;
 	uint64_t guest_notifications_suppressed;
 	/* Counters below are atomic, and should be incremented as such. */
 	RTE_ATOMIC(uint64_t) guest_notifications;
diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index db9985c9b9..b056c83d8f 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -2975,6 +2975,7 @@ desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 		if (mbuf_avail == 0) {
 			cur = rte_pktmbuf_alloc(mbuf_pool);
 			if (unlikely(cur == NULL)) {
+				vq->stats.mbuf_alloc_failed++;
 				VHOST_DATA_LOG(dev->ifname, ERR,
 					"failed to allocate memory for mbuf.");
 				goto error;
@@ -3103,8 +3104,10 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	count = RTE_MIN(count, avail_entries);
 	VHOST_DATA_LOG(dev->ifname, DEBUG, "about to dequeue %u buffers", count);
 
-	if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, count))
+	if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, count)) {
+		vq->stats.mbuf_alloc_failed += count;
 		return 0;
+	}
 
 	for (i = 0; i < count; i++) {
 		struct buf_vector buf_vec[BUF_VECTOR_MAX];
@@ -3481,8 +3484,10 @@ virtio_dev_tx_packed(struct virtio_net *dev,
 {
 	uint32_t pkt_idx = 0;
 
-	if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, count))
+	if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, count)) {
+		vq->stats.mbuf_alloc_failed += count;
 		return 0;
+	}
 
 	do {
 		rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
@@ -3729,8 +3734,10 @@ virtio_dev_tx_async_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	count = RTE_MIN(count, avail_entries);
 	VHOST_DATA_LOG(dev->ifname, DEBUG, "about to dequeue %u buffers", count);
 
-	if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts_prealloc, count))
+	if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts_prealloc, count)) {
+		vq->stats.mbuf_alloc_failed += count;
 		goto out;
+	}
 
 	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
 		uint16_t head_idx = 0;
@@ -4019,8 +4026,10 @@ virtio_dev_tx_async_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
 
 	async_iter_reset(async);
 
-	if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts_prealloc, count))
+	if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts_prealloc, count)) {
+		vq->stats.mbuf_alloc_failed += count;
 		goto out;
+	}
 
 	do {
 		struct rte_mbuf *pkt = pkts_prealloc[pkt_idx];
-- 
2.43.0


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/2] vhost: fix memory leak in Virtio Tx split path
  2024-01-31  9:31 [PATCH 1/2] vhost: fix memory leak in Virtio Tx split path Maxime Coquelin
  2024-01-31  9:31 ` [PATCH 2/2] vhost: add new mbuf allocation failure statistic Maxime Coquelin
@ 2024-01-31 13:19 ` David Marchand
  2024-01-31 13:32   ` Maxime Coquelin
  1 sibling, 1 reply; 4+ messages in thread
From: David Marchand @ 2024-01-31 13:19 UTC (permalink / raw)
  To: Maxime Coquelin; +Cc: dev, chenbox, bnemeth, echaudro, stable

On Wed, Jan 31, 2024 at 10:31 AM Maxime Coquelin
<maxime.coquelin@redhat.com> wrote:
>
> When vIOMMU is enabled and Virtio device is bound to kernel
> driver in guest, rte_vhost_dequeue_burst() will often return
> early because of IOTLB misses.
>
> This patch fixes a mbuf leak occurring in this case.
>
> Fixes: 242695f6122a ("vhost: allocate and free packets in bulk in Tx split")
> Cc: stable@dpdk.org
>
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
>  lib/vhost/virtio_net.c | 20 ++++++++------------
>  1 file changed, 8 insertions(+), 12 deletions(-)
>
> diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
> index 280d4845f8..db9985c9b9 100644
> --- a/lib/vhost/virtio_net.c
> +++ b/lib/vhost/virtio_net.c
> @@ -3120,11 +3120,8 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
>                                                 VHOST_ACCESS_RO) < 0))
>                         break;
>
> -               c(vq, head_idx, 0);
> -
>                 if (unlikely(buf_len <= dev->vhost_hlen)) {
> -                       dropped += 1;
> -                       i++;
> +                       dropped = 1;
>                         break;
>                 }

``i`` was used for both filling the returned mbuf array, but also to
update the shadow_used_idx / array.

So this change here also affects how the currently considered
descriptor is returned through the used ring.

See below...

>
> @@ -3143,8 +3140,7 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
>                                         buf_len, mbuf_pool->name);
>                                 allocerr_warned = true;
>                         }
> -                       dropped += 1;
> -                       i++;
> +                       dropped = 1;
>                         break;
>                 }
>
> @@ -3155,17 +3151,17 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
>                                 VHOST_DATA_LOG(dev->ifname, ERR, "failed to copy desc to mbuf.");
>                                 allocerr_warned = true;
>                         }
> -                       dropped += 1;
> -                       i++;
> +                       dropped = 1;
>                         break;
>                 }
>
> +               update_shadow_used_ring_split(vq, head_idx, 0);
>         }
>
> -       if (dropped)
> -               rte_pktmbuf_free_bulk(&pkts[i - 1], count - i + 1);
> +       if (unlikely(count != i))
> +               rte_pktmbuf_free_bulk(&pkts[i], count - i);
>
> -       vq->last_avail_idx += i;
> +       vq->last_avail_idx += i + dropped;
>
>         do_data_copy_dequeue(vq);
>         if (unlikely(i < count))

... I am copying the rest of the context:
       if (unlikely(i < count))
               vq->shadow_used_idx = i;

Before the patch, when breaking and doing the i++ stuff,
vq->shadow_used_idx was probably already equal to i because
update_shadow_used_ring_split had been called earlier.
Because of this, the "dropped" descriptor was part of the shadow_used
array for returning it through the used ring.

With the patch, since we break without touching i, it means that the
"dropped" descriptor is not returned anymore.

Fixing this issue could take the form of restoring the call to
update_shadow_used_ring_split in the loop and adjust
vq->shadow_used_idx to i + dropped.

But I think we can go one step further, by noting that
vq->last_avail_idx is being incremented by the same "i + dropped"
value.
Meaning that we could simply rely on calls to
update_shadow_used_ring_split and reuse vq->shadow_used_idx to adjust
vq->last_avail_idx.
By doing this, there is no need for a "dropped" variable, and no need
for touching of vq->shadow_used_idx manually, which is probably better
for robustness / easing readability.


Note: we could also move the call do_data_copy_dequeue() under the
check on vq->shadow_used_idx != 0, though it won't probably change
anything.


This gives the following (untested) diff, on top of your fix:
diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index 211d24b36a..4e1d61bd54 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -3104,7 +3104,6 @@ virtio_dev_tx_split(struct virtio_net *dev,
struct vhost_virtqueue *vq,
 {
        uint16_t i;
        uint16_t avail_entries;
-       uint16_t dropped = 0;
        static bool allocerr_warned;

        /*
@@ -3141,10 +3140,10 @@ virtio_dev_tx_split(struct virtio_net *dev,
struct vhost_virtqueue *vq,
                                                VHOST_ACCESS_RO) < 0))
                        break;

-               if (unlikely(buf_len <= dev->vhost_hlen)) {
-                       dropped = 1;
+               update_shadow_used_ring_split(vq, head_idx, 0);
+
+               if (unlikely(buf_len <= dev->vhost_hlen))
                        break;
-               }

                buf_len -= dev->vhost_hlen;

@@ -3161,7 +3160,6 @@ virtio_dev_tx_split(struct virtio_net *dev,
struct vhost_virtqueue *vq,
                                        buf_len, mbuf_pool->name);
                                allocerr_warned = true;
                        }
-                       dropped = 1;
                        break;
                }

@@ -3172,22 +3170,16 @@ virtio_dev_tx_split(struct virtio_net *dev,
struct vhost_virtqueue *vq,
                                VHOST_DATA_LOG(dev->ifname, ERR,
"failed to copy desc to mbuf.");
                                allocerr_warned = true;
                        }
-                       dropped = 1;
                        break;
                }
-
-               update_shadow_used_ring_split(vq, head_idx, 0);
        }

        if (unlikely(count != i))
                rte_pktmbuf_free_bulk(&pkts[i], count - i);

-       vq->last_avail_idx += i + dropped;
-
        do_data_copy_dequeue(vq);
-       if (unlikely(i < count))
-               vq->shadow_used_idx = i;
        if (likely(vq->shadow_used_idx)) {
+               vq->last_avail_idx += vq->shadow_used_idx;
                flush_shadow_used_ring_split(dev, vq);
                vhost_vring_call_split(dev, vq);
        }


-- 
David Marchand


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/2] vhost: fix memory leak in Virtio Tx split path
  2024-01-31 13:19 ` [PATCH 1/2] vhost: fix memory leak in Virtio Tx split path David Marchand
@ 2024-01-31 13:32   ` Maxime Coquelin
  0 siblings, 0 replies; 4+ messages in thread
From: Maxime Coquelin @ 2024-01-31 13:32 UTC (permalink / raw)
  To: David Marchand; +Cc: dev, chenbox, bnemeth, echaudro, stable

Hi David,

On 1/31/24 14:19, David Marchand wrote:
> On Wed, Jan 31, 2024 at 10:31 AM Maxime Coquelin
> <maxime.coquelin@redhat.com> wrote:
>>
>> When vIOMMU is enabled and Virtio device is bound to kernel
>> driver in guest, rte_vhost_dequeue_burst() will often return
>> early because of IOTLB misses.
>>
>> This patch fixes a mbuf leak occurring in this case.
>>
>> Fixes: 242695f6122a ("vhost: allocate and free packets in bulk in Tx split")
>> Cc: stable@dpdk.org
>>
>> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
>> ---
>>   lib/vhost/virtio_net.c | 20 ++++++++------------
>>   1 file changed, 8 insertions(+), 12 deletions(-)
>>
>> diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
>> index 280d4845f8..db9985c9b9 100644
>> --- a/lib/vhost/virtio_net.c
>> +++ b/lib/vhost/virtio_net.c
>> @@ -3120,11 +3120,8 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
>>                                                  VHOST_ACCESS_RO) < 0))
>>                          break;
>>
>> -               c(vq, head_idx, 0);
>> -
>>                  if (unlikely(buf_len <= dev->vhost_hlen)) {
>> -                       dropped += 1;
>> -                       i++;
>> +                       dropped = 1;
>>                          break;
>>                  }
> 
> ``i`` was used for both filling the returned mbuf array, but also to
> update the shadow_used_idx / array.
> 
> So this change here also affects how the currently considered
> descriptor is returned through the used ring.
> 
> See below...
> 
>>
>> @@ -3143,8 +3140,7 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
>>                                          buf_len, mbuf_pool->name);
>>                                  allocerr_warned = true;
>>                          }
>> -                       dropped += 1;
>> -                       i++;
>> +                       dropped = 1;
>>                          break;
>>                  }
>>
>> @@ -3155,17 +3151,17 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
>>                                  VHOST_DATA_LOG(dev->ifname, ERR, "failed to copy desc to mbuf.");
>>                                  allocerr_warned = true;
>>                          }
>> -                       dropped += 1;
>> -                       i++;
>> +                       dropped = 1;
>>                          break;
>>                  }
>>
>> +               update_shadow_used_ring_split(vq, head_idx, 0);
>>          }
>>
>> -       if (dropped)
>> -               rte_pktmbuf_free_bulk(&pkts[i - 1], count - i + 1);
>> +       if (unlikely(count != i))
>> +               rte_pktmbuf_free_bulk(&pkts[i], count - i);
>>
>> -       vq->last_avail_idx += i;
>> +       vq->last_avail_idx += i + dropped;
>>
>>          do_data_copy_dequeue(vq);
>>          if (unlikely(i < count))
> 
> ... I am copying the rest of the context:
>         if (unlikely(i < count))
>                 vq->shadow_used_idx = i;
> 
> Before the patch, when breaking and doing the i++ stuff,
> vq->shadow_used_idx was probably already equal to i because
> update_shadow_used_ring_split had been called earlier.
> Because of this, the "dropped" descriptor was part of the shadow_used
> array for returning it through the used ring.
> 
> With the patch, since we break without touching i, it means that the
> "dropped" descriptor is not returned anymore.
> 
> Fixing this issue could take the form of restoring the call to
> update_shadow_used_ring_split in the loop and adjust
> vq->shadow_used_idx to i + dropped.
> 
> But I think we can go one step further, by noting that
> vq->last_avail_idx is being incremented by the same "i + dropped"
> value.
> Meaning that we could simply rely on calls to
> update_shadow_used_ring_split and reuse vq->shadow_used_idx to adjust
> vq->last_avail_idx.
> By doing this, there is no need for a "dropped" variable, and no need
> for touching of vq->shadow_used_idx manually, which is probably better
> for robustness / easing readability.

I fully agree with your suggestion as discussed off-list.
On top of that, it also makes the split code closer to the packed one.

> 
> Note: we could also move the call do_data_copy_dequeue() under the
> check on vq->shadow_used_idx != 0, though it won't probably change
> anything.

I will move the call to do_data_copy_dequeue() under vq->shadow_used_idx
!= 0 check.

Thanks,
Maxime

> 
> This gives the following (untested) diff, on top of your fix:
> diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
> index 211d24b36a..4e1d61bd54 100644
> --- a/lib/vhost/virtio_net.c
> +++ b/lib/vhost/virtio_net.c
> @@ -3104,7 +3104,6 @@ virtio_dev_tx_split(struct virtio_net *dev,
> struct vhost_virtqueue *vq,
>   {
>          uint16_t i;
>          uint16_t avail_entries;
> -       uint16_t dropped = 0;
>          static bool allocerr_warned;
> 
>          /*
> @@ -3141,10 +3140,10 @@ virtio_dev_tx_split(struct virtio_net *dev,
> struct vhost_virtqueue *vq,
>                                                  VHOST_ACCESS_RO) < 0))
>                          break;
> 
> -               if (unlikely(buf_len <= dev->vhost_hlen)) {
> -                       dropped = 1;
> +               update_shadow_used_ring_split(vq, head_idx, 0);
> +
> +               if (unlikely(buf_len <= dev->vhost_hlen))
>                          break;
> -               }
> 
>                  buf_len -= dev->vhost_hlen;
> 
> @@ -3161,7 +3160,6 @@ virtio_dev_tx_split(struct virtio_net *dev,
> struct vhost_virtqueue *vq,
>                                          buf_len, mbuf_pool->name);
>                                  allocerr_warned = true;
>                          }
> -                       dropped = 1;
>                          break;
>                  }
> 
> @@ -3172,22 +3170,16 @@ virtio_dev_tx_split(struct virtio_net *dev,
> struct vhost_virtqueue *vq,
>                                  VHOST_DATA_LOG(dev->ifname, ERR,
> "failed to copy desc to mbuf.");
>                                  allocerr_warned = true;
>                          }
> -                       dropped = 1;
>                          break;
>                  }
> -
> -               update_shadow_used_ring_split(vq, head_idx, 0);
>          }
> 
>          if (unlikely(count != i))
>                  rte_pktmbuf_free_bulk(&pkts[i], count - i);
> 
> -       vq->last_avail_idx += i + dropped;
> -
>          do_data_copy_dequeue(vq);
> -       if (unlikely(i < count))
> -               vq->shadow_used_idx = i;
>          if (likely(vq->shadow_used_idx)) {
> +               vq->last_avail_idx += vq->shadow_used_idx;
>                  flush_shadow_used_ring_split(dev, vq);
>                  vhost_vring_call_split(dev, vq);
>          }
> 
> 


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2024-01-31 13:32 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-01-31  9:31 [PATCH 1/2] vhost: fix memory leak in Virtio Tx split path Maxime Coquelin
2024-01-31  9:31 ` [PATCH 2/2] vhost: add new mbuf allocation failure statistic Maxime Coquelin
2024-01-31 13:19 ` [PATCH 1/2] vhost: fix memory leak in Virtio Tx split path David Marchand
2024-01-31 13:32   ` Maxime Coquelin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).