From: Long Li <longli@microsoft.com> netvsc uses rxbuf_info buffer to track received packets attached via rte_pktmbuf_attach_extbuf() and ack the host based on usage count. It uses the transaction_id in the VMBus packet to locate where to use memory in the rxbuf_info. This is not correct in multiple channel setup, as different channels may return idential transaction_ids at a time, and may corrupt the rxbuf_info buffer. Fix this by defining rxbuf_info for each queue. Fixes: 4e9c73e96 ("net/netvsc: add Hyper-V network device") Cc: stable@dpdk.org Signed-off-by: Long Li <longli@microsoft.com> --- drivers/net/netvsc/hn_nvs.c | 13 +++++++++---- drivers/net/netvsc/hn_rxtx.c | 33 ++++++++++++++++++++++++++------- drivers/net/netvsc/hn_var.h | 6 +++--- 3 files changed, 38 insertions(+), 14 deletions(-) diff --git a/drivers/net/netvsc/hn_nvs.c b/drivers/net/netvsc/hn_nvs.c index f88854daf..eeb82ab9e 100644 --- a/drivers/net/netvsc/hn_nvs.c +++ b/drivers/net/netvsc/hn_nvs.c @@ -223,9 +223,15 @@ hn_nvs_conn_rxbuf(struct hn_data *hv) resp.nvs_sect[0].slotcnt); hv->rxbuf_section_cnt = resp.nvs_sect[0].slotcnt; - hv->rxbuf_info = rte_calloc("HN_RXBUF_INFO", hv->rxbuf_section_cnt, - sizeof(*hv->rxbuf_info), RTE_CACHE_LINE_SIZE); - if (!hv->rxbuf_info) { + /* + * Pimary queue's rxbuf_info is not allocated at creation time. + * Now we can allocate it after we figure out the slotcnt. + */ + hv->primary->rxbuf_info = rte_calloc("HN_RXBUF_INFO", + hv->rxbuf_section_cnt, + sizeof(*hv->primary->rxbuf_info), + RTE_CACHE_LINE_SIZE); + if (!hv->primary->rxbuf_info) { PMD_DRV_LOG(ERR, "could not allocate rxbuf info"); return -ENOMEM; @@ -255,7 +261,6 @@ hn_nvs_disconn_rxbuf(struct hn_data *hv) error); } - rte_free(hv->rxbuf_info); /* * Linger long enough for NVS to disconnect RXBUF. 
*/ diff --git a/drivers/net/netvsc/hn_rxtx.c b/drivers/net/netvsc/hn_rxtx.c index 87b1184bc..c8c4ee10c 100644 --- a/drivers/net/netvsc/hn_rxtx.c +++ b/drivers/net/netvsc/hn_rxtx.c @@ -524,21 +524,21 @@ hn_rndis_rxinfo(const void *info_data, unsigned int info_dlen, static void hn_rx_buf_free_cb(void *buf __rte_unused, void *opaque) { struct hn_rx_bufinfo *rxb = opaque; - struct hn_data *hv = rxb->hv; + struct hn_rx_queue *rxq = rxb->rxq; - rte_atomic32_dec(&hv->rxbuf_outstanding); + rte_atomic32_dec(&rxq->rxbuf_outstanding); hn_nvs_ack_rxbuf(rxb->chan, rxb->xactid); } -static struct hn_rx_bufinfo *hn_rx_buf_init(const struct hn_rx_queue *rxq, +static struct hn_rx_bufinfo *hn_rx_buf_init(struct hn_rx_queue *rxq, const struct vmbus_chanpkt_rxbuf *pkt) { struct hn_rx_bufinfo *rxb; - rxb = rxq->hv->rxbuf_info + pkt->hdr.xactid; + rxb = rxq->rxbuf_info + pkt->hdr.xactid; rxb->chan = rxq->chan; rxb->xactid = pkt->hdr.xactid; - rxb->hv = rxq->hv; + rxb->rxq = rxq; rxb->shinfo.free_cb = hn_rx_buf_free_cb; rxb->shinfo.fcb_opaque = rxb; @@ -568,7 +568,7 @@ static void hn_rxpkt(struct hn_rx_queue *rxq, struct hn_rx_bufinfo *rxb, * some space available in receive area for later packets. 
*/ if (dlen >= HN_RXCOPY_THRESHOLD && - (uint32_t)rte_atomic32_read(&hv->rxbuf_outstanding) < + (uint32_t)rte_atomic32_read(&rxq->rxbuf_outstanding) < hv->rxbuf_section_cnt / 2) { struct rte_mbuf_ext_shared_info *shinfo; const void *rxbuf; @@ -585,7 +585,7 @@ static void hn_rxpkt(struct hn_rx_queue *rxq, struct hn_rx_bufinfo *rxb, /* shinfo is already set to 1 by the caller */ if (rte_mbuf_ext_refcnt_update(shinfo, 1) == 2) - rte_atomic32_inc(&hv->rxbuf_outstanding); + rte_atomic32_inc(&rxq->rxbuf_outstanding); rte_pktmbuf_attach_extbuf(m, data, iova, dlen + headroom, shinfo); @@ -888,6 +888,23 @@ struct hn_rx_queue *hn_rx_queue_alloc(struct hn_data *hv, return NULL; } + /* setup rxbuf_info for non-primary queue */ + if (queue_id) { + rxq->rxbuf_info = rte_calloc("HN_RXBUF_INFO", + hv->rxbuf_section_cnt, + sizeof(*rxq->rxbuf_info), + RTE_CACHE_LINE_SIZE); + + if (!rxq->rxbuf_info) { + PMD_DRV_LOG(ERR, + "Could not allocate rxbuf info for queue %d\n", + queue_id); + rte_free(rxq->event_buf); + rte_free(rxq); + return NULL; + } + } + return rxq; } @@ -953,6 +970,7 @@ hn_dev_rx_queue_setup(struct rte_eth_dev *dev, fail: rte_ring_free(rxq->rx_ring); + rte_free(rxq->rxbuf_info); rte_free(rxq->event_buf); rte_free(rxq); return error; @@ -975,6 +993,7 @@ hn_rx_queue_free(struct hn_rx_queue *rxq, bool keep_primary) if (keep_primary && rxq == rxq->hv->primary) return; + rte_free(rxq->rxbuf_info); rte_free(rxq->event_buf); rte_free(rxq); } diff --git a/drivers/net/netvsc/hn_var.h b/drivers/net/netvsc/hn_var.h index 7cb7713e9..4b63f8760 100644 --- a/drivers/net/netvsc/hn_var.h +++ b/drivers/net/netvsc/hn_var.h @@ -83,13 +83,15 @@ struct hn_rx_queue { struct hn_stats stats; void *event_buf; + struct hn_rx_bufinfo *rxbuf_info; + rte_atomic32_t rxbuf_outstanding; }; /* multi-packet data from host */ struct hn_rx_bufinfo { struct vmbus_channel *chan; - struct hn_data *hv; + struct hn_rx_queue *rxq; uint64_t xactid; struct rte_mbuf_ext_shared_info shinfo; } __rte_cache_aligned; @@ 
-111,9 +113,7 @@ struct hn_data { uint32_t link_speed; struct rte_mem_resource *rxbuf_res; /* UIO resource for Rx */ - struct hn_rx_bufinfo *rxbuf_info; uint32_t rxbuf_section_cnt; /* # of Rx sections */ - rte_atomic32_t rxbuf_outstanding; uint16_t max_queues; /* Max available queues */ uint16_t num_queues; uint64_t rss_offloads; -- 2.25.1
From: Long Li <longli@microsoft.com> netvsc is a high speed VMBus device that uses monitor bit to signal the host. It's not necessary to send interrupts via INT bit. Signed-off-by: Long Li <longli@microsoft.com> --- drivers/bus/vmbus/vmbus_channel.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/drivers/bus/vmbus/vmbus_channel.c b/drivers/bus/vmbus/vmbus_channel.c index 4f5578e2c..f67f1c438 100644 --- a/drivers/bus/vmbus/vmbus_channel.c +++ b/drivers/bus/vmbus/vmbus_channel.c @@ -26,18 +26,6 @@ vmbus_sync_set_bit(volatile uint32_t *addr, uint32_t mask) __sync_or_and_fetch(addr, mask); } -static inline void -vmbus_send_interrupt(const struct rte_vmbus_device *dev, uint32_t relid) -{ - uint32_t *int_addr; - uint32_t int_mask; - - int_addr = dev->int_page + relid / 32; - int_mask = 1u << (relid % 32); - - vmbus_sync_set_bit(int_addr, int_mask); -} - static inline void vmbus_set_monitor(const struct rte_vmbus_device *dev, uint32_t monitor_id) { @@ -55,7 +43,6 @@ static void vmbus_set_event(const struct rte_vmbus_device *dev, const struct vmbus_channel *chan) { - vmbus_send_interrupt(dev, chan->relid); vmbus_set_monitor(dev, chan->monitor_id); } -- 2.25.1
From: Long Li <longli@microsoft.com> chim_index could potentially be used in other hn_txdesc when re-allocated. Mark it as invalid to prevent stale value being used. Signed-off-by: Long Li <longli@microsoft.com> --- drivers/net/netvsc/hn_rxtx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/netvsc/hn_rxtx.c b/drivers/net/netvsc/hn_rxtx.c index c8c4ee10c..a388ff258 100644 --- a/drivers/net/netvsc/hn_rxtx.c +++ b/drivers/net/netvsc/hn_rxtx.c @@ -419,8 +419,10 @@ hn_nvs_send_completed(struct rte_eth_dev *dev, uint16_t queue_id, ++txq->stats.errors; } - if (txd->chim_index != NVS_CHIM_IDX_INVALID) + if (txd->chim_index != NVS_CHIM_IDX_INVALID) { hn_chim_free(hv, txd->chim_index); + txd->chim_index = NVS_CHIM_IDX_INVALID; + } rte_pktmbuf_free(txd->m); hn_txd_put(txq, txd); -- 2.25.1
From: Stephen Hemminger <stephen@networkplumber.org> The data from the host is trusted but checked by the driver. One check that is missing is that the packet offset and length might cause wraparound. Cc: stable@dpdk.org Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> Signed-off-by: Long Li <longli@microsoft.com> --- drivers/net/netvsc/hn_rxtx.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/netvsc/hn_rxtx.c b/drivers/net/netvsc/hn_rxtx.c index a388ff258..d8d3f07f5 100644 --- a/drivers/net/netvsc/hn_rxtx.c +++ b/drivers/net/netvsc/hn_rxtx.c @@ -666,7 +666,8 @@ static void hn_rndis_rx_data(struct hn_rx_queue *rxq, struct hn_rx_bufinfo *rxb, void *data, uint32_t dlen) { - unsigned int data_off, data_len, pktinfo_off, pktinfo_len; + unsigned int data_off, data_len, total_len; + unsigned int pktinfo_off, pktinfo_len; const struct rndis_packet_msg *pkt = data; struct hn_rxinfo info = { .vlan_info = HN_NDIS_VLAN_INFO_INVALID, @@ -711,7 +712,8 @@ static void hn_rndis_rx_data(struct hn_rx_queue *rxq, goto error; } - if (unlikely(data_off + data_len > pkt->len)) + if (__builtin_add_overflow(data_off, data_len, &total_len) || + total_len > pkt->len) goto error; if (unlikely(data_len < RTE_ETHER_HDR_LEN)) -- 2.25.1
On Mon, 10 Aug 2020 19:33:12 -0700
longli@linuxonhyperv.com wrote:
> From: Long Li <longli@microsoft.com>
>
> netvsc is a high speed VMBus device that uses monitor bit to signal the
> host. It's not necessary to send interrupts via INT bit.
>
> Signed-off-by: Long Li <longli@microsoft.com>
Acked-by: Stephen Hemminger <stephen@networkplumber.org>
On Mon, 10 Aug 2020 19:33:13 -0700
longli@linuxonhyperv.com wrote:
> From: Long Li <longli@microsoft.com>
>
> chim_index could potentially be used in other hn_txdesc when re-allocated.
> Mark it as invalid to prevent stale value being used.
>
> Signed-off-by: Long Li <longli@microsoft.com>
Acked-by: Stephen Hemminger <stephen@networkplumber.org>
On Mon, 10 Aug 2020 19:33:14 -0700
longli@linuxonhyperv.com wrote:
> From: Stephen Hemminger <stephen@networkplumber.org>
>
> The data from the host is trusted but checked by the driver.
> One check that is missing is that the packet offset and length
> might cause wraparound.
>
> Cc: stable@dpdk.org
>
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> Signed-off-by: Long Li <longli@microsoft.com>
Reported-by: Nan Chen <whutchennan@gmail.com>
On Mon, 10 Aug 2020 19:33:11 -0700
longli@linuxonhyperv.com wrote:
> From: Long Li <longli@microsoft.com>
>
> netvsc uses rxbuf_info buffer to track received packets attached via
> rte_pktmbuf_attach_extbuf() and ack the host based on usage count. It uses
> the transaction_id in the VMBus packet to locate where to use memory in the
> rxbuf_info.
>
> This is not correct in multiple channel setup, as different channels may
> return identical transaction_ids at a time, and may corrupt the rxbuf_info
> buffer.
>
> Fix this by defining rxbuf_info for each queue.
>
> Fixes: 4e9c73e96 ("net/netvsc: add Hyper-V network device")
> Cc: stable@dpdk.org
> Signed-off-by: Long Li <longli@microsoft.com>
Acked-by: Stephen Hemminger <stephen@networkplumber.org>
On 8/17/2020 12:11 AM, Stephen Hemminger wrote:
> On Mon, 10 Aug 2020 19:33:11 -0700
> longli@linuxonhyperv.com wrote:
>
>> From: Long Li <longli@microsoft.com>
>>
>> netvsc uses rxbuf_info buffer to track received packets attached via
>> rte_pktmbuf_attach_extbuf() and ack the host based on usage count. It uses
>> the transaction_id in the VMBus packet to locate where to use memory in the
>> rxbuf_info.
>>
>> This is not correct in multiple channel setup, as different channels may
>> return identical transaction_ids at a time, and may corrupt the rxbuf_info
>> buffer.
>>
>> Fix this by defining rxbuf_info for each queue.
>>
>> Fixes: 4e9c73e96 ("net/netvsc: add Hyper-V network device")
>> Cc: stable@dpdk.org
>> Signed-off-by: Long Li <longli@microsoft.com>
>
> Acked-by: Stephen Hemminger <stephen@networkplumber.org>
>
Series applied to dpdk-next-net/main, thanks.
On Mon, 2020-08-10 at 19:33 -0700, longli@linuxonhyperv.com wrote:
> From: Stephen Hemminger <stephen@networkplumber.org>
>
> The data from the host is trusted but checked by the driver.
> One check that is missing is that the packet offset and length
> might cause wraparound.
>
> Cc: stable@dpdk.org
>
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> Signed-off-by: Long Li <longli@microsoft.com>
> ---
> drivers/net/netvsc/hn_rxtx.c | 6 ++++--
> 1 file changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/netvsc/hn_rxtx.c b/drivers/net/netvsc/hn_rxtx.c
> index a388ff258..d8d3f07f5 100644
> --- a/drivers/net/netvsc/hn_rxtx.c
> +++ b/drivers/net/netvsc/hn_rxtx.c
> @@ -666,7 +666,8 @@ static void hn_rndis_rx_data(struct hn_rx_queue *rxq,
> struct hn_rx_bufinfo *rxb,
> void *data, uint32_t dlen)
> {
> - unsigned int data_off, data_len, pktinfo_off, pktinfo_len;
> + unsigned int data_off, data_len, total_len;
> + unsigned int pktinfo_off, pktinfo_len;
> const struct rndis_packet_msg *pkt = data;
> struct hn_rxinfo info = {
> .vlan_info = HN_NDIS_VLAN_INFO_INVALID,
> @@ -711,7 +712,8 @@ static void hn_rndis_rx_data(struct hn_rx_queue *rxq,
> goto error;
> }
>
> - if (unlikely(data_off + data_len > pkt->len))
> + if (__builtin_add_overflow(data_off, data_len, &total_len) ||
> + total_len > pkt->len)
> goto error;
>
> if (unlikely(data_len < RTE_ETHER_HDR_LEN))
This patch breaks the build with GCC < 5 (CentOS 7, RHEL 7, SLE 12) as
__builtin_add_overflow is not available. Could you please send a follow
up to fix it?
--
Kind regards,
Luca Boccassi
On 10/27/2020 5:10 PM, Luca Boccassi wrote: > On Mon, 2020-08-10 at 19:33 -0700, longli@linuxonhyperv.com wrote: >> From: Stephen Hemminger <stephen@networkplumber.org> >> >> The data from the host is trusted but checked by the driver. >> One check that is missing is that the packet offset and length >> might cause wraparound. >> >> Cc: stable@dpdk.org >> >> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> >> Signed-off-by: Long Li <longli@microsoft.com> >> --- >> drivers/net/netvsc/hn_rxtx.c | 6 ++++-- >> 1 file changed, 4 insertions(+), 2 deletions(-) >> >> diff --git a/drivers/net/netvsc/hn_rxtx.c b/drivers/net/netvsc/hn_rxtx.c >> index a388ff258..d8d3f07f5 100644 >> --- a/drivers/net/netvsc/hn_rxtx.c >> +++ b/drivers/net/netvsc/hn_rxtx.c >> @@ -666,7 +666,8 @@ static void hn_rndis_rx_data(struct hn_rx_queue *rxq, >> struct hn_rx_bufinfo *rxb, >> void *data, uint32_t dlen) >> { >> - unsigned int data_off, data_len, pktinfo_off, pktinfo_len; >> + unsigned int data_off, data_len, total_len; >> + unsigned int pktinfo_off, pktinfo_len; >> const struct rndis_packet_msg *pkt = data; >> struct hn_rxinfo info = { >> .vlan_info = HN_NDIS_VLAN_INFO_INVALID, >> @@ -711,7 +712,8 @@ static void hn_rndis_rx_data(struct hn_rx_queue *rxq, >> goto error; >> } >> >> - if (unlikely(data_off + data_len > pkt->len)) >> + if (__builtin_add_overflow(data_off, data_len, &total_len) || >> + total_len > pkt->len) >> goto error; >> >> if (unlikely(data_len < RTE_ETHER_HDR_LEN)) > > This patch breaks the build with GCC < 5 (CentOS 7, RHEL 7, SLE 12) as > __builtin_add_overflow is not available. Could you please send a follow > up to fix it? > It should be already fixed in the repo: https://git.dpdk.org/dpdk/commit/?id=d73543b5f46d Are you getting the build error with 20.11-rc1?
On Tue, 2020-10-27 at 23:07 +0000, Ferruh Yigit wrote:
> On 10/27/2020 5:10 PM, Luca Boccassi wrote:
> > On Mon, 2020-08-10 at 19:33 -0700, longli@linuxonhyperv.com wrote:
> > > From: Stephen Hemminger <stephen@networkplumber.org>
> > >
> > > The data from the host is trusted but checked by the driver.
> > > One check that is missing is that the packet offset and length
> > > might cause wraparound.
> > >
> > > Cc: stable@dpdk.org
> > >
> > > Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> > > Signed-off-by: Long Li <longli@microsoft.com>
> > > ---
> > > drivers/net/netvsc/hn_rxtx.c | 6 ++++--
> > > 1 file changed, 4 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/drivers/net/netvsc/hn_rxtx.c b/drivers/net/netvsc/hn_rxtx.c
> > > index a388ff258..d8d3f07f5 100644
> > > --- a/drivers/net/netvsc/hn_rxtx.c
> > > +++ b/drivers/net/netvsc/hn_rxtx.c
> > > @@ -666,7 +666,8 @@ static void hn_rndis_rx_data(struct hn_rx_queue *rxq,
> > > struct hn_rx_bufinfo *rxb,
> > > void *data, uint32_t dlen)
> > > {
> > > - unsigned int data_off, data_len, pktinfo_off, pktinfo_len;
> > > + unsigned int data_off, data_len, total_len;
> > > + unsigned int pktinfo_off, pktinfo_len;
> > > const struct rndis_packet_msg *pkt = data;
> > > struct hn_rxinfo info = {
> > > .vlan_info = HN_NDIS_VLAN_INFO_INVALID,
> > > @@ -711,7 +712,8 @@ static void hn_rndis_rx_data(struct hn_rx_queue *rxq,
> > > goto error;
> > > }
> > >
> > > - if (unlikely(data_off + data_len > pkt->len))
> > > + if (__builtin_add_overflow(data_off, data_len, &total_len) ||
> > > + total_len > pkt->len)
> > > goto error;
> > >
> > > if (unlikely(data_len < RTE_ETHER_HDR_LEN))
> >
> > This patch breaks the build with GCC < 5 (CentOS 7, RHEL 7, SLE 12) as
> > __builtin_add_overflow is not available. Could you please send a follow
> > up to fix it?
> >
>
> It should be already fixed in the repo:
> https://git.dpdk.org/dpdk/commit/?id=d73543b5f46d
>
> Are you getting the build error with 20.11-rc1?
No, with the backport. The original patch was marked for stable, but
the fixup was not. I'll pick it up.
--
Kind regards,
Luca Boccassi
On 10/28/2020 11:08 AM, Luca Boccassi wrote: > On Tue, 2020-10-27 at 23:07 +0000, Ferruh Yigit wrote: >> On 10/27/2020 5:10 PM, Luca Boccassi wrote: >>> On Mon, 2020-08-10 at 19:33 -0700, longli@linuxonhyperv.com wrote: >>>> From: Stephen Hemminger <stephen@networkplumber.org> >>>> >>>> The data from the host is trusted but checked by the driver. >>>> One check that is missing is that the packet offset and length >>>> might cause wraparound. >>>> >>>> Cc: stable@dpdk.org >>>> >>>> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> >>>> Signed-off-by: Long Li <longli@microsoft.com> >>>> --- >>>> drivers/net/netvsc/hn_rxtx.c | 6 ++++-- >>>> 1 file changed, 4 insertions(+), 2 deletions(-) >>>> >>>> diff --git a/drivers/net/netvsc/hn_rxtx.c b/drivers/net/netvsc/hn_rxtx.c >>>> index a388ff258..d8d3f07f5 100644 >>>> --- a/drivers/net/netvsc/hn_rxtx.c >>>> +++ b/drivers/net/netvsc/hn_rxtx.c >>>> @@ -666,7 +666,8 @@ static void hn_rndis_rx_data(struct hn_rx_queue *rxq, >>>> struct hn_rx_bufinfo *rxb, >>>> void *data, uint32_t dlen) >>>> { >>>> - unsigned int data_off, data_len, pktinfo_off, pktinfo_len; >>>> + unsigned int data_off, data_len, total_len; >>>> + unsigned int pktinfo_off, pktinfo_len; >>>> const struct rndis_packet_msg *pkt = data; >>>> struct hn_rxinfo info = { >>>> .vlan_info = HN_NDIS_VLAN_INFO_INVALID, >>>> @@ -711,7 +712,8 @@ static void hn_rndis_rx_data(struct hn_rx_queue *rxq, >>>> goto error; >>>> } >>>> >>>> - if (unlikely(data_off + data_len > pkt->len)) >>>> + if (__builtin_add_overflow(data_off, data_len, &total_len) || >>>> + total_len > pkt->len) >>>> goto error; >>>> >>>> if (unlikely(data_len < RTE_ETHER_HDR_LEN)) >>> >>> This patch breaks the build with GCC < 5 (CentOS 7, RHEL 7, SLE 12) as >>> __builtin_add_overflow is not available. Could you please send a follow >>> up to fix it? 
>>> >> >> It should be already fixed in the repo: >> https://git.dpdk.org/dpdk/commit/?id=d73543b5f46d >> >> Are you getting the build error with 20.11-rc1? > > No, with the backport. The original patch was marked for stable, but > the fixup was not. Yes, it should be also marked for stable, seems missed. > I'll pick it up. > Thanks.