patches for DPDK stable branches
* [dpdk-stable] [PATCH 1/2] net/netvsc: handle receive packets during multi-channel setup
       [not found] <20200316235612.29854-1-stephen@networkplumber.org>
@ 2020-03-16 23:56 ` Stephen Hemminger
       [not found] ` <20200318202957.18121-1-stephen@networkplumber.org>
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 16+ messages in thread
From: Stephen Hemminger @ 2020-03-16 23:56 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, stable

It is possible for a packet to arrive during the configuration
process when setting up multiple-queue mode. This would cause the
configure step to fail; fix this by ignoring received packets while
waiting for control commands.

Add a spinlock to ensure that two control operations are not
attempted at once.
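
In outline, the fix turns the response wait into a polling loop that
acks and discards any RNDIS data packet until the control response
arrives. A condensed sketch of that pattern (wait_ctrl_resp() is a
hypothetical helper that folds the patch's retry loop together; the
channel calls and types are the ones used in the patch below):

	static int wait_ctrl_resp(struct vmbus_channel *chan, uint32_t type)
	{
		char buffer[NVS_RESPSIZE_MAX];
		const struct hn_nvs_hdr *hdr;
		uint64_t xactid;
		uint32_t len;
		int ret;

		for (;;) {
			len = sizeof(buffer);
			ret = rte_vmbus_chan_recv(chan, buffer, &len, &xactid);
			if (ret == -EAGAIN) {
				rte_delay_us(HN_CHAN_INTERVAL_US);
				continue;	/* nothing yet; keep polling */
			}
			if (ret < 0)
				return ret;	/* channel error */

			hdr = (const struct hn_nvs_hdr *)buffer;
			if (hdr->type == NVS_TYPE_RNDIS) {
				/* data packet; ack it by transaction id
				 * and keep waiting for the response */
				hn_nvs_ack_rxbuf(chan, xactid);
				continue;
			}
			/* control message; verify it is the expected type */
			return hdr->type == type ? 0 : -EINVAL;
		}
	}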

Fixes: 4e9c73e96e83 ("net/netvsc: add Hyper-V network device")
Cc: stable@dpdk.org
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/netvsc/hn_ethdev.c |  1 +
 drivers/net/netvsc/hn_nvs.c    | 40 ++++++++++++++++++++++++++++++++--
 drivers/net/netvsc/hn_var.h    |  1 +
 3 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/drivers/net/netvsc/hn_ethdev.c b/drivers/net/netvsc/hn_ethdev.c
index 564620748daf..01e69de6f3e2 100644
--- a/drivers/net/netvsc/hn_ethdev.c
+++ b/drivers/net/netvsc/hn_ethdev.c
@@ -951,6 +951,7 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev)
 	hv->latency = HN_CHAN_LATENCY_NS;
 	hv->max_queues = 1;
 	rte_spinlock_init(&hv->vf_lock);
+	rte_spinlock_init(&hv->nvs_lock);
 	hv->vf_port = HN_INVALID_PORT;
 
 	err = hn_parse_args(eth_dev);
diff --git a/drivers/net/netvsc/hn_nvs.c b/drivers/net/netvsc/hn_nvs.c
index 6b518685ab6f..f56692f4d605 100644
--- a/drivers/net/netvsc/hn_nvs.c
+++ b/drivers/net/netvsc/hn_nvs.c
@@ -54,7 +54,7 @@ static int hn_nvs_req_send(struct hn_data *hv,
 }
 
 static int
-hn_nvs_execute(struct hn_data *hv,
+__hn_nvs_execute(struct hn_data *hv,
 	       void *req, uint32_t reqlen,
 	       void *resp, uint32_t resplen,
 	       uint32_t type)
@@ -62,6 +62,7 @@ hn_nvs_execute(struct hn_data *hv,
 	struct vmbus_channel *chan = hn_primary_chan(hv);
 	char buffer[NVS_RESPSIZE_MAX];
 	const struct hn_nvs_hdr *hdr;
+	uint64_t xactid;
 	uint32_t len;
 	int ret;
 
@@ -77,7 +78,7 @@ hn_nvs_execute(struct hn_data *hv,
 
  retry:
 	len = sizeof(buffer);
-	ret = rte_vmbus_chan_recv(chan, buffer, &len, NULL);
+	ret = rte_vmbus_chan_recv(chan, buffer, &len, &xactid);
 	if (ret == -EAGAIN) {
 		rte_delay_us(HN_CHAN_INTERVAL_US);
 		goto retry;
@@ -88,7 +89,20 @@ hn_nvs_execute(struct hn_data *hv,
 		return ret;
 	}
 
+	if (len < sizeof(*hdr)) {
+		PMD_DRV_LOG(ERR, "response missing NVS header");
+		return -EINVAL;
+	}
+
 	hdr = (struct hn_nvs_hdr *)buffer;
+
+	/* Silently drop received packets while waiting for response */
+	if (hdr->type == NVS_TYPE_RNDIS) {
+		hn_nvs_ack_rxbuf(chan, xactid);
+		--hv->rxbuf_outstanding;
+		goto retry;
+	}
+
 	if (hdr->type != type) {
 		PMD_DRV_LOG(ERR, "unexpected NVS resp %#x, expect %#x",
 			    hdr->type, type);
@@ -108,6 +122,28 @@ hn_nvs_execute(struct hn_data *hv,
 	return 0;
 }
 
+
+/*
+ * Execute one control command and get the response.
+ * Only one command can be active on a channel at once.
+ * Unlike BSD, DPDK does not have an interrupt context,
+ * so polling is required to wait for the response.
+ */
+static int
+hn_nvs_execute(struct hn_data *hv,
+	       void *req, uint32_t reqlen,
+	       void *resp, uint32_t resplen,
+	       uint32_t type)
+{
+	int ret;
+
+	rte_spinlock_lock(&hv->nvs_lock);
+	ret = __hn_nvs_execute(hv, req, reqlen, resp, resplen, type);
+	rte_spinlock_unlock(&hv->nvs_lock);
+
+	return ret;
+}
+
 static int
 hn_nvs_doinit(struct hn_data *hv, uint32_t nvs_ver)
 {
diff --git a/drivers/net/netvsc/hn_var.h b/drivers/net/netvsc/hn_var.h
index 05bc492511ec..7434ff19bfa5 100644
--- a/drivers/net/netvsc/hn_var.h
+++ b/drivers/net/netvsc/hn_var.h
@@ -97,6 +97,7 @@ struct hn_data {
 	struct rte_vmbus_device *vmbus;
 	struct hn_rx_queue *primary;
 	rte_spinlock_t  vf_lock;
+	rte_spinlock_t  nvs_lock;
 	uint16_t	port_id;
 	uint16_t	vf_port;
 
-- 
2.20.1



* [dpdk-stable] [PATCH v2 1/4] net/netvsc: propagate descriptor limits from VF to netvsc
       [not found] ` <20200318202957.18121-1-stephen@networkplumber.org>
@ 2020-03-18 20:29   ` Stephen Hemminger
  2020-03-18 20:29   ` [dpdk-stable] [PATCH v2 2/4] net/netvsc: handle receive packets during multi-channel setup Stephen Hemminger
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 16+ messages in thread
From: Stephen Hemminger @ 2020-03-18 20:29 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, stable

If the application cares about descriptor limits, the netvsc device
values should reflect those of the VF as well.
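
The merge keeps the most restrictive value of each field, so the
combined device never advertises a limit the VF cannot honor. With
illustrative numbers (not from any real device):

	struct rte_eth_desc_lim synth = { .nb_max = 4096, .nb_min = 1, .nb_align = 1 };
	struct rte_eth_desc_lim vf    = { .nb_max = 2048, .nb_min = 32, .nb_align = 8 };

	/* Merged as in hn_vf_merge_desc_lim() below:
	 *   nb_max   = RTE_MIN(2048, 4096) = 2048   (smaller maximum wins)
	 *   nb_min   = RTE_MAX(32, 1)      = 32     (larger minimum wins)
	 *   nb_align = RTE_MAX(8, 1)       = 8      (stricter alignment wins)
	 */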

Fixes: dc7680e8597c ("net/netvsc: support integrated VF")
Cc: stable@dpdk.org
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/netvsc/hn_vf.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/net/netvsc/hn_vf.c b/drivers/net/netvsc/hn_vf.c
index 7a3734cadfa4..1261b2e2ef85 100644
--- a/drivers/net/netvsc/hn_vf.c
+++ b/drivers/net/netvsc/hn_vf.c
@@ -167,6 +167,17 @@ hn_nvs_handle_vfassoc(struct rte_eth_dev *dev,
 		hn_vf_remove(hv);
 }
 
+static void
+hn_vf_merge_desc_lim(struct rte_eth_desc_lim *lim,
+		     const struct rte_eth_desc_lim *vf_lim)
+{
+	lim->nb_max = RTE_MIN(vf_lim->nb_max, lim->nb_max);
+	lim->nb_min = RTE_MAX(vf_lim->nb_min, lim->nb_min);
+	lim->nb_align = RTE_MAX(vf_lim->nb_align, lim->nb_align);
+	lim->nb_seg_max = RTE_MIN(vf_lim->nb_seg_max, lim->nb_seg_max);
+	lim->nb_mtu_seg_max = RTE_MIN(vf_lim->nb_mtu_seg_max, lim->nb_mtu_seg_max);
+}
+
 /*
  * Merge the info from the VF and synthetic path.
  * use the default config of the VF
@@ -196,11 +207,13 @@ static int hn_vf_info_merge(struct rte_eth_dev *vf_dev,
 				      info->max_tx_queues);
 	info->tx_offload_capa &= vf_info.tx_offload_capa;
 	info->tx_queue_offload_capa &= vf_info.tx_queue_offload_capa;
+	hn_vf_merge_desc_lim(&info->tx_desc_lim, &vf_info.tx_desc_lim);
 
 	info->min_rx_bufsize = RTE_MAX(vf_info.min_rx_bufsize,
 				       info->min_rx_bufsize);
 	info->max_rx_pktlen  = RTE_MAX(vf_info.max_rx_pktlen,
 				       info->max_rx_pktlen);
+	hn_vf_merge_desc_lim(&info->rx_desc_lim, &vf_info.rx_desc_lim);
 
 	return 0;
 }
-- 
2.20.1



* [dpdk-stable] [PATCH v2 2/4] net/netvsc: handle receive packets during multi-channel setup
       [not found] ` <20200318202957.18121-1-stephen@networkplumber.org>
  2020-03-18 20:29   ` [dpdk-stable] [PATCH v2 1/4] net/netvsc: propagate descriptor limits from VF to netvsc Stephen Hemminger
@ 2020-03-18 20:29   ` Stephen Hemminger
  2020-03-18 20:29   ` [dpdk-stable] [PATCH v2 3/4] net/netvsc: split send buffers from transmit descriptors Stephen Hemminger
  2020-03-18 20:29   ` [dpdk-stable] [PATCH v2 4/4] net/netvsc: don't enable RSS if only single receive queue Stephen Hemminger
  3 siblings, 0 replies; 16+ messages in thread
From: Stephen Hemminger @ 2020-03-18 20:29 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, stable

It is possible for a packet to arrive during the configuration
process when setting up multiple-queue mode. This would cause the
configure step to fail; fix this by ignoring received packets while
waiting for control commands.

Use the receive ring lock to avoid possible races with oddly
behaved applications that issue rx_burst and control operations
concurrently.

Fixes: 4e9c73e96e83 ("net/netvsc: add Hyper-V network device")
Cc: stable@dpdk.org
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/netvsc/hn_nvs.c | 41 +++++++++++++++++++++++++++++++++++--
 1 file changed, 39 insertions(+), 2 deletions(-)

diff --git a/drivers/net/netvsc/hn_nvs.c b/drivers/net/netvsc/hn_nvs.c
index 6b518685ab6f..477202b2a0b7 100644
--- a/drivers/net/netvsc/hn_nvs.c
+++ b/drivers/net/netvsc/hn_nvs.c
@@ -54,7 +54,7 @@ static int hn_nvs_req_send(struct hn_data *hv,
 }
 
 static int
-hn_nvs_execute(struct hn_data *hv,
+__hn_nvs_execute(struct hn_data *hv,
 	       void *req, uint32_t reqlen,
 	       void *resp, uint32_t resplen,
 	       uint32_t type)
@@ -62,6 +62,7 @@ hn_nvs_execute(struct hn_data *hv,
 	struct vmbus_channel *chan = hn_primary_chan(hv);
 	char buffer[NVS_RESPSIZE_MAX];
 	const struct hn_nvs_hdr *hdr;
+	uint64_t xactid;
 	uint32_t len;
 	int ret;
 
@@ -77,7 +78,7 @@ hn_nvs_execute(struct hn_data *hv,
 
  retry:
 	len = sizeof(buffer);
-	ret = rte_vmbus_chan_recv(chan, buffer, &len, NULL);
+	ret = rte_vmbus_chan_recv(chan, buffer, &len, &xactid);
 	if (ret == -EAGAIN) {
 		rte_delay_us(HN_CHAN_INTERVAL_US);
 		goto retry;
@@ -88,7 +89,20 @@ hn_nvs_execute(struct hn_data *hv,
 		return ret;
 	}
 
+	if (len < sizeof(*hdr)) {
+		PMD_DRV_LOG(ERR, "response missing NVS header");
+		return -EINVAL;
+	}
+
 	hdr = (struct hn_nvs_hdr *)buffer;
+
+	/* Silently drop received packets while waiting for response */
+	if (hdr->type == NVS_TYPE_RNDIS) {
+		hn_nvs_ack_rxbuf(chan, xactid);
+		--hv->rxbuf_outstanding;
+		goto retry;
+	}
+
 	if (hdr->type != type) {
 		PMD_DRV_LOG(ERR, "unexpected NVS resp %#x, expect %#x",
 			    hdr->type, type);
@@ -108,6 +122,29 @@ hn_nvs_execute(struct hn_data *hv,
 	return 0;
 }
 
+
+/*
+ * Execute one control command and get the response.
+ * Only one command can be active on a channel at once.
+ * Unlike BSD, DPDK does not have an interrupt context,
+ * so polling is required to wait for the response.
+ */
+static int
+hn_nvs_execute(struct hn_data *hv,
+	       void *req, uint32_t reqlen,
+	       void *resp, uint32_t resplen,
+	       uint32_t type)
+{
+	struct hn_rx_queue *rxq = hv->primary;
+	int ret;
+
+	rte_spinlock_lock(&rxq->ring_lock);
+	ret = __hn_nvs_execute(hv, req, reqlen, resp, resplen, type);
+	rte_spinlock_unlock(&rxq->ring_lock);
+
+	return ret;
+}
+
 static int
 hn_nvs_doinit(struct hn_data *hv, uint32_t nvs_ver)
 {
-- 
2.20.1



* [dpdk-stable] [PATCH v2 3/4] net/netvsc: split send buffers from transmit descriptors
       [not found] ` <20200318202957.18121-1-stephen@networkplumber.org>
  2020-03-18 20:29   ` [dpdk-stable] [PATCH v2 1/4] net/netvsc: propagate descriptor limits from VF to netvsc Stephen Hemminger
  2020-03-18 20:29   ` [dpdk-stable] [PATCH v2 2/4] net/netvsc: handle receive packets during multi-channel setup Stephen Hemminger
@ 2020-03-18 20:29   ` Stephen Hemminger
  2020-03-18 20:29   ` [dpdk-stable] [PATCH v2 4/4] net/netvsc: don't enable RSS if only single receive queue Stephen Hemminger
  3 siblings, 0 replies; 16+ messages in thread
From: Stephen Hemminger @ 2020-03-18 20:29 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, stable

The VMBus has a reserved transmit area (per device) and transmit
descriptors (per queue). The previous code always had a 1:1 mapping
between send buffers and descriptors. This can lead to one queue
starving another and also to buffer bloat.

Change to work more like FreeBSD, where there is a pool of transmit
descriptors per queue. If a send buffer is not available, no
aggregation happens, but the queue can still drain.
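
The send buffer slots are tracked with an rte_bitmap in which a set
bit means the slot is free; every scan and set runs under a spinlock
because all queues share the pool. A minimal standalone sketch of the
scheme (the slot count is illustrative; the driver sizes the bitmap
from the host's chimney count):

	uint32_t n_slots = 64;
	uint32_t bmp_size = rte_bitmap_get_memory_footprint(n_slots);
	uint8_t *mem = rte_zmalloc("slot_bmp", bmp_size, RTE_CACHE_LINE_SIZE);
	struct rte_bitmap *bmap = rte_bitmap_init(n_slots, mem, bmp_size);
	uint32_t i, idx;
	uint64_t slab;

	for (i = 0; i < n_slots; i++)
		rte_bitmap_set(bmap, i);	/* set bit == slot free */

	/* Allocate: find any set bit and clear it (slot now in use). */
	if (rte_bitmap_scan(bmap, &idx, &slab))
		rte_bitmap_clear(bmap, idx);

	/* Free: set the bit again so the slot can be reused. */
	rte_bitmap_set(bmap, idx);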

Fixes: 4e9c73e96e83 ("net/netvsc: add Hyper-V network device")
Cc: stable@dpdk.org
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/netvsc/hn_ethdev.c |   9 +-
 drivers/net/netvsc/hn_rxtx.c   | 258 ++++++++++++++++++++-------------
 drivers/net/netvsc/hn_var.h    |  10 +-
 3 files changed, 171 insertions(+), 106 deletions(-)

diff --git a/drivers/net/netvsc/hn_ethdev.c b/drivers/net/netvsc/hn_ethdev.c
index 564620748daf..ac6610838008 100644
--- a/drivers/net/netvsc/hn_ethdev.c
+++ b/drivers/net/netvsc/hn_ethdev.c
@@ -257,6 +257,9 @@ static int hn_dev_info_get(struct rte_eth_dev *dev,
 	dev_info->max_rx_queues = hv->max_queues;
 	dev_info->max_tx_queues = hv->max_queues;
 
+	dev_info->tx_desc_lim.nb_min = 1;
+	dev_info->tx_desc_lim.nb_max = 4096;
+
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
 
@@ -982,7 +985,7 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev)
 	if  (err)
 		goto failed;
 
-	err = hn_tx_pool_init(eth_dev);
+	err = hn_chim_init(eth_dev);
 	if (err)
 		goto failed;
 
@@ -1018,7 +1021,7 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev)
 failed:
 	PMD_INIT_LOG(NOTICE, "device init failed");
 
-	hn_tx_pool_uninit(eth_dev);
+	hn_chim_uninit(eth_dev);
 	hn_detach(hv);
 	return err;
 }
@@ -1042,7 +1045,7 @@ eth_hn_dev_uninit(struct rte_eth_dev *eth_dev)
 	eth_dev->rx_pkt_burst = NULL;
 
 	hn_detach(hv);
-	hn_tx_pool_uninit(eth_dev);
+	hn_chim_uninit(eth_dev);
 	rte_vmbus_chan_close(hv->primary->chan);
 	rte_free(hv->primary);
 	ret = rte_eth_dev_owner_delete(hv->owner.id);
diff --git a/drivers/net/netvsc/hn_rxtx.c b/drivers/net/netvsc/hn_rxtx.c
index 7212780c156e..f3ce5084eceb 100644
--- a/drivers/net/netvsc/hn_rxtx.c
+++ b/drivers/net/netvsc/hn_rxtx.c
@@ -18,6 +18,7 @@
 #include <rte_memzone.h>
 #include <rte_malloc.h>
 #include <rte_atomic.h>
+#include <rte_bitmap.h>
 #include <rte_branch_prediction.h>
 #include <rte_ether.h>
 #include <rte_common.h>
@@ -83,7 +84,7 @@ struct hn_txdesc {
 	struct rte_mbuf *m;
 
 	uint16_t	queue_id;
-	uint16_t	chim_index;
+	uint32_t	chim_index;
 	uint32_t	chim_size;
 	uint32_t	data_size;
 	uint32_t	packets;
@@ -98,6 +99,8 @@ struct hn_txdesc {
 	 RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) +	\
 	 RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE))
 
+#define HN_RNDIS_PKT_ALIGNED	RTE_ALIGN(HN_RNDIS_PKT_LEN, RTE_CACHE_LINE_SIZE)
+
 /* Minimum space required for a packet */
 #define HN_PKTSIZE_MIN(align) \
 	RTE_ALIGN(RTE_ETHER_MIN_LEN + HN_RNDIS_PKT_LEN, align)
@@ -150,63 +153,78 @@ hn_rndis_pktmsg_offset(uint32_t ofs)
 static void hn_txd_init(struct rte_mempool *mp __rte_unused,
 			void *opaque, void *obj, unsigned int idx)
 {
+	struct hn_tx_queue *txq = opaque;
 	struct hn_txdesc *txd = obj;
-	struct rte_eth_dev *dev = opaque;
-	struct rndis_packet_msg *pkt;
 
 	memset(txd, 0, sizeof(*txd));
-	txd->chim_index = idx;
-
-	pkt = rte_malloc_socket("RNDIS_TX", HN_RNDIS_PKT_LEN,
-				rte_align32pow2(HN_RNDIS_PKT_LEN),
-				dev->device->numa_node);
-	if (!pkt)
-		rte_exit(EXIT_FAILURE, "can not allocate RNDIS header");
 
-	txd->rndis_pkt = pkt;
+	txd->queue_id = txq->queue_id;
+	txd->chim_index = NVS_CHIM_IDX_INVALID;
+	txd->rndis_pkt = (struct rndis_packet_msg *)((char *)txq->tx_rndis
+		+ idx * HN_RNDIS_PKT_ALIGNED);
 }
 
-/*
- * Unlike Linux and FreeBSD, this driver uses a mempool
- * to limit outstanding transmits and reserve buffers
- */
 int
-hn_tx_pool_init(struct rte_eth_dev *dev)
+hn_chim_init(struct rte_eth_dev *dev)
 {
 	struct hn_data *hv = dev->data->dev_private;
-	char name[RTE_MEMPOOL_NAMESIZE];
-	struct rte_mempool *mp;
+	uint32_t i, chim_bmp_size;
 
-	snprintf(name, sizeof(name),
-		 "hn_txd_%u", dev->data->port_id);
-
-	PMD_INIT_LOG(DEBUG, "create a TX send pool %s n=%u size=%zu socket=%d",
-		     name, hv->chim_cnt, sizeof(struct hn_txdesc),
-		     dev->device->numa_node);
-
-	mp = rte_mempool_create(name, hv->chim_cnt, sizeof(struct hn_txdesc),
-				HN_TXD_CACHE_SIZE, 0,
-				NULL, NULL,
-				hn_txd_init, dev,
-				dev->device->numa_node, 0);
-	if (!mp) {
-		PMD_DRV_LOG(ERR,
-			    "mempool %s create failed: %d", name, rte_errno);
-		return -rte_errno;
+	rte_spinlock_init(&hv->chim_lock);
+	chim_bmp_size = rte_bitmap_get_memory_footprint(hv->chim_cnt);
+	hv->chim_bmem = rte_zmalloc("hn_chim_bitmap", chim_bmp_size,
+				    RTE_CACHE_LINE_SIZE);
+	if (hv->chim_bmem == NULL) {
+		PMD_INIT_LOG(ERR, "failed to allocate bitmap size %u", chim_bmp_size);
+		return -1;
+
+	}
+
+	PMD_INIT_LOG(DEBUG, "create a TX bitmap n=%u size=%u", hv->chim_cnt, chim_bmp_size);
+
+	hv->chim_bmap = rte_bitmap_init(hv->chim_cnt, hv->chim_bmem, chim_bmp_size);
+	if (hv->chim_bmap == NULL) {
+		PMD_INIT_LOG(ERR, "failed to init chim bitmap");
+		return -1;
 	}
 
-	hv->tx_pool = mp;
+	for (i = 0; i < hv->chim_cnt; i++)
+		rte_bitmap_set(hv->chim_bmap, i);
+
 	return 0;
 }
 
 void
-hn_tx_pool_uninit(struct rte_eth_dev *dev)
+hn_chim_uninit(struct rte_eth_dev *dev)
 {
 	struct hn_data *hv = dev->data->dev_private;
 
-	if (hv->tx_pool) {
-		rte_mempool_free(hv->tx_pool);
-		hv->tx_pool = NULL;
+	rte_bitmap_free(hv->chim_bmap);
+	rte_free(hv->chim_bmem);
+	hv->chim_bmem = NULL;
+}
+
+static uint32_t hn_chim_alloc(struct hn_data *hv)
+{
+	uint32_t index = NVS_CHIM_IDX_INVALID;
+	uint64_t slab;
+
+	rte_spinlock_lock(&hv->chim_lock);
+	if (rte_bitmap_scan(hv->chim_bmap, &index, &slab))
+		rte_bitmap_clear(hv->chim_bmap, index);
+	rte_spinlock_unlock(&hv->chim_lock);
+
+	return index;
+}
+
+static void hn_chim_free(struct hn_data *hv, uint32_t chim_idx)
+{
+	if (chim_idx >= hv->chim_cnt) {
+		PMD_DRV_LOG(ERR, "Invalid chimney index %u", chim_idx);
+	} else {
+		rte_spinlock_lock(&hv->chim_lock);
+		rte_bitmap_set(hv->chim_bmap, chim_idx);
+		rte_spinlock_unlock(&hv->chim_lock);
 	}
 }
 
@@ -220,15 +238,16 @@ static void hn_reset_txagg(struct hn_tx_queue *txq)
 
 int
 hn_dev_tx_queue_setup(struct rte_eth_dev *dev,
-		      uint16_t queue_idx, uint16_t nb_desc __rte_unused,
+		      uint16_t queue_idx, uint16_t nb_desc,
 		      unsigned int socket_id,
 		      const struct rte_eth_txconf *tx_conf)
 
 {
 	struct hn_data *hv = dev->data->dev_private;
 	struct hn_tx_queue *txq;
+	char name[RTE_MEMPOOL_NAMESIZE];
 	uint32_t tx_free_thresh;
-	int err;
+	int err = -ENOMEM;
 
 	PMD_INIT_FUNC_TRACE();
 
@@ -252,6 +271,28 @@ hn_dev_tx_queue_setup(struct rte_eth_dev *dev,
 
 	txq->free_thresh = tx_free_thresh;
 
+	snprintf(name, sizeof(name),
+		 "hn_txd_%u_%u", dev->data->port_id, queue_idx);
+
+	PMD_INIT_LOG(DEBUG, "TX descriptor pool %s n=%u size=%zu",
+		     name, nb_desc, sizeof(struct hn_txdesc));
+
+	txq->tx_rndis = rte_calloc("hn_txq_rndis", nb_desc, HN_RNDIS_PKT_ALIGNED,
+				RTE_CACHE_LINE_SIZE);
+	if (txq->tx_rndis == NULL)
+		goto error;
+
+	txq->txdesc_pool = rte_mempool_create(name, nb_desc, sizeof(struct hn_txdesc),
+					      0, 0,
+					      NULL, NULL,
+					      hn_txd_init, txq,
+					      dev->device->numa_node, 0);
+	if (txq->txdesc_pool == NULL) {
+		PMD_DRV_LOG(ERR,
+			    "mempool %s create failed: %d", name, rte_errno);
+		goto error;
+	}
+
 	txq->agg_szmax  = RTE_MIN(hv->chim_szmax, hv->rndis_agg_size);
 	txq->agg_pktmax = hv->rndis_agg_pkts;
 	txq->agg_align  = hv->rndis_agg_align;
@@ -260,31 +301,57 @@ hn_dev_tx_queue_setup(struct rte_eth_dev *dev,
 
 	err = hn_vf_tx_queue_setup(dev, queue_idx, nb_desc,
 				     socket_id, tx_conf);
-	if (err) {
-		rte_free(txq);
-		return err;
+	if (err == 0) {
+		dev->data->tx_queues[queue_idx] = txq;
+		return 0;
 	}
 
-	dev->data->tx_queues[queue_idx] = txq;
-	return 0;
+error:
+	if (txq->txdesc_pool)
+		rte_mempool_free(txq->txdesc_pool);
+	rte_free(txq->tx_rndis);
+	rte_free(txq);
+	return err;
+}
+
+
+static struct hn_txdesc *hn_txd_get(struct hn_tx_queue *txq)
+{
+	struct hn_txdesc *txd;
+
+	if (rte_mempool_get(txq->txdesc_pool, (void **)&txd)) {
+		++txq->stats.ring_full;
+		PMD_TX_LOG(DEBUG, "tx pool exhausted!");
+		return NULL;
+	}
+
+	txd->m = NULL;
+	txd->packets = 0;
+	txd->data_size = 0;
+	txd->chim_size = 0;
+
+	return txd;
+}
+
+static void hn_txd_put(struct hn_tx_queue *txq, struct hn_txdesc *txd)
+{
+	rte_mempool_put(txq->txdesc_pool, txd);
 }
 
 void
 hn_dev_tx_queue_release(void *arg)
 {
 	struct hn_tx_queue *txq = arg;
-	struct hn_txdesc *txd;
 
 	PMD_INIT_FUNC_TRACE();
 
 	if (!txq)
 		return;
 
-	/* If any pending data is still present just drop it */
-	txd = txq->agg_txd;
-	if (txd)
-		rte_mempool_put(txq->hv->tx_pool, txd);
+	if (txq->txdesc_pool)
+		rte_mempool_free(txq->txdesc_pool);
 
+	rte_free(txq->tx_rndis);
 	rte_free(txq);
 }
 
@@ -292,6 +359,7 @@ static void
 hn_nvs_send_completed(struct rte_eth_dev *dev, uint16_t queue_id,
 		      unsigned long xactid, const struct hn_nvs_rndis_ack *ack)
 {
+	struct hn_data *hv = dev->data->dev_private;
 	struct hn_txdesc *txd = (struct hn_txdesc *)xactid;
 	struct hn_tx_queue *txq;
 
@@ -312,9 +380,11 @@ hn_nvs_send_completed(struct rte_eth_dev *dev, uint16_t queue_id,
 		++txq->stats.errors;
 	}
 
-	rte_pktmbuf_free(txd->m);
+	if (txd->chim_index != NVS_CHIM_IDX_INVALID)
+		hn_chim_free(hv, txd->chim_index);
 
-	rte_mempool_put(txq->hv->tx_pool, txd);
+	rte_pktmbuf_free(txd->m);
+	hn_txd_put(txq, txd);
 }
 
 /* Handle transmit completion events */
@@ -1036,28 +1106,15 @@ static int hn_flush_txagg(struct hn_tx_queue *txq, bool *need_sig)
 	return ret;
 }
 
-static struct hn_txdesc *hn_new_txd(struct hn_data *hv,
-				    struct hn_tx_queue *txq)
-{
-	struct hn_txdesc *txd;
-
-	if (rte_mempool_get(hv->tx_pool, (void **)&txd)) {
-		++txq->stats.ring_full;
-		PMD_TX_LOG(DEBUG, "tx pool exhausted!");
-		return NULL;
-	}
-
-	txd->m = NULL;
-	txd->queue_id = txq->queue_id;
-	txd->packets = 0;
-	txd->data_size = 0;
-	txd->chim_size = 0;
-
-	return txd;
-}
-
+/*
+ * Try to find a place in a send chimney buffer to put
+ * the small packet. If space is available, this routine
+ * returns a pointer to where the data should be placed.
+ * If there is no space, the caller should try direct transmit.
+ */
 static void *
-hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq, uint32_t pktsize)
+hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq,
+	     struct hn_txdesc *txd, uint32_t pktsize)
 {
 	struct hn_txdesc *agg_txd = txq->agg_txd;
 	struct rndis_packet_msg *pkt;
@@ -1085,7 +1142,7 @@ hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq, uint32_t pktsize)
 		}
 
 		chim = (uint8_t *)pkt + pkt->len;
-
+		txq->agg_prevpkt = chim;
 		txq->agg_pktleft--;
 		txq->agg_szleft -= pktsize;
 		if (txq->agg_szleft < HN_PKTSIZE_MIN(txq->agg_align)) {
@@ -1095,18 +1152,21 @@ hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq, uint32_t pktsize)
 			 */
 			txq->agg_pktleft = 0;
 		}
-	} else {
-		agg_txd = hn_new_txd(hv, txq);
-		if (!agg_txd)
-			return NULL;
-
-		chim = (uint8_t *)hv->chim_res->addr
-			+ agg_txd->chim_index * hv->chim_szmax;
 
-		txq->agg_txd = agg_txd;
-		txq->agg_pktleft = txq->agg_pktmax - 1;
-		txq->agg_szleft = txq->agg_szmax - pktsize;
+		hn_txd_put(txq, txd);
+		return chim;
 	}
+
+	txd->chim_index = hn_chim_alloc(hv);
+	if (txd->chim_index == NVS_CHIM_IDX_INVALID)
+		return NULL;
+
+	chim = (uint8_t *)hv->chim_res->addr
+			+ txd->chim_index * hv->chim_szmax;
+
+	txq->agg_txd = txd;
+	txq->agg_pktleft = txq->agg_pktmax - 1;
+	txq->agg_szleft = txq->agg_szmax - pktsize;
 	txq->agg_prevpkt = chim;
 
 	return chim;
@@ -1329,13 +1389,18 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 		return (*vf_dev->tx_pkt_burst)(sub_q, tx_pkts, nb_pkts);
 	}
 
-	if (rte_mempool_avail_count(hv->tx_pool) <= txq->free_thresh)
+	if (rte_mempool_avail_count(txq->txdesc_pool) <= txq->free_thresh)
 		hn_process_events(hv, txq->queue_id, 0);
 
 	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
 		struct rte_mbuf *m = tx_pkts[nb_tx];
 		uint32_t pkt_size = m->pkt_len + HN_RNDIS_PKT_LEN;
 		struct rndis_packet_msg *pkt;
+		struct hn_txdesc *txd;
+
+		txd = hn_txd_get(txq);
+		if (txd == NULL)
+			break;
 
 		/* For small packets aggregate them in chimney buffer */
 		if (m->pkt_len < HN_TXCOPY_THRESHOLD && pkt_size <= txq->agg_szmax) {
@@ -1346,7 +1411,8 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 					goto fail;
 			}
 
-			pkt = hn_try_txagg(hv, txq, pkt_size);
+
+			pkt = hn_try_txagg(hv, txq, txd, pkt_size);
 			if (unlikely(!pkt))
 				break;
 
@@ -1360,21 +1426,13 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 			    hn_flush_txagg(txq, &need_sig))
 				goto fail;
 		} else {
-			struct hn_txdesc *txd;
-
-			/* can send chimney data and large packet at once */
-			txd = txq->agg_txd;
-			if (txd) {
-				hn_reset_txagg(txq);
-			} else {
-				txd = hn_new_txd(hv, txq);
-				if (unlikely(!txd))
-					break;
-			}
+			/* Send any outstanding packets in buffer */
+			if (txq->agg_txd && hn_flush_txagg(txq, &need_sig))
+				goto fail;
 
 			pkt = txd->rndis_pkt;
 			txd->m = m;
-			txd->data_size += m->pkt_len;
+			txd->data_size = m->pkt_len;
 			++txd->packets;
 
 			hn_encap(pkt, queue_id, m);
@@ -1383,7 +1441,7 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 			if (unlikely(ret != 0)) {
 				PMD_TX_LOG(NOTICE, "sg send failed: %d", ret);
 				++txq->stats.errors;
-				rte_mempool_put(hv->tx_pool, txd);
+				hn_txd_put(txq, txd);
 				goto fail;
 			}
 		}
diff --git a/drivers/net/netvsc/hn_var.h b/drivers/net/netvsc/hn_var.h
index 05bc492511ec..bd8aeada76ee 100644
--- a/drivers/net/netvsc/hn_var.h
+++ b/drivers/net/netvsc/hn_var.h
@@ -52,6 +52,8 @@ struct hn_tx_queue {
 	uint16_t	port_id;
 	uint16_t	queue_id;
 	uint32_t	free_thresh;
+	struct rte_mempool *txdesc_pool;
+	void		*tx_rndis;
 
 	/* Applied packet transmission aggregation limits. */
 	uint32_t	agg_szmax;
@@ -115,8 +117,10 @@ struct hn_data {
 	uint16_t	num_queues;
 	uint64_t	rss_offloads;
 
+	rte_spinlock_t	chim_lock;
 	struct rte_mem_resource *chim_res;	/* UIO resource for Tx */
-	struct rte_mempool *tx_pool;		/* Tx descriptors */
+	struct rte_bitmap *chim_bmap;		/* Send buffer map */
+	void		*chim_bmem;
 	uint32_t	chim_szmax;		/* Max size per buffer */
 	uint32_t	chim_cnt;		/* Max packets per buffer */
 
@@ -157,8 +161,8 @@ uint16_t hn_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 uint16_t hn_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		      uint16_t nb_pkts);
 
-int	hn_tx_pool_init(struct rte_eth_dev *dev);
-void	hn_tx_pool_uninit(struct rte_eth_dev *dev);
+int	hn_chim_init(struct rte_eth_dev *dev);
+void	hn_chim_uninit(struct rte_eth_dev *dev);
 int	hn_dev_link_update(struct rte_eth_dev *dev, int wait);
 int	hn_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 			      uint16_t nb_desc, unsigned int socket_id,
-- 
2.20.1



* [dpdk-stable] [PATCH v2 4/4] net/netvsc: don't enable RSS if only single receive queue
       [not found] ` <20200318202957.18121-1-stephen@networkplumber.org>
                     ` (2 preceding siblings ...)
  2020-03-18 20:29   ` [dpdk-stable] [PATCH v2 3/4] net/netvsc: split send buffers from transmit descriptors Stephen Hemminger
@ 2020-03-18 20:29   ` Stephen Hemminger
  3 siblings, 0 replies; 16+ messages in thread
From: Stephen Hemminger @ 2020-03-18 20:29 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, stable

If the application has multiple transmit queues but only a single
receive queue, do not enable RSS.
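
The case being guarded is an application that requests the RSS hash
offload but configures only one receive queue, for example
(illustrative application code, not part of this patch):

	struct rte_eth_conf conf = {
		.rxmode = { .offloads = DEV_RX_OFFLOAD_RSS_HASH },
	};

	/* 1 Rx queue, 4 Tx queues: RSS has nothing to spread across. */
	ret = rte_eth_dev_configure(port_id, 1, 4, &conf);

With a single receive queue there is nothing for RSS to distribute,
so the driver now skips the RNDIS RSS configuration entirely.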

Fixes: 8b945a7f7dcb ("drivers/net: update Rx RSS hash offload capabilities")
Cc: stable@dpdk.org
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/netvsc/hn_ethdev.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/net/netvsc/hn_ethdev.c b/drivers/net/netvsc/hn_ethdev.c
index ac6610838008..85f43996a623 100644
--- a/drivers/net/netvsc/hn_ethdev.c
+++ b/drivers/net/netvsc/hn_ethdev.c
@@ -597,11 +597,14 @@ static int hn_dev_configure(struct rte_eth_dev *dev)
 			return err;
 		}
 
-		err = hn_rndis_conf_rss(hv, 0);
-		if (err) {
-			PMD_DRV_LOG(NOTICE,
-				    "initial RSS config failed");
-			return err;
+		if ((dev_conf->rxmode.offloads & DEV_RX_OFFLOAD_RSS_HASH) &&
+		    dev->data->nb_rx_queues > 1) {
+			err = hn_rndis_conf_rss(hv, 0);
+			if (err) {
+				PMD_DRV_LOG(NOTICE,
+					    "initial RSS config failed");
+				return err;
+			}
 		}
 	}
 
-- 
2.20.1



* [dpdk-stable] [PATCH v3 1/4] net/netvsc: propagate descriptor limits from VF to netvsc
       [not found] ` <20200318205104.22486-1-stephen@networkplumber.org>
@ 2020-03-18 20:51   ` Stephen Hemminger
  2020-03-18 20:51   ` [dpdk-stable] [PATCH v3 2/4] net/netvsc: handle receive packets during multi-channel setup Stephen Hemminger
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 16+ messages in thread
From: Stephen Hemminger @ 2020-03-18 20:51 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, stable

If the application cares about descriptor limits, the netvsc device
values should reflect those of the VF as well.

Fixes: dc7680e8597c ("net/netvsc: support integrated VF")
Cc: stable@dpdk.org
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/netvsc/hn_vf.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/net/netvsc/hn_vf.c b/drivers/net/netvsc/hn_vf.c
index 7a3734cadfa4..1261b2e2ef85 100644
--- a/drivers/net/netvsc/hn_vf.c
+++ b/drivers/net/netvsc/hn_vf.c
@@ -167,6 +167,17 @@ hn_nvs_handle_vfassoc(struct rte_eth_dev *dev,
 		hn_vf_remove(hv);
 }
 
+static void
+hn_vf_merge_desc_lim(struct rte_eth_desc_lim *lim,
+		     const struct rte_eth_desc_lim *vf_lim)
+{
+	lim->nb_max = RTE_MIN(vf_lim->nb_max, lim->nb_max);
+	lim->nb_min = RTE_MAX(vf_lim->nb_min, lim->nb_min);
+	lim->nb_align = RTE_MAX(vf_lim->nb_align, lim->nb_align);
+	lim->nb_seg_max = RTE_MIN(vf_lim->nb_seg_max, lim->nb_seg_max);
+	lim->nb_mtu_seg_max = RTE_MIN(vf_lim->nb_mtu_seg_max, lim->nb_mtu_seg_max);
+}
+
 /*
  * Merge the info from the VF and synthetic path.
  * use the default config of the VF
@@ -196,11 +207,13 @@ static int hn_vf_info_merge(struct rte_eth_dev *vf_dev,
 				      info->max_tx_queues);
 	info->tx_offload_capa &= vf_info.tx_offload_capa;
 	info->tx_queue_offload_capa &= vf_info.tx_queue_offload_capa;
+	hn_vf_merge_desc_lim(&info->tx_desc_lim, &vf_info.tx_desc_lim);
 
 	info->min_rx_bufsize = RTE_MAX(vf_info.min_rx_bufsize,
 				       info->min_rx_bufsize);
 	info->max_rx_pktlen  = RTE_MAX(vf_info.max_rx_pktlen,
 				       info->max_rx_pktlen);
+	hn_vf_merge_desc_lim(&info->rx_desc_lim, &vf_info.rx_desc_lim);
 
 	return 0;
 }
-- 
2.20.1



* [dpdk-stable] [PATCH v3 2/4] net/netvsc: handle receive packets during multi-channel setup
       [not found] ` <20200318205104.22486-1-stephen@networkplumber.org>
  2020-03-18 20:51   ` [dpdk-stable] [PATCH v3 1/4] net/netvsc: propagate descriptor limits from VF to netvsc Stephen Hemminger
@ 2020-03-18 20:51   ` Stephen Hemminger
  2020-03-18 20:51   ` [dpdk-stable] [PATCH v3 3/4] net/netvsc: split send buffers from transmit descriptors Stephen Hemminger
  2020-03-18 20:51   ` [dpdk-stable] [PATCH v3 4/4] net/netvsc: don't enable RSS if only single receive queue Stephen Hemminger
  3 siblings, 0 replies; 16+ messages in thread
From: Stephen Hemminger @ 2020-03-18 20:51 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, stable

It is possible for a packet to arrive during the configuration
process when setting up multiple-queue mode. This would cause the
configure step to fail; fix this by ignoring received packets while
waiting for control commands.

Use the receive ring lock to avoid possible races with oddly
behaved applications that issue rx_burst and control operations
concurrently.

Fixes: 4e9c73e96e83 ("net/netvsc: add Hyper-V network device")
Cc: stable@dpdk.org
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/netvsc/hn_nvs.c | 41 +++++++++++++++++++++++++++++++++++--
 1 file changed, 39 insertions(+), 2 deletions(-)

diff --git a/drivers/net/netvsc/hn_nvs.c b/drivers/net/netvsc/hn_nvs.c
index 6b518685ab6f..477202b2a0b7 100644
--- a/drivers/net/netvsc/hn_nvs.c
+++ b/drivers/net/netvsc/hn_nvs.c
@@ -54,7 +54,7 @@ static int hn_nvs_req_send(struct hn_data *hv,
 }
 
 static int
-hn_nvs_execute(struct hn_data *hv,
+__hn_nvs_execute(struct hn_data *hv,
 	       void *req, uint32_t reqlen,
 	       void *resp, uint32_t resplen,
 	       uint32_t type)
@@ -62,6 +62,7 @@ hn_nvs_execute(struct hn_data *hv,
 	struct vmbus_channel *chan = hn_primary_chan(hv);
 	char buffer[NVS_RESPSIZE_MAX];
 	const struct hn_nvs_hdr *hdr;
+	uint64_t xactid;
 	uint32_t len;
 	int ret;
 
@@ -77,7 +78,7 @@ hn_nvs_execute(struct hn_data *hv,
 
  retry:
 	len = sizeof(buffer);
-	ret = rte_vmbus_chan_recv(chan, buffer, &len, NULL);
+	ret = rte_vmbus_chan_recv(chan, buffer, &len, &xactid);
 	if (ret == -EAGAIN) {
 		rte_delay_us(HN_CHAN_INTERVAL_US);
 		goto retry;
@@ -88,7 +89,20 @@ hn_nvs_execute(struct hn_data *hv,
 		return ret;
 	}
 
+	if (len < sizeof(*hdr)) {
+		PMD_DRV_LOG(ERR, "response missing NVS header");
+		return -EINVAL;
+	}
+
 	hdr = (struct hn_nvs_hdr *)buffer;
+
+	/* Silently drop received packets while waiting for response */
+	if (hdr->type == NVS_TYPE_RNDIS) {
+		hn_nvs_ack_rxbuf(chan, xactid);
+		--hv->rxbuf_outstanding;
+		goto retry;
+	}
+
 	if (hdr->type != type) {
 		PMD_DRV_LOG(ERR, "unexpected NVS resp %#x, expect %#x",
 			    hdr->type, type);
@@ -108,6 +122,29 @@ hn_nvs_execute(struct hn_data *hv,
 	return 0;
 }
 
+
+/*
+ * Execute one control command and get the response.
+ * Only one command can be active on a channel at once.
+ * Unlike BSD, DPDK does not have an interrupt context,
+ * so polling is required to wait for the response.
+ */
+static int
+hn_nvs_execute(struct hn_data *hv,
+	       void *req, uint32_t reqlen,
+	       void *resp, uint32_t resplen,
+	       uint32_t type)
+{
+	struct hn_rx_queue *rxq = hv->primary;
+	int ret;
+
+	rte_spinlock_lock(&rxq->ring_lock);
+	ret = __hn_nvs_execute(hv, req, reqlen, resp, resplen, type);
+	rte_spinlock_unlock(&rxq->ring_lock);
+
+	return ret;
+}
+
 static int
 hn_nvs_doinit(struct hn_data *hv, uint32_t nvs_ver)
 {
-- 
2.20.1



* [dpdk-stable] [PATCH v3 3/4] net/netvsc: split send buffers from transmit descriptors
       [not found] ` <20200318205104.22486-1-stephen@networkplumber.org>
  2020-03-18 20:51   ` [dpdk-stable] [PATCH v3 1/4] net/netvsc: propagate descriptor limits from VF to netvsc Stephen Hemminger
  2020-03-18 20:51   ` [dpdk-stable] [PATCH v3 2/4] net/netvsc: handle receive packets during multi-channel setup Stephen Hemminger
@ 2020-03-18 20:51   ` Stephen Hemminger
  2020-03-18 20:51   ` [dpdk-stable] [PATCH v3 4/4] net/netvsc: don't enable RSS if only single receive queue Stephen Hemminger
  3 siblings, 0 replies; 16+ messages in thread
From: Stephen Hemminger @ 2020-03-18 20:51 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, stable

The VMBus has a reserved transmit area (per device) and transmit
descriptors (per queue). The previous code always had a 1:1 mapping
between send buffers and descriptors. This can lead to one queue
starving another and also to buffer bloat.

Change to work more like FreeBSD, where there is a pool of transmit
descriptors per queue. If a send buffer is not available, no
aggregation happens, but the queue can still drain.

Fixes: 4e9c73e96e83 ("net/netvsc: add Hyper-V network device")
Cc: stable@dpdk.org
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/netvsc/hn_ethdev.c |   9 +-
 drivers/net/netvsc/hn_rxtx.c   | 261 ++++++++++++++++++++-------------
 drivers/net/netvsc/hn_var.h    |  10 +-
 3 files changed, 174 insertions(+), 106 deletions(-)

diff --git a/drivers/net/netvsc/hn_ethdev.c b/drivers/net/netvsc/hn_ethdev.c
index 564620748daf..ac6610838008 100644
--- a/drivers/net/netvsc/hn_ethdev.c
+++ b/drivers/net/netvsc/hn_ethdev.c
@@ -257,6 +257,9 @@ static int hn_dev_info_get(struct rte_eth_dev *dev,
 	dev_info->max_rx_queues = hv->max_queues;
 	dev_info->max_tx_queues = hv->max_queues;
 
+	dev_info->tx_desc_lim.nb_min = 1;
+	dev_info->tx_desc_lim.nb_max = 4096;
+
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
 
@@ -982,7 +985,7 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev)
 	if  (err)
 		goto failed;
 
-	err = hn_tx_pool_init(eth_dev);
+	err = hn_chim_init(eth_dev);
 	if (err)
 		goto failed;
 
@@ -1018,7 +1021,7 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev)
 failed:
 	PMD_INIT_LOG(NOTICE, "device init failed");
 
-	hn_tx_pool_uninit(eth_dev);
+	hn_chim_uninit(eth_dev);
 	hn_detach(hv);
 	return err;
 }
@@ -1042,7 +1045,7 @@ eth_hn_dev_uninit(struct rte_eth_dev *eth_dev)
 	eth_dev->rx_pkt_burst = NULL;
 
 	hn_detach(hv);
-	hn_tx_pool_uninit(eth_dev);
+	hn_chim_uninit(eth_dev);
 	rte_vmbus_chan_close(hv->primary->chan);
 	rte_free(hv->primary);
 	ret = rte_eth_dev_owner_delete(hv->owner.id);
diff --git a/drivers/net/netvsc/hn_rxtx.c b/drivers/net/netvsc/hn_rxtx.c
index 7212780c156e..eea09a431ec6 100644
--- a/drivers/net/netvsc/hn_rxtx.c
+++ b/drivers/net/netvsc/hn_rxtx.c
@@ -18,6 +18,7 @@
 #include <rte_memzone.h>
 #include <rte_malloc.h>
 #include <rte_atomic.h>
+#include <rte_bitmap.h>
 #include <rte_branch_prediction.h>
 #include <rte_ether.h>
 #include <rte_common.h>
@@ -83,7 +84,7 @@ struct hn_txdesc {
 	struct rte_mbuf *m;
 
 	uint16_t	queue_id;
-	uint16_t	chim_index;
+	uint32_t	chim_index;
 	uint32_t	chim_size;
 	uint32_t	data_size;
 	uint32_t	packets;
@@ -98,6 +99,8 @@ struct hn_txdesc {
 	 RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) +	\
 	 RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE))
 
+#define HN_RNDIS_PKT_ALIGNED	RTE_ALIGN(HN_RNDIS_PKT_LEN, RTE_CACHE_LINE_SIZE)
+
 /* Minimum space required for a packet */
 #define HN_PKTSIZE_MIN(align) \
 	RTE_ALIGN(RTE_ETHER_MIN_LEN + HN_RNDIS_PKT_LEN, align)
@@ -150,63 +153,80 @@ hn_rndis_pktmsg_offset(uint32_t ofs)
 static void hn_txd_init(struct rte_mempool *mp __rte_unused,
 			void *opaque, void *obj, unsigned int idx)
 {
+	struct hn_tx_queue *txq = opaque;
 	struct hn_txdesc *txd = obj;
-	struct rte_eth_dev *dev = opaque;
-	struct rndis_packet_msg *pkt;
 
 	memset(txd, 0, sizeof(*txd));
-	txd->chim_index = idx;
-
-	pkt = rte_malloc_socket("RNDIS_TX", HN_RNDIS_PKT_LEN,
-				rte_align32pow2(HN_RNDIS_PKT_LEN),
-				dev->device->numa_node);
-	if (!pkt)
-		rte_exit(EXIT_FAILURE, "can not allocate RNDIS header");
 
-	txd->rndis_pkt = pkt;
+	txd->queue_id = txq->queue_id;
+	txd->chim_index = NVS_CHIM_IDX_INVALID;
+	txd->rndis_pkt = (struct rndis_packet_msg *)((char *)txq->tx_rndis
+		+ idx * HN_RNDIS_PKT_ALIGNED);
 }
 
-/*
- * Unlike Linux and FreeBSD, this driver uses a mempool
- * to limit outstanding transmits and reserve buffers
- */
 int
-hn_tx_pool_init(struct rte_eth_dev *dev)
+hn_chim_init(struct rte_eth_dev *dev)
 {
 	struct hn_data *hv = dev->data->dev_private;
-	char name[RTE_MEMPOOL_NAMESIZE];
-	struct rte_mempool *mp;
+	uint32_t i, chim_bmp_size;
+
+	rte_spinlock_init(&hv->chim_lock);
+	chim_bmp_size = rte_bitmap_get_memory_footprint(hv->chim_cnt);
+	hv->chim_bmem = rte_zmalloc("hn_chim_bitmap", chim_bmp_size,
+				    RTE_CACHE_LINE_SIZE);
+	if (hv->chim_bmem == NULL) {
+		PMD_INIT_LOG(ERR, "failed to allocate bitmap size %u",
+			     chim_bmp_size);
+		return -1;
+	}
 
-	snprintf(name, sizeof(name),
-		 "hn_txd_%u", dev->data->port_id);
-
-	PMD_INIT_LOG(DEBUG, "create a TX send pool %s n=%u size=%zu socket=%d",
-		     name, hv->chim_cnt, sizeof(struct hn_txdesc),
-		     dev->device->numa_node);
-
-	mp = rte_mempool_create(name, hv->chim_cnt, sizeof(struct hn_txdesc),
-				HN_TXD_CACHE_SIZE, 0,
-				NULL, NULL,
-				hn_txd_init, dev,
-				dev->device->numa_node, 0);
-	if (!mp) {
-		PMD_DRV_LOG(ERR,
-			    "mempool %s create failed: %d", name, rte_errno);
-		return -rte_errno;
+	PMD_INIT_LOG(DEBUG, "create a TX bitmap n=%u size=%u",
+		     hv->chim_cnt, chim_bmp_size);
+
+	hv->chim_bmap = rte_bitmap_init(hv->chim_cnt, hv->chim_bmem,
+					chim_bmp_size);
+	if (hv->chim_bmap == NULL) {
+		PMD_INIT_LOG(ERR, "failed to init chim bitmap");
+		return -1;
 	}
 
-	hv->tx_pool = mp;
+	for (i = 0; i < hv->chim_cnt; i++)
+		rte_bitmap_set(hv->chim_bmap, i);
+
 	return 0;
 }
 
 void
-hn_tx_pool_uninit(struct rte_eth_dev *dev)
+hn_chim_uninit(struct rte_eth_dev *dev)
 {
 	struct hn_data *hv = dev->data->dev_private;
 
-	if (hv->tx_pool) {
-		rte_mempool_free(hv->tx_pool);
-		hv->tx_pool = NULL;
+	rte_bitmap_free(hv->chim_bmap);
+	rte_free(hv->chim_bmem);
+	hv->chim_bmem = NULL;
+}
+
+static uint32_t hn_chim_alloc(struct hn_data *hv)
+{
+	uint32_t index = NVS_CHIM_IDX_INVALID;
+	uint64_t slab;
+
+	rte_spinlock_lock(&hv->chim_lock);
+	if (rte_bitmap_scan(hv->chim_bmap, &index, &slab))
+		rte_bitmap_clear(hv->chim_bmap, index);
+	rte_spinlock_unlock(&hv->chim_lock);
+
+	return index;
+}
+
+static void hn_chim_free(struct hn_data *hv, uint32_t chim_idx)
+{
+	if (chim_idx >= hv->chim_cnt) {
+		PMD_DRV_LOG(ERR, "Invalid chimney index %u", chim_idx);
+	} else {
+		rte_spinlock_lock(&hv->chim_lock);
+		rte_bitmap_set(hv->chim_bmap, chim_idx);
+		rte_spinlock_unlock(&hv->chim_lock);
 	}
 }
 
@@ -220,15 +240,16 @@ static void hn_reset_txagg(struct hn_tx_queue *txq)
 
 int
 hn_dev_tx_queue_setup(struct rte_eth_dev *dev,
-		      uint16_t queue_idx, uint16_t nb_desc __rte_unused,
+		      uint16_t queue_idx, uint16_t nb_desc,
 		      unsigned int socket_id,
 		      const struct rte_eth_txconf *tx_conf)
 
 {
 	struct hn_data *hv = dev->data->dev_private;
 	struct hn_tx_queue *txq;
+	char name[RTE_MEMPOOL_NAMESIZE];
 	uint32_t tx_free_thresh;
-	int err;
+	int err = -ENOMEM;
 
 	PMD_INIT_FUNC_TRACE();
 
@@ -252,6 +273,29 @@ hn_dev_tx_queue_setup(struct rte_eth_dev *dev,
 
 	txq->free_thresh = tx_free_thresh;
 
+	snprintf(name, sizeof(name),
+		 "hn_txd_%u_%u", dev->data->port_id, queue_idx);
+
+	PMD_INIT_LOG(DEBUG, "TX descriptor pool %s n=%u size=%zu",
+		     name, nb_desc, sizeof(struct hn_txdesc));
+
+	txq->tx_rndis = rte_calloc("hn_txq_rndis", nb_desc,
+				   HN_RNDIS_PKT_ALIGNED, RTE_CACHE_LINE_SIZE);
+	if (txq->tx_rndis == NULL)
+		goto error;
+
+	txq->txdesc_pool = rte_mempool_create(name, nb_desc,
+					      sizeof(struct hn_txdesc),
+					      0, 0,
+					      NULL, NULL,
+					      hn_txd_init, txq,
+					      dev->device->numa_node, 0);
+	if (txq->txdesc_pool == NULL) {
+		PMD_DRV_LOG(ERR,
+			    "mempool %s create failed: %d", name, rte_errno);
+		goto error;
+	}
+
 	txq->agg_szmax  = RTE_MIN(hv->chim_szmax, hv->rndis_agg_size);
 	txq->agg_pktmax = hv->rndis_agg_pkts;
 	txq->agg_align  = hv->rndis_agg_align;
@@ -260,31 +304,57 @@ hn_dev_tx_queue_setup(struct rte_eth_dev *dev,
 
 	err = hn_vf_tx_queue_setup(dev, queue_idx, nb_desc,
 				     socket_id, tx_conf);
-	if (err) {
-		rte_free(txq);
-		return err;
+	if (err == 0) {
+		dev->data->tx_queues[queue_idx] = txq;
+		return 0;
 	}
 
-	dev->data->tx_queues[queue_idx] = txq;
-	return 0;
+error:
+	if (txq->txdesc_pool)
+		rte_mempool_free(txq->txdesc_pool);
+	rte_free(txq->tx_rndis);
+	rte_free(txq);
+	return err;
+}
+
+
+static struct hn_txdesc *hn_txd_get(struct hn_tx_queue *txq)
+{
+	struct hn_txdesc *txd;
+
+	if (rte_mempool_get(txq->txdesc_pool, (void **)&txd)) {
+		++txq->stats.ring_full;
+		PMD_TX_LOG(DEBUG, "tx pool exhausted!");
+		return NULL;
+	}
+
+	txd->m = NULL;
+	txd->packets = 0;
+	txd->data_size = 0;
+	txd->chim_size = 0;
+
+	return txd;
+}
+
+static void hn_txd_put(struct hn_tx_queue *txq, struct hn_txdesc *txd)
+{
+	rte_mempool_put(txq->txdesc_pool, txd);
 }
 
 void
 hn_dev_tx_queue_release(void *arg)
 {
 	struct hn_tx_queue *txq = arg;
-	struct hn_txdesc *txd;
 
 	PMD_INIT_FUNC_TRACE();
 
 	if (!txq)
 		return;
 
-	/* If any pending data is still present just drop it */
-	txd = txq->agg_txd;
-	if (txd)
-		rte_mempool_put(txq->hv->tx_pool, txd);
+	if (txq->txdesc_pool)
+		rte_mempool_free(txq->txdesc_pool);
 
+	rte_free(txq->tx_rndis);
 	rte_free(txq);
 }
 
@@ -292,6 +362,7 @@ static void
 hn_nvs_send_completed(struct rte_eth_dev *dev, uint16_t queue_id,
 		      unsigned long xactid, const struct hn_nvs_rndis_ack *ack)
 {
+	struct hn_data *hv = dev->data->dev_private;
 	struct hn_txdesc *txd = (struct hn_txdesc *)xactid;
 	struct hn_tx_queue *txq;
 
@@ -312,9 +383,11 @@ hn_nvs_send_completed(struct rte_eth_dev *dev, uint16_t queue_id,
 		++txq->stats.errors;
 	}
 
-	rte_pktmbuf_free(txd->m);
+	if (txd->chim_index != NVS_CHIM_IDX_INVALID)
+		hn_chim_free(hv, txd->chim_index);
 
-	rte_mempool_put(txq->hv->tx_pool, txd);
+	rte_pktmbuf_free(txd->m);
+	hn_txd_put(txq, txd);
 }
 
 /* Handle transmit completion events */
@@ -1036,28 +1109,15 @@ static int hn_flush_txagg(struct hn_tx_queue *txq, bool *need_sig)
 	return ret;
 }
 
-static struct hn_txdesc *hn_new_txd(struct hn_data *hv,
-				    struct hn_tx_queue *txq)
-{
-	struct hn_txdesc *txd;
-
-	if (rte_mempool_get(hv->tx_pool, (void **)&txd)) {
-		++txq->stats.ring_full;
-		PMD_TX_LOG(DEBUG, "tx pool exhausted!");
-		return NULL;
-	}
-
-	txd->m = NULL;
-	txd->queue_id = txq->queue_id;
-	txd->packets = 0;
-	txd->data_size = 0;
-	txd->chim_size = 0;
-
-	return txd;
-}
-
+/*
+ * Try to find a place in a send chimney buffer to put
+ * the small packet. If space is available, this routine
+ * returns a pointer to where the data should be placed.
+ * If there is no space, the caller should try direct transmit.
+ */
 static void *
-hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq, uint32_t pktsize)
+hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq,
+	     struct hn_txdesc *txd, uint32_t pktsize)
 {
 	struct hn_txdesc *agg_txd = txq->agg_txd;
 	struct rndis_packet_msg *pkt;
@@ -1085,7 +1145,7 @@ hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq, uint32_t pktsize)
 		}
 
 		chim = (uint8_t *)pkt + pkt->len;
-
+		txq->agg_prevpkt = chim;
 		txq->agg_pktleft--;
 		txq->agg_szleft -= pktsize;
 		if (txq->agg_szleft < HN_PKTSIZE_MIN(txq->agg_align)) {
@@ -1095,18 +1155,21 @@ hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq, uint32_t pktsize)
 			 */
 			txq->agg_pktleft = 0;
 		}
-	} else {
-		agg_txd = hn_new_txd(hv, txq);
-		if (!agg_txd)
-			return NULL;
-
-		chim = (uint8_t *)hv->chim_res->addr
-			+ agg_txd->chim_index * hv->chim_szmax;
 
-		txq->agg_txd = agg_txd;
-		txq->agg_pktleft = txq->agg_pktmax - 1;
-		txq->agg_szleft = txq->agg_szmax - pktsize;
+		hn_txd_put(txq, txd);
+		return chim;
 	}
+
+	txd->chim_index = hn_chim_alloc(hv);
+	if (txd->chim_index == NVS_CHIM_IDX_INVALID)
+		return NULL;
+
+	chim = (uint8_t *)hv->chim_res->addr
+			+ txd->chim_index * hv->chim_szmax;
+
+	txq->agg_txd = txd;
+	txq->agg_pktleft = txq->agg_pktmax - 1;
+	txq->agg_szleft = txq->agg_szmax - pktsize;
 	txq->agg_prevpkt = chim;
 
 	return chim;
@@ -1329,13 +1392,18 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 		return (*vf_dev->tx_pkt_burst)(sub_q, tx_pkts, nb_pkts);
 	}
 
-	if (rte_mempool_avail_count(hv->tx_pool) <= txq->free_thresh)
+	if (rte_mempool_avail_count(txq->txdesc_pool) <= txq->free_thresh)
 		hn_process_events(hv, txq->queue_id, 0);
 
 	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
 		struct rte_mbuf *m = tx_pkts[nb_tx];
 		uint32_t pkt_size = m->pkt_len + HN_RNDIS_PKT_LEN;
 		struct rndis_packet_msg *pkt;
+		struct hn_txdesc *txd;
+
+		txd = hn_txd_get(txq);
+		if (txd == NULL)
+			break;
 
 		/* For small packets aggregate them in chimney buffer */
 		if (m->pkt_len < HN_TXCOPY_THRESHOLD && pkt_size <= txq->agg_szmax) {
@@ -1346,7 +1414,8 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 					goto fail;
 			}
 
-			pkt = hn_try_txagg(hv, txq, pkt_size);
+
+			pkt = hn_try_txagg(hv, txq, txd, pkt_size);
 			if (unlikely(!pkt))
 				break;
 
@@ -1360,21 +1429,13 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 			    hn_flush_txagg(txq, &need_sig))
 				goto fail;
 		} else {
-			struct hn_txdesc *txd;
-
-			/* can send chimney data and large packet at once */
-			txd = txq->agg_txd;
-			if (txd) {
-				hn_reset_txagg(txq);
-			} else {
-				txd = hn_new_txd(hv, txq);
-				if (unlikely(!txd))
-					break;
-			}
+			/* Send any outstanding packets in buffer */
+			if (txq->agg_txd && hn_flush_txagg(txq, &need_sig))
+				goto fail;
 
 			pkt = txd->rndis_pkt;
 			txd->m = m;
-			txd->data_size += m->pkt_len;
+			txd->data_size = m->pkt_len;
 			++txd->packets;
 
 			hn_encap(pkt, queue_id, m);
@@ -1383,7 +1444,7 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 			if (unlikely(ret != 0)) {
 				PMD_TX_LOG(NOTICE, "sg send failed: %d", ret);
 				++txq->stats.errors;
-				rte_mempool_put(hv->tx_pool, txd);
+				hn_txd_put(txq, txd);
 				goto fail;
 			}
 		}
diff --git a/drivers/net/netvsc/hn_var.h b/drivers/net/netvsc/hn_var.h
index 05bc492511ec..822d737bd3cc 100644
--- a/drivers/net/netvsc/hn_var.h
+++ b/drivers/net/netvsc/hn_var.h
@@ -52,6 +52,8 @@ struct hn_tx_queue {
 	uint16_t	port_id;
 	uint16_t	queue_id;
 	uint32_t	free_thresh;
+	struct rte_mempool *txdesc_pool;
+	void		*tx_rndis;
 
 	/* Applied packet transmission aggregation limits. */
 	uint32_t	agg_szmax;
@@ -115,8 +117,10 @@ struct hn_data {
 	uint16_t	num_queues;
 	uint64_t	rss_offloads;
 
+	rte_spinlock_t	chim_lock;
 	struct rte_mem_resource *chim_res;	/* UIO resource for Tx */
-	struct rte_mempool *tx_pool;		/* Tx descriptors */
+	struct rte_bitmap *chim_bmap;		/* Send buffer map */
+	void		*chim_bmem;
 	uint32_t	chim_szmax;		/* Max size per buffer */
 	uint32_t	chim_cnt;		/* Max packets per buffer */
 
@@ -157,8 +161,8 @@ uint16_t hn_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 uint16_t hn_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		      uint16_t nb_pkts);
 
-int	hn_tx_pool_init(struct rte_eth_dev *dev);
-void	hn_tx_pool_uninit(struct rte_eth_dev *dev);
+int	hn_chim_init(struct rte_eth_dev *dev);
+void	hn_chim_uninit(struct rte_eth_dev *dev);
 int	hn_dev_link_update(struct rte_eth_dev *dev, int wait);
 int	hn_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 			      uint16_t nb_desc, unsigned int socket_id,
-- 
2.20.1



* [dpdk-stable] [PATCH v3 4/4] net/netvsc: don't enable RSS if only single receive queue
       [not found] ` <20200318205104.22486-1-stephen@networkplumber.org>
                     ` (2 preceding siblings ...)
  2020-03-18 20:51   ` [dpdk-stable] [PATCH v3 3/4] net/netvsc: split send buffers from transmit descriptors Stephen Hemminger
@ 2020-03-18 20:51   ` Stephen Hemminger
  3 siblings, 0 replies; 16+ messages in thread
From: Stephen Hemminger @ 2020-03-18 20:51 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, stable

If the application has multiple transmit queues but only a single
receive queue, do not enable RSS.

Fixes: 8b945a7f7dcb ("drivers/net: update Rx RSS hash offload capabilities")
Cc: stable@dpdk.org
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/netvsc/hn_ethdev.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/net/netvsc/hn_ethdev.c b/drivers/net/netvsc/hn_ethdev.c
index ac6610838008..85f43996a623 100644
--- a/drivers/net/netvsc/hn_ethdev.c
+++ b/drivers/net/netvsc/hn_ethdev.c
@@ -597,11 +597,14 @@ static int hn_dev_configure(struct rte_eth_dev *dev)
 			return err;
 		}
 
-		err = hn_rndis_conf_rss(hv, 0);
-		if (err) {
-			PMD_DRV_LOG(NOTICE,
-				    "initial RSS config failed");
-			return err;
+		if ((dev_conf->rxmode.offloads & DEV_RX_OFFLOAD_RSS_HASH) &&
+		    dev->data->nb_rx_queues > 1) {
+			err = hn_rndis_conf_rss(hv, 0);
+			if (err) {
+				PMD_DRV_LOG(NOTICE,
+					    "initial RSS config failed");
+				return err;
+			}
 		}
 	}
 
-- 
2.20.1



* [dpdk-stable] [PATCH v4 1/8] net/netvsc: propagate descriptor limits from VF to netvsc
       [not found] ` <20200331171404.23596-1-stephen@networkplumber.org>
@ 2020-03-31 17:13   ` Stephen Hemminger
  2020-03-31 17:13   ` [dpdk-stable] [PATCH v4 2/8] net/netvsc: handle receive packets during multi-channel setup Stephen Hemminger
                     ` (4 subsequent siblings)
  5 siblings, 0 replies; 16+ messages in thread
From: Stephen Hemminger @ 2020-03-31 17:13 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, stable

If the application cares about descriptor limits, the netvsc device
values should reflect those of the VF as well.

Fixes: dc7680e8597c ("net/netvsc: support integrated VF")
Cc: stable@dpdk.org
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/netvsc/hn_vf.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/net/netvsc/hn_vf.c b/drivers/net/netvsc/hn_vf.c
index 7a3734cadfa4..1261b2e2ef85 100644
--- a/drivers/net/netvsc/hn_vf.c
+++ b/drivers/net/netvsc/hn_vf.c
@@ -167,6 +167,17 @@ hn_nvs_handle_vfassoc(struct rte_eth_dev *dev,
 		hn_vf_remove(hv);
 }
 
+static void
+hn_vf_merge_desc_lim(struct rte_eth_desc_lim *lim,
+		     const struct rte_eth_desc_lim *vf_lim)
+{
+	lim->nb_max = RTE_MIN(vf_lim->nb_max, lim->nb_max);
+	lim->nb_min = RTE_MAX(vf_lim->nb_min, lim->nb_min);
+	lim->nb_align = RTE_MAX(vf_lim->nb_align, lim->nb_align);
+	lim->nb_seg_max = RTE_MIN(vf_lim->nb_seg_max, lim->nb_seg_max);
+	lim->nb_mtu_seg_max = RTE_MIN(vf_lim->nb_mtu_seg_max, lim->nb_mtu_seg_max);
+}
+
 /*
  * Merge the info from the VF and synthetic path.
  * use the default config of the VF
@@ -196,11 +207,13 @@ static int hn_vf_info_merge(struct rte_eth_dev *vf_dev,
 				      info->max_tx_queues);
 	info->tx_offload_capa &= vf_info.tx_offload_capa;
 	info->tx_queue_offload_capa &= vf_info.tx_queue_offload_capa;
+	hn_vf_merge_desc_lim(&info->tx_desc_lim, &vf_info.tx_desc_lim);
 
 	info->min_rx_bufsize = RTE_MAX(vf_info.min_rx_bufsize,
 				       info->min_rx_bufsize);
 	info->max_rx_pktlen  = RTE_MAX(vf_info.max_rx_pktlen,
 				       info->max_rx_pktlen);
+	hn_vf_merge_desc_lim(&info->rx_desc_lim, &vf_info.rx_desc_lim);
 
 	return 0;
 }
-- 
2.20.1



* [dpdk-stable] [PATCH v4 2/8] net/netvsc: handle receive packets during multi-channel setup
       [not found] ` <20200331171404.23596-1-stephen@networkplumber.org>
  2020-03-31 17:13   ` [dpdk-stable] [PATCH v4 1/8] net/netvsc: propagate descriptor limits from VF to netvsc Stephen Hemminger
@ 2020-03-31 17:13   ` Stephen Hemminger
  2020-03-31 17:13   ` [dpdk-stable] [PATCH v4 3/8] net/netvsc: split send buffers from transmit descriptors Stephen Hemminger
                     ` (3 subsequent siblings)
  5 siblings, 0 replies; 16+ messages in thread
From: Stephen Hemminger @ 2020-03-31 17:13 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, stable

It is possible for a packet to arrive during the configuration
process when setting up multiple-queue mode. This would cause the
configure step to fail; fix this by ignoring received packets while
waiting for control commands.

Use the receive ring lock to avoid possible races with oddly
behaved applications that issue rx_burst and control operations
concurrently.

Fixes: 4e9c73e96e83 ("net/netvsc: add Hyper-V network device")
Cc: stable@dpdk.org
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/netvsc/hn_nvs.c | 41 +++++++++++++++++++++++++++++++++++--
 1 file changed, 39 insertions(+), 2 deletions(-)

diff --git a/drivers/net/netvsc/hn_nvs.c b/drivers/net/netvsc/hn_nvs.c
index 6b518685ab6f..477202b2a0b7 100644
--- a/drivers/net/netvsc/hn_nvs.c
+++ b/drivers/net/netvsc/hn_nvs.c
@@ -54,7 +54,7 @@ static int hn_nvs_req_send(struct hn_data *hv,
 }
 
 static int
-hn_nvs_execute(struct hn_data *hv,
+__hn_nvs_execute(struct hn_data *hv,
 	       void *req, uint32_t reqlen,
 	       void *resp, uint32_t resplen,
 	       uint32_t type)
@@ -62,6 +62,7 @@ hn_nvs_execute(struct hn_data *hv,
 	struct vmbus_channel *chan = hn_primary_chan(hv);
 	char buffer[NVS_RESPSIZE_MAX];
 	const struct hn_nvs_hdr *hdr;
+	uint64_t xactid;
 	uint32_t len;
 	int ret;
 
@@ -77,7 +78,7 @@ hn_nvs_execute(struct hn_data *hv,
 
  retry:
 	len = sizeof(buffer);
-	ret = rte_vmbus_chan_recv(chan, buffer, &len, NULL);
+	ret = rte_vmbus_chan_recv(chan, buffer, &len, &xactid);
 	if (ret == -EAGAIN) {
 		rte_delay_us(HN_CHAN_INTERVAL_US);
 		goto retry;
@@ -88,7 +89,20 @@ hn_nvs_execute(struct hn_data *hv,
 		return ret;
 	}
 
+	if (len < sizeof(*hdr)) {
+		PMD_DRV_LOG(ERR, "response missing NVS header");
+		return -EINVAL;
+	}
+
 	hdr = (struct hn_nvs_hdr *)buffer;
+
+	/* Silently drop received packets while waiting for response */
+	if (hdr->type == NVS_TYPE_RNDIS) {
+		hn_nvs_ack_rxbuf(chan, xactid);
+		--hv->rxbuf_outstanding;
+		goto retry;
+	}
+
 	if (hdr->type != type) {
 		PMD_DRV_LOG(ERR, "unexpected NVS resp %#x, expect %#x",
 			    hdr->type, type);
@@ -108,6 +122,29 @@ hn_nvs_execute(struct hn_data *hv,
 	return 0;
 }
 
+
+/*
+ * Execute one control command and get the response.
+ * Only one command can be active on a channel at once.
+ * Unlike BSD, DPDK does not have an interrupt context,
+ * so polling is required to wait for the response.
+ */
+static int
+hn_nvs_execute(struct hn_data *hv,
+	       void *req, uint32_t reqlen,
+	       void *resp, uint32_t resplen,
+	       uint32_t type)
+{
+	struct hn_rx_queue *rxq = hv->primary;
+	int ret;
+
+	rte_spinlock_lock(&rxq->ring_lock);
+	ret = __hn_nvs_execute(hv, req, reqlen, resp, resplen, type);
+	rte_spinlock_unlock(&rxq->ring_lock);
+
+	return ret;
+}
+
 static int
 hn_nvs_doinit(struct hn_data *hv, uint32_t nvs_ver)
 {
-- 
2.20.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [dpdk-stable] [PATCH v4 3/8] net/netvsc: split send buffers from transmit descriptors
       [not found] ` <20200331171404.23596-1-stephen@networkplumber.org>
  2020-03-31 17:13   ` [dpdk-stable] [PATCH v4 1/8] net/netvsc: propagate descriptor limits from VF to netvsc Stephen Hemminger
  2020-03-31 17:13   ` [dpdk-stable] [PATCH v4 2/8] net/netvsc: handle receive packets during multi-channel setup Stephen Hemminger
@ 2020-03-31 17:13   ` Stephen Hemminger
  2020-03-31 17:14   ` [dpdk-stable] [PATCH v4 4/8] net/netvsc: fix invalid rte_free on dev_close Stephen Hemminger
                     ` (2 subsequent siblings)
  5 siblings, 0 replies; 16+ messages in thread
From: Stephen Hemminger @ 2020-03-31 17:13 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, stable

The VMBus has a reserved transmit area (per device) and transmit
descriptors (per queue). The previous code always kept a 1:1 mapping
between send buffers and descriptors, which can lead to one queue
starving another and also to buffer bloat.

Change to work more like FreeBSD, where there is a pool of transmit
descriptors per queue. If a send buffer is not available, no
aggregation happens but the queue can still drain.
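
The per-device send (chimney) buffer indices are handed out from an
rte_bitmap guarded by a spinlock. A minimal standalone model of that
allocator (the buf_pool_* names are illustrative, not the driver's):

#include <stdint.h>
#include <rte_bitmap.h>
#include <rte_malloc.h>
#include <rte_spinlock.h>

struct buf_pool {
	rte_spinlock_t lock;
	struct rte_bitmap *bmap;
	void *bmem;			/* backing memory for the bitmap */
	uint32_t cnt;
};

static int buf_pool_init(struct buf_pool *p, uint32_t cnt)
{
	uint32_t i, sz = rte_bitmap_get_memory_footprint(cnt);

	p->bmem = rte_zmalloc("buf_bitmap", sz, RTE_CACHE_LINE_SIZE);
	if (p->bmem == NULL)
		return -1;
	p->bmap = rte_bitmap_init(cnt, p->bmem, sz);
	if (p->bmap == NULL)
		return -1;

	rte_spinlock_init(&p->lock);
	p->cnt = cnt;
	for (i = 0; i < cnt; i++)
		rte_bitmap_set(p->bmap, i);	/* all indices start free */
	return 0;
}

static uint32_t buf_pool_get(struct buf_pool *p)
{
	uint32_t idx = UINT32_MAX;		/* "invalid" sentinel */
	uint64_t slab;

	rte_spinlock_lock(&p->lock);
	if (rte_bitmap_scan(p->bmap, &idx, &slab))
		rte_bitmap_clear(p->bmap, idx);	/* mark in use */
	rte_spinlock_unlock(&p->lock);
	return idx;
}

static void buf_pool_put(struct buf_pool *p, uint32_t idx)
{
	if (idx >= p->cnt)
		return;
	rte_spinlock_lock(&p->lock);
	rte_bitmap_set(p->bmap, idx);		/* mark free again */
	rte_spinlock_unlock(&p->lock);
}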

Fixes: 4e9c73e96e83 ("net/netvsc: add Hyper-V network device")
Cc: stable@dpdk.org
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/netvsc/hn_ethdev.c |   9 +-
 drivers/net/netvsc/hn_rxtx.c   | 271 ++++++++++++++++++++-------------
 drivers/net/netvsc/hn_var.h    |  10 +-
 3 files changed, 180 insertions(+), 110 deletions(-)

diff --git a/drivers/net/netvsc/hn_ethdev.c b/drivers/net/netvsc/hn_ethdev.c
index 564620748daf..ac6610838008 100644
--- a/drivers/net/netvsc/hn_ethdev.c
+++ b/drivers/net/netvsc/hn_ethdev.c
@@ -257,6 +257,9 @@ static int hn_dev_info_get(struct rte_eth_dev *dev,
 	dev_info->max_rx_queues = hv->max_queues;
 	dev_info->max_tx_queues = hv->max_queues;
 
+	dev_info->tx_desc_lim.nb_min = 1;
+	dev_info->tx_desc_lim.nb_max = 4096;
+
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
 
@@ -982,7 +985,7 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev)
 	if  (err)
 		goto failed;
 
-	err = hn_tx_pool_init(eth_dev);
+	err = hn_chim_init(eth_dev);
 	if (err)
 		goto failed;
 
@@ -1018,7 +1021,7 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev)
 failed:
 	PMD_INIT_LOG(NOTICE, "device init failed");
 
-	hn_tx_pool_uninit(eth_dev);
+	hn_chim_uninit(eth_dev);
 	hn_detach(hv);
 	return err;
 }
@@ -1042,7 +1045,7 @@ eth_hn_dev_uninit(struct rte_eth_dev *eth_dev)
 	eth_dev->rx_pkt_burst = NULL;
 
 	hn_detach(hv);
-	hn_tx_pool_uninit(eth_dev);
+	hn_chim_uninit(eth_dev);
 	rte_vmbus_chan_close(hv->primary->chan);
 	rte_free(hv->primary);
 	ret = rte_eth_dev_owner_delete(hv->owner.id);
diff --git a/drivers/net/netvsc/hn_rxtx.c b/drivers/net/netvsc/hn_rxtx.c
index 7212780c156e..32c03e3da0c7 100644
--- a/drivers/net/netvsc/hn_rxtx.c
+++ b/drivers/net/netvsc/hn_rxtx.c
@@ -18,6 +18,7 @@
 #include <rte_memzone.h>
 #include <rte_malloc.h>
 #include <rte_atomic.h>
+#include <rte_bitmap.h>
 #include <rte_branch_prediction.h>
 #include <rte_ether.h>
 #include <rte_common.h>
@@ -83,7 +84,7 @@ struct hn_txdesc {
 	struct rte_mbuf *m;
 
 	uint16_t	queue_id;
-	uint16_t	chim_index;
+	uint32_t	chim_index;
 	uint32_t	chim_size;
 	uint32_t	data_size;
 	uint32_t	packets;
@@ -98,11 +99,13 @@ struct hn_txdesc {
 	 RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) +	\
 	 RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE))
 
+#define HN_RNDIS_PKT_ALIGNED	RTE_ALIGN(HN_RNDIS_PKT_LEN, RTE_CACHE_LINE_SIZE)
+
 /* Minimum space required for a packet */
 #define HN_PKTSIZE_MIN(align) \
 	RTE_ALIGN(RTE_ETHER_MIN_LEN + HN_RNDIS_PKT_LEN, align)
 
-#define DEFAULT_TX_FREE_THRESH 32U
+#define DEFAULT_TX_FREE_THRESH 32
 
 static void
 hn_update_packet_stats(struct hn_stats *stats, const struct rte_mbuf *m)
@@ -150,63 +153,77 @@ hn_rndis_pktmsg_offset(uint32_t ofs)
 static void hn_txd_init(struct rte_mempool *mp __rte_unused,
 			void *opaque, void *obj, unsigned int idx)
 {
+	struct hn_tx_queue *txq = opaque;
 	struct hn_txdesc *txd = obj;
-	struct rte_eth_dev *dev = opaque;
-	struct rndis_packet_msg *pkt;
 
 	memset(txd, 0, sizeof(*txd));
-	txd->chim_index = idx;
-
-	pkt = rte_malloc_socket("RNDIS_TX", HN_RNDIS_PKT_LEN,
-				rte_align32pow2(HN_RNDIS_PKT_LEN),
-				dev->device->numa_node);
-	if (!pkt)
-		rte_exit(EXIT_FAILURE, "can not allocate RNDIS header");
 
-	txd->rndis_pkt = pkt;
+	txd->queue_id = txq->queue_id;
+	txd->chim_index = NVS_CHIM_IDX_INVALID;
+	txd->rndis_pkt = (struct rndis_packet_msg *)(char *)txq->tx_rndis
+		+ idx * HN_RNDIS_PKT_ALIGNED;
 }
 
-/*
- * Unlike Linux and FreeBSD, this driver uses a mempool
- * to limit outstanding transmits and reserve buffers
- */
 int
-hn_tx_pool_init(struct rte_eth_dev *dev)
+hn_chim_init(struct rte_eth_dev *dev)
 {
 	struct hn_data *hv = dev->data->dev_private;
-	char name[RTE_MEMPOOL_NAMESIZE];
-	struct rte_mempool *mp;
+	uint32_t i, chim_bmp_size;
+
+	rte_spinlock_init(&hv->chim_lock);
+	chim_bmp_size = rte_bitmap_get_memory_footprint(hv->chim_cnt);
+	hv->chim_bmem = rte_zmalloc("hn_chim_bitmap", chim_bmp_size,
+				    RTE_CACHE_LINE_SIZE);
+	if (hv->chim_bmem == NULL) {
+		PMD_INIT_LOG(ERR, "failed to allocate bitmap size %u",
+			     chim_bmp_size);
+		return -1;
+	}
 
-	snprintf(name, sizeof(name),
-		 "hn_txd_%u", dev->data->port_id);
-
-	PMD_INIT_LOG(DEBUG, "create a TX send pool %s n=%u size=%zu socket=%d",
-		     name, hv->chim_cnt, sizeof(struct hn_txdesc),
-		     dev->device->numa_node);
-
-	mp = rte_mempool_create(name, hv->chim_cnt, sizeof(struct hn_txdesc),
-				HN_TXD_CACHE_SIZE, 0,
-				NULL, NULL,
-				hn_txd_init, dev,
-				dev->device->numa_node, 0);
-	if (!mp) {
-		PMD_DRV_LOG(ERR,
-			    "mempool %s create failed: %d", name, rte_errno);
-		return -rte_errno;
+	hv->chim_bmap = rte_bitmap_init(hv->chim_cnt,
+					hv->chim_bmem, chim_bmp_size);
+	if (hv->chim_bmap == NULL) {
+		PMD_INIT_LOG(ERR, "failed to init chim bitmap");
+		return -1;
 	}
 
-	hv->tx_pool = mp;
+	for (i = 0; i < hv->chim_cnt; i++)
+		rte_bitmap_set(hv->chim_bmap, i);
+
 	return 0;
 }
 
 void
-hn_tx_pool_uninit(struct rte_eth_dev *dev)
+hn_chim_uninit(struct rte_eth_dev *dev)
 {
 	struct hn_data *hv = dev->data->dev_private;
 
-	if (hv->tx_pool) {
-		rte_mempool_free(hv->tx_pool);
-		hv->tx_pool = NULL;
+	rte_bitmap_free(hv->chim_bmap);
+	rte_free(hv->chim_bmem);
+	hv->chim_bmem = NULL;
+}
+
+static uint32_t hn_chim_alloc(struct hn_data *hv)
+{
+	uint32_t index = NVS_CHIM_IDX_INVALID;
+	uint64_t slab;
+
+	rte_spinlock_lock(&hv->chim_lock);
+	if (rte_bitmap_scan(hv->chim_bmap, &index, &slab))
+		rte_bitmap_clear(hv->chim_bmap, index);
+	rte_spinlock_unlock(&hv->chim_lock);
+
+	return index;
+}
+
+static void hn_chim_free(struct hn_data *hv, uint32_t chim_idx)
+{
+	if (chim_idx >= hv->chim_cnt) {
+		PMD_DRV_LOG(ERR, "Invalid chimney index %u", chim_idx);
+	} else {
+		rte_spinlock_lock(&hv->chim_lock);
+		rte_bitmap_set(hv->chim_bmap, chim_idx);
+		rte_spinlock_unlock(&hv->chim_lock);
 	}
 }
 
@@ -220,15 +237,16 @@ static void hn_reset_txagg(struct hn_tx_queue *txq)
 
 int
 hn_dev_tx_queue_setup(struct rte_eth_dev *dev,
-		      uint16_t queue_idx, uint16_t nb_desc __rte_unused,
+		      uint16_t queue_idx, uint16_t nb_desc,
 		      unsigned int socket_id,
 		      const struct rte_eth_txconf *tx_conf)
 
 {
 	struct hn_data *hv = dev->data->dev_private;
 	struct hn_tx_queue *txq;
+	char name[RTE_MEMPOOL_NAMESIZE];
 	uint32_t tx_free_thresh;
-	int err;
+	int err = -ENOMEM;
 
 	PMD_INIT_FUNC_TRACE();
 
@@ -244,14 +262,42 @@ hn_dev_tx_queue_setup(struct rte_eth_dev *dev,
 
 	tx_free_thresh = tx_conf->tx_free_thresh;
 	if (tx_free_thresh == 0)
-		tx_free_thresh = RTE_MIN(hv->chim_cnt / 4,
+		tx_free_thresh = RTE_MIN(nb_desc / 4,
 					 DEFAULT_TX_FREE_THRESH);
 
-	if (tx_free_thresh >= hv->chim_cnt - 3)
-		tx_free_thresh = hv->chim_cnt - 3;
+	if (tx_free_thresh + 3 >= nb_desc) {
+		PMD_INIT_LOG(ERR,
+			     "tx_free_thresh must be less than the number of TX entries minus 3 (%u)."
+			     " (tx_free_thresh=%u port=%u queue=%u)",
+			     nb_desc - 3,
+			     tx_free_thresh, dev->data->port_id, queue_idx);
+		return -EINVAL;
+	}
 
 	txq->free_thresh = tx_free_thresh;
 
+	snprintf(name, sizeof(name),
+		 "hn_txd_%u_%u", dev->data->port_id, queue_idx);
+
+	PMD_INIT_LOG(DEBUG, "TX descriptor pool %s n=%u size=%zu",
+		     name, nb_desc, sizeof(struct hn_txdesc));
+
+	txq->tx_rndis = rte_calloc("hn_txq_rndis", nb_desc,
+				   HN_RNDIS_PKT_ALIGNED, RTE_CACHE_LINE_SIZE);
+	if (txq->tx_rndis == NULL)
+		goto error;
+
+	txq->txdesc_pool = rte_mempool_create(name, nb_desc,
+					      sizeof(struct hn_txdesc),
+					      0, 0, NULL, NULL,
+					      hn_txd_init, txq,
+					      dev->device->numa_node, 0);
+	if (txq->txdesc_pool == NULL) {
+		PMD_DRV_LOG(ERR,
+			    "mempool %s create failed: %d", name, rte_errno);
+		goto error;
+	}
+
 	txq->agg_szmax  = RTE_MIN(hv->chim_szmax, hv->rndis_agg_size);
 	txq->agg_pktmax = hv->rndis_agg_pkts;
 	txq->agg_align  = hv->rndis_agg_align;
@@ -260,31 +306,57 @@ hn_dev_tx_queue_setup(struct rte_eth_dev *dev,
 
 	err = hn_vf_tx_queue_setup(dev, queue_idx, nb_desc,
 				     socket_id, tx_conf);
-	if (err) {
-		rte_free(txq);
-		return err;
+	if (err == 0) {
+		dev->data->tx_queues[queue_idx] = txq;
+		return 0;
 	}
 
-	dev->data->tx_queues[queue_idx] = txq;
-	return 0;
+error:
+	if (txq->txdesc_pool)
+		rte_mempool_free(txq->txdesc_pool);
+	rte_free(txq->tx_rndis);
+	rte_free(txq);
+	return err;
+}
+
+
+static struct hn_txdesc *hn_txd_get(struct hn_tx_queue *txq)
+{
+	struct hn_txdesc *txd;
+
+	if (rte_mempool_get(txq->txdesc_pool, (void **)&txd)) {
+		++txq->stats.ring_full;
+		PMD_TX_LOG(DEBUG, "tx pool exhausted!");
+		return NULL;
+	}
+
+	txd->m = NULL;
+	txd->packets = 0;
+	txd->data_size = 0;
+	txd->chim_size = 0;
+
+	return txd;
+}
+
+static void hn_txd_put(struct hn_tx_queue *txq, struct hn_txdesc *txd)
+{
+	rte_mempool_put(txq->txdesc_pool, txd);
 }
 
 void
 hn_dev_tx_queue_release(void *arg)
 {
 	struct hn_tx_queue *txq = arg;
-	struct hn_txdesc *txd;
 
 	PMD_INIT_FUNC_TRACE();
 
 	if (!txq)
 		return;
 
-	/* If any pending data is still present just drop it */
-	txd = txq->agg_txd;
-	if (txd)
-		rte_mempool_put(txq->hv->tx_pool, txd);
+	if (txq->txdesc_pool)
+		rte_mempool_free(txq->txdesc_pool);
 
+	rte_free(txq->tx_rndis);
 	rte_free(txq);
 }
 
@@ -292,6 +364,7 @@ static void
 hn_nvs_send_completed(struct rte_eth_dev *dev, uint16_t queue_id,
 		      unsigned long xactid, const struct hn_nvs_rndis_ack *ack)
 {
+	struct hn_data *hv = dev->data->dev_private;
 	struct hn_txdesc *txd = (struct hn_txdesc *)xactid;
 	struct hn_tx_queue *txq;
 
@@ -312,9 +385,11 @@ hn_nvs_send_completed(struct rte_eth_dev *dev, uint16_t queue_id,
 		++txq->stats.errors;
 	}
 
-	rte_pktmbuf_free(txd->m);
+	if (txd->chim_index != NVS_CHIM_IDX_INVALID)
+		hn_chim_free(hv, txd->chim_index);
 
-	rte_mempool_put(txq->hv->tx_pool, txd);
+	rte_pktmbuf_free(txd->m);
+	hn_txd_put(txq, txd);
 }
 
 /* Handle transmit completion events */
@@ -1036,28 +1111,15 @@ static int hn_flush_txagg(struct hn_tx_queue *txq, bool *need_sig)
 	return ret;
 }
 
-static struct hn_txdesc *hn_new_txd(struct hn_data *hv,
-				    struct hn_tx_queue *txq)
-{
-	struct hn_txdesc *txd;
-
-	if (rte_mempool_get(hv->tx_pool, (void **)&txd)) {
-		++txq->stats.ring_full;
-		PMD_TX_LOG(DEBUG, "tx pool exhausted!");
-		return NULL;
-	}
-
-	txd->m = NULL;
-	txd->queue_id = txq->queue_id;
-	txd->packets = 0;
-	txd->data_size = 0;
-	txd->chim_size = 0;
-
-	return txd;
-}
-
+/*
+ * Try to find space in a send chimney buffer for
+ * the small packet. If space is available, this routine
+ * returns a pointer to where to place the data.
+ * If there is no space, the caller should try direct transmit.
+ */
 static void *
-hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq, uint32_t pktsize)
+hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq,
+	     struct hn_txdesc *txd, uint32_t pktsize)
 {
 	struct hn_txdesc *agg_txd = txq->agg_txd;
 	struct rndis_packet_msg *pkt;
@@ -1085,7 +1147,7 @@ hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq, uint32_t pktsize)
 		}
 
 		chim = (uint8_t *)pkt + pkt->len;
-
+		txq->agg_prevpkt = chim;
 		txq->agg_pktleft--;
 		txq->agg_szleft -= pktsize;
 		if (txq->agg_szleft < HN_PKTSIZE_MIN(txq->agg_align)) {
@@ -1095,18 +1157,21 @@ hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq, uint32_t pktsize)
 			 */
 			txq->agg_pktleft = 0;
 		}
-	} else {
-		agg_txd = hn_new_txd(hv, txq);
-		if (!agg_txd)
-			return NULL;
-
-		chim = (uint8_t *)hv->chim_res->addr
-			+ agg_txd->chim_index * hv->chim_szmax;
 
-		txq->agg_txd = agg_txd;
-		txq->agg_pktleft = txq->agg_pktmax - 1;
-		txq->agg_szleft = txq->agg_szmax - pktsize;
+		hn_txd_put(txq, txd);
+		return chim;
 	}
+
+	txd->chim_index = hn_chim_alloc(hv);
+	if (txd->chim_index == NVS_CHIM_IDX_INVALID)
+		return NULL;
+
+	chim = (uint8_t *)hv->chim_res->addr
+			+ txd->chim_index * hv->chim_szmax;
+
+	txq->agg_txd = txd;
+	txq->agg_pktleft = txq->agg_pktmax - 1;
+	txq->agg_szleft = txq->agg_szmax - pktsize;
 	txq->agg_prevpkt = chim;
 
 	return chim;
@@ -1329,13 +1394,18 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 		return (*vf_dev->tx_pkt_burst)(sub_q, tx_pkts, nb_pkts);
 	}
 
-	if (rte_mempool_avail_count(hv->tx_pool) <= txq->free_thresh)
+	if (rte_mempool_avail_count(txq->txdesc_pool) <= txq->free_thresh)
 		hn_process_events(hv, txq->queue_id, 0);
 
 	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
 		struct rte_mbuf *m = tx_pkts[nb_tx];
 		uint32_t pkt_size = m->pkt_len + HN_RNDIS_PKT_LEN;
 		struct rndis_packet_msg *pkt;
+		struct hn_txdesc *txd;
+
+		txd = hn_txd_get(txq);
+		if (txd == NULL)
+			break;
 
 		/* For small packets aggregate them in chimney buffer */
 		if (m->pkt_len < HN_TXCOPY_THRESHOLD && pkt_size <= txq->agg_szmax) {
@@ -1346,7 +1416,8 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 					goto fail;
 			}
 
-			pkt = hn_try_txagg(hv, txq, pkt_size);
+
+			pkt = hn_try_txagg(hv, txq, txd, pkt_size);
 			if (unlikely(!pkt))
 				break;
 
@@ -1360,21 +1431,13 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 			    hn_flush_txagg(txq, &need_sig))
 				goto fail;
 		} else {
-			struct hn_txdesc *txd;
-
-			/* can send chimney data and large packet at once */
-			txd = txq->agg_txd;
-			if (txd) {
-				hn_reset_txagg(txq);
-			} else {
-				txd = hn_new_txd(hv, txq);
-				if (unlikely(!txd))
-					break;
-			}
+			/* Send any outstanding packets in buffer */
+			if (txq->agg_txd && hn_flush_txagg(txq, &need_sig))
+				goto fail;
 
 			pkt = txd->rndis_pkt;
 			txd->m = m;
-			txd->data_size += m->pkt_len;
+			txd->data_size = m->pkt_len;
 			++txd->packets;
 
 			hn_encap(pkt, queue_id, m);
@@ -1383,7 +1446,7 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 			if (unlikely(ret != 0)) {
 				PMD_TX_LOG(NOTICE, "sg send failed: %d", ret);
 				++txq->stats.errors;
-				rte_mempool_put(hv->tx_pool, txd);
+				hn_txd_put(txq, txd);
 				goto fail;
 			}
 		}
diff --git a/drivers/net/netvsc/hn_var.h b/drivers/net/netvsc/hn_var.h
index 05bc492511ec..822d737bd3cc 100644
--- a/drivers/net/netvsc/hn_var.h
+++ b/drivers/net/netvsc/hn_var.h
@@ -52,6 +52,8 @@ struct hn_tx_queue {
 	uint16_t	port_id;
 	uint16_t	queue_id;
 	uint32_t	free_thresh;
+	struct rte_mempool *txdesc_pool;
+	void		*tx_rndis;
 
 	/* Applied packet transmission aggregation limits. */
 	uint32_t	agg_szmax;
@@ -115,8 +117,10 @@ struct hn_data {
 	uint16_t	num_queues;
 	uint64_t	rss_offloads;
 
+	rte_spinlock_t	chim_lock;
 	struct rte_mem_resource *chim_res;	/* UIO resource for Tx */
-	struct rte_mempool *tx_pool;		/* Tx descriptors */
+	struct rte_bitmap *chim_bmap;		/* Send buffer map */
+	void		*chim_bmem;
 	uint32_t	chim_szmax;		/* Max size per buffer */
 	uint32_t	chim_cnt;		/* Max packets per buffer */
 
@@ -157,8 +161,8 @@ uint16_t hn_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 uint16_t hn_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		      uint16_t nb_pkts);
 
-int	hn_tx_pool_init(struct rte_eth_dev *dev);
-void	hn_tx_pool_uninit(struct rte_eth_dev *dev);
+int	hn_chim_init(struct rte_eth_dev *dev);
+void	hn_chim_uninit(struct rte_eth_dev *dev);
 int	hn_dev_link_update(struct rte_eth_dev *dev, int wait);
 int	hn_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 			      uint16_t nb_desc, unsigned int socket_id,
-- 
2.20.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [dpdk-stable] [PATCH v4 4/8] net/netvsc: fix invalid rte_free on dev_close
       [not found] ` <20200331171404.23596-1-stephen@networkplumber.org>
                     ` (2 preceding siblings ...)
  2020-03-31 17:13   ` [dpdk-stable] [PATCH v4 3/8] net/netvsc: split send buffers from transmit descriptors Stephen Hemminger
@ 2020-03-31 17:14   ` Stephen Hemminger
  2020-04-06 16:00     ` Ferruh Yigit
  2020-03-31 17:14   ` [dpdk-stable] [PATCH v4 5/8] net/netvsc: remove process event optimization Stephen Hemminger
  2020-03-31 17:14   ` [dpdk-stable] [PATCH v4 6/8] net/netvsc: handle transmit completions based on burst size Stephen Hemminger
  5 siblings, 1 reply; 16+ messages in thread
From: Stephen Hemminger @ 2020-03-31 17:14 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, stable

The netvsc PMD was putting the MAC address in private data, but the
core rte_ethdev doesn't allow that. It has to be in rte_malloc'd
memory or a message will be printed on shutdown/close:
 EAL: Invalid memory
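
The constraint exists because rte_eth_dev_release_port() calls
rte_free() on dev->data->mac_addrs. A minimal sketch of a conforming
allocation (set_mac_storage() is an illustrative name, not driver
code):

#include <errno.h>
#include <rte_ethdev.h>
#include <rte_malloc.h>

static int set_mac_storage(struct rte_eth_dev *dev)
{
	/* must come from the rte_malloc family, never dev_private */
	dev->data->mac_addrs = rte_calloc("mac_addrs", 1,
					  sizeof(struct rte_ether_addr), 0);
	return dev->data->mac_addrs == NULL ? -ENOMEM : 0;
}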

Fixes: f8279f47dd89 ("net/netvsc: fix crash in secondary process")
Cc: stable@dpdk.org
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/netvsc/hn_ethdev.c | 16 ++++++++++------
 drivers/net/netvsc/hn_var.h    |  2 --
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/net/netvsc/hn_ethdev.c b/drivers/net/netvsc/hn_ethdev.c
index ac6610838008..05f1a25a1abc 100644
--- a/drivers/net/netvsc/hn_ethdev.c
+++ b/drivers/net/netvsc/hn_ethdev.c
@@ -134,8 +134,6 @@ eth_dev_vmbus_allocate(struct rte_vmbus_device *dev, size_t private_data_size)
 static void
 eth_dev_vmbus_release(struct rte_eth_dev *eth_dev)
 {
-	/* mac_addrs must not be freed alone because part of dev_private */
-	eth_dev->data->mac_addrs = NULL;
 	/* free ether device */
 	rte_eth_dev_release_port(eth_dev);
 
@@ -937,9 +935,6 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev)
 	eth_dev->tx_pkt_burst = &hn_xmit_pkts;
 	eth_dev->rx_pkt_burst = &hn_recv_pkts;
 
-	/* Since Hyper-V only supports one MAC address, just use local data */
-	eth_dev->data->mac_addrs = &hv->mac_addr;
-
 	/*
 	 * for secondary processes, we don't initialize any further as primary
 	 * has already done this work.
@@ -947,6 +942,15 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev)
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
 
+	/* Since Hyper-V only supports one MAC address */
+	eth_dev->data->mac_addrs = rte_calloc("hv_mac", HN_MAX_MAC_ADDRS,
+					      sizeof(struct rte_ether_addr), 0);
+	if (eth_dev->data->mac_addrs == NULL) {
+		PMD_INIT_LOG(ERR,
+			     "Failed to allocate memory to store MAC addresses");
+		return -ENOMEM;
+	}
+
 	hv->vmbus = vmbus;
 	hv->rxbuf_res = &vmbus->resource[HV_RECV_BUF_MAP];
 	hv->chim_res  = &vmbus->resource[HV_SEND_BUF_MAP];
@@ -989,7 +993,7 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev)
 	if (err)
 		goto failed;
 
-	err = hn_rndis_get_eaddr(hv, hv->mac_addr.addr_bytes);
+	err = hn_rndis_get_eaddr(hv, eth_dev->data->mac_addrs->addr_bytes);
 	if (err)
 		goto failed;
 
diff --git a/drivers/net/netvsc/hn_var.h b/drivers/net/netvsc/hn_var.h
index 822d737bd3cc..b4c61717379f 100644
--- a/drivers/net/netvsc/hn_var.h
+++ b/drivers/net/netvsc/hn_var.h
@@ -139,8 +139,6 @@ struct hn_data {
 	uint8_t		rss_key[40];
 	uint16_t	rss_ind[128];
 
-	struct rte_ether_addr mac_addr;
-
 	struct rte_eth_dev_owner owner;
 	struct rte_intr_handle vf_intr;
 
-- 
2.20.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [dpdk-stable] [PATCH v4 5/8] net/netvsc: remove process event optimization
       [not found] ` <20200331171404.23596-1-stephen@networkplumber.org>
                     ` (3 preceding siblings ...)
  2020-03-31 17:14   ` [dpdk-stable] [PATCH v4 4/8] net/netvsc: fix invalid rte_free on dev_close Stephen Hemminger
@ 2020-03-31 17:14   ` Stephen Hemminger
  2020-03-31 17:14   ` [dpdk-stable] [PATCH v4 6/8] net/netvsc: handle transmit completions based on burst size Stephen Hemminger
  5 siblings, 0 replies; 16+ messages in thread
From: Stephen Hemminger @ 2020-03-31 17:14 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, stable

Remove the unlocked check for data in the receive ring.
This check is not safe because it peeks at the shared ring
without the ring lock or any memory barriers, so it can act
on stale state.

Fixes: 4e9c73e96e83 ("net/netvsc: add Hyper-V network device")
Cc: stable@dpdk.org
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/netvsc/hn_rxtx.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/net/netvsc/hn_rxtx.c b/drivers/net/netvsc/hn_rxtx.c
index 32c03e3da0c7..e8df84604202 100644
--- a/drivers/net/netvsc/hn_rxtx.c
+++ b/drivers/net/netvsc/hn_rxtx.c
@@ -969,10 +969,6 @@ uint32_t hn_process_events(struct hn_data *hv, uint16_t queue_id,
 
 	rxq = queue_id == 0 ? hv->primary : dev->data->rx_queues[queue_id];
 
-	/* If no pending data then nothing to do */
-	if (rte_vmbus_chan_rx_empty(rxq->chan))
-		return 0;
-
 	/*
 	 * Since channel is shared between Rx and TX queue need to have a lock
 	 * since DPDK does not force same CPU to be used for Rx/Tx.
-- 
2.20.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [dpdk-stable] [PATCH v4 6/8] net/netvsc: handle transmit completions based on burst size
       [not found] ` <20200331171404.23596-1-stephen@networkplumber.org>
                     ` (4 preceding siblings ...)
  2020-03-31 17:14   ` [dpdk-stable] [PATCH v4 5/8] net/netvsc: remove process event optimization Stephen Hemminger
@ 2020-03-31 17:14   ` Stephen Hemminger
  5 siblings, 0 replies; 16+ messages in thread
From: Stephen Hemminger @ 2020-03-31 17:14 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, stable

If tx_free_thresh is quite low, it is possible that we need to
clean up based on the burst size as well: a single burst can need
more descriptors than are available even though the free threshold
has not yet been crossed.

Fixes: fc30efe3a22e ("net/netvsc: change Rx descriptor setup and sizing")
Cc: stable@dpdk.org
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/netvsc/hn_rxtx.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/netvsc/hn_rxtx.c b/drivers/net/netvsc/hn_rxtx.c
index e8df84604202..cbdfcc628b75 100644
--- a/drivers/net/netvsc/hn_rxtx.c
+++ b/drivers/net/netvsc/hn_rxtx.c
@@ -1375,7 +1375,7 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 	struct hn_data *hv = txq->hv;
 	struct rte_eth_dev *vf_dev;
 	bool need_sig = false;
-	uint16_t nb_tx;
+	uint16_t nb_tx, avail;
 	int ret;
 
 	if (unlikely(hv->closed))
@@ -1390,7 +1390,8 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 		return (*vf_dev->tx_pkt_burst)(sub_q, tx_pkts, nb_pkts);
 	}
 
-	if (rte_mempool_avail_count(txq->txdesc_pool) <= txq->free_thresh)
+	avail = rte_mempool_avail_count(txq->txdesc_pool);
+	if (nb_pkts > avail || avail <= txq->free_thresh)
 		hn_process_events(hv, txq->queue_id, 0);
 
 	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
-- 
2.20.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-stable] [PATCH v4 4/8] net/netvsc: fix invalid rte_free on dev_close
  2020-03-31 17:14   ` [dpdk-stable] [PATCH v4 4/8] net/netvsc: fix invalid rte_free on dev_close Stephen Hemminger
@ 2020-04-06 16:00     ` Ferruh Yigit
  0 siblings, 0 replies; 16+ messages in thread
From: Ferruh Yigit @ 2020-04-06 16:00 UTC (permalink / raw)
  To: Stephen Hemminger, dev; +Cc: stable

On 3/31/2020 6:14 PM, Stephen Hemminger wrote:
> The netvsc PMD was putting the MAC address in private data, but the
> core rte_ethdev doesn't allow that. It has to be in rte_malloc'd
> memory or a message will be printed on shutdown/close:

It doesn't have to be rte_malloc'ed. Setting 'eth_dev->data->mac_addrs' to NULL
in 'hn_dev_close()' should fix it.

But dynamically allocating it also works if that is what is preferred.
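
i.e. something like this untested sketch:

#include <rte_ethdev.h>

/* keep the address in dev_private, but detach the pointer on close so
 * the generic ethdev teardown does not rte_free() it */
static void hn_dev_close(struct rte_eth_dev *dev)
{
	dev->data->mac_addrs = NULL;
}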

>  EAL: Invalid memory
> 
> Fixes: f8279f47dd89 ("net/netvsc: fix crash in secondary process")
> Cc: stable@dpdk.org
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> ---
>  drivers/net/netvsc/hn_ethdev.c | 16 ++++++++++------
>  drivers/net/netvsc/hn_var.h    |  2 --
>  2 files changed, 10 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/net/netvsc/hn_ethdev.c b/drivers/net/netvsc/hn_ethdev.c
> index ac6610838008..05f1a25a1abc 100644
> --- a/drivers/net/netvsc/hn_ethdev.c
> +++ b/drivers/net/netvsc/hn_ethdev.c
> @@ -134,8 +134,6 @@ eth_dev_vmbus_allocate(struct rte_vmbus_device *dev, size_t private_data_size)
>  static void
>  eth_dev_vmbus_release(struct rte_eth_dev *eth_dev)
>  {
> -	/* mac_addrs must not be freed alone because part of dev_private */
> -	eth_dev->data->mac_addrs = NULL;
>  	/* free ether device */
>  	rte_eth_dev_release_port(eth_dev);
>  
> @@ -937,9 +935,6 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev)
>  	eth_dev->tx_pkt_burst = &hn_xmit_pkts;
>  	eth_dev->rx_pkt_burst = &hn_recv_pkts;
>  
> -	/* Since Hyper-V only supports one MAC address, just use local data */
> -	eth_dev->data->mac_addrs = &hv->mac_addr;
> -
>  	/*
>  	 * for secondary processes, we don't initialize any further as primary
>  	 * has already done this work.
> @@ -947,6 +942,15 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev)
>  	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
>  		return 0;
>  
> +	/* Since Hyper-V only supports one MAC address */
> +	eth_dev->data->mac_addrs = rte_calloc("hv_mac", HN_MAX_MAC_ADDRS,
> +					      sizeof(struct rte_ether_addr), 0);
> +	if (eth_dev->data->mac_addrs == NULL) {
> +		PMD_INIT_LOG(ERR,
> +			     "Failed to allocate memory to store MAC addresses");
> +		return -ENOMEM;
> +	}
> +
>  	hv->vmbus = vmbus;
>  	hv->rxbuf_res = &vmbus->resource[HV_RECV_BUF_MAP];
>  	hv->chim_res  = &vmbus->resource[HV_SEND_BUF_MAP];
> @@ -989,7 +993,7 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev)
>  	if (err)
>  		goto failed;
>  
> -	err = hn_rndis_get_eaddr(hv, hv->mac_addr.addr_bytes);
> +	err = hn_rndis_get_eaddr(hv, eth_dev->data->mac_addrs->addr_bytes);
>  	if (err)
>  		goto failed;
>  
> diff --git a/drivers/net/netvsc/hn_var.h b/drivers/net/netvsc/hn_var.h
> index 822d737bd3cc..b4c61717379f 100644
> --- a/drivers/net/netvsc/hn_var.h
> +++ b/drivers/net/netvsc/hn_var.h
> @@ -139,8 +139,6 @@ struct hn_data {
>  	uint8_t		rss_key[40];
>  	uint16_t	rss_ind[128];
>  
> -	struct rte_ether_addr mac_addr;
> -
>  	struct rte_eth_dev_owner owner;
>  	struct rte_intr_handle vf_intr;
>  
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2020-04-06 16:01 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20200316235612.29854-1-stephen@networkplumber.org>
2020-03-16 23:56 ` [dpdk-stable] [PATCH 1/2] net/netvsc: handle receive packets during multi-channel setup Stephen Hemminger
     [not found] ` <20200318202957.18121-1-stephen@networkplumber.org>
2020-03-18 20:29   ` [dpdk-stable] [PATCH v2 1/4] net/netvsc: propogate descriptor limits from VF to netvsc Stephen Hemminger
2020-03-18 20:29   ` [dpdk-stable] [PATCH v2 2/4] net/netvsc: handle receive packets during multi-channel setup Stephen Hemminger
2020-03-18 20:29   ` [dpdk-stable] [PATCH v2 3/4] net/netvsc: split send buffers from transmit descriptors Stephen Hemminger
2020-03-18 20:29   ` [dpdk-stable] [PATCH v2 4/4] net/netvsc: don't enable RSS if only single receive queue Stephen Hemminger
     [not found] ` <20200318205104.22486-1-stephen@networkplumber.org>
2020-03-18 20:51   ` [dpdk-stable] [PATCH v3 1/4] net/netvsc: propagate descriptor limits from VF to netvsc Stephen Hemminger
2020-03-18 20:51   ` [dpdk-stable] [PATCH v3 2/4] net/netvsc: handle receive packets during multi-channel setup Stephen Hemminger
2020-03-18 20:51   ` [dpdk-stable] [PATCH v3 3/4] net/netvsc: split send buffers from transmit descriptors Stephen Hemminger
2020-03-18 20:51   ` [dpdk-stable] [PATCH v3 4/4] net/netvsc: don't enable RSS if only single receive queue Stephen Hemminger
     [not found] ` <20200331171404.23596-1-stephen@networkplumber.org>
2020-03-31 17:13   ` [dpdk-stable] [PATCH v4 1/8] net/netvsc: propagate descriptor limits from VF to netvsc Stephen Hemminger
2020-03-31 17:13   ` [dpdk-stable] [PATCH v4 2/8] net/netvsc: handle receive packets during multi-channel setup Stephen Hemminger
2020-03-31 17:13   ` [dpdk-stable] [PATCH v4 3/8] net/netvsc: split send buffers from transmit descriptors Stephen Hemminger
2020-03-31 17:14   ` [dpdk-stable] [PATCH v4 4/8] net/netvsc: fix invalid rte_free on dev_close Stephen Hemminger
2020-04-06 16:00     ` Ferruh Yigit
2020-03-31 17:14   ` [dpdk-stable] [PATCH v4 5/8] net/netvsc: remove process event optimization Stephen Hemminger
2020-03-31 17:14   ` [dpdk-stable] [PATCH v4 6/8] net/netvsc: handle transmit completions based on burst size Stephen Hemminger
