DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH 0/6] Enable VF RSS for Niantic
@ 2014-12-15  2:56 Ouyang Changchun
  2014-12-15  2:57 ` [dpdk-dev] [PATCH 1/6] ixgbe: Code cleanup Ouyang Changchun
                   ` (7 more replies)
  0 siblings, 8 replies; 144+ messages in thread
From: Ouyang Changchun @ 2014-12-15  2:56 UTC (permalink / raw)
  To: dev

This patch set enables VF RSS for Niantic, which allows each VF to have at most 4 queues.
The actual number of queues per VF depends on the number of VFs:
1 to 32 VFs: 4 queues per VF;
33 to the maximum number of VFs: 2 queues per VF.
 
On the host, to enable VF RSS functionality, the mq mode should be set to ETH_MQ_RX_VMDQ_RSS
or ETH_MQ_RX_RSS, and SRIOV mode should be activated.
The VF RSS information (hash function, RSS key, RSS key length) also needs to be configured.

Changchun Ouyang (6):
  ixgbe: Code cleanup
  ixgbe: Negotiate VF API version
  ixgbe: Get VF queue number
  ether: Check VMDq RSS mode
  ixgbe: Config VF RSS
  testpmd: Set Rx VMDq RSS mode

 app/test-pmd/testpmd.c              |  9 ++++
 lib/librte_ether/rte_ethdev.c       | 21 ++++++--
 lib/librte_pmd_ixgbe/ixgbe_ethdev.h |  1 +
 lib/librte_pmd_ixgbe/ixgbe_pf.c     | 75 ++++++++++++++++++++++++++++-
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c   | 95 +++++++++++++++++++++++++++----------
 5 files changed, 171 insertions(+), 30 deletions(-)

-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH 1/6] ixgbe: Code cleanup
  2014-12-15  2:56 [dpdk-dev] [PATCH 0/6] Enable VF RSS for Niantic Ouyang Changchun
@ 2014-12-15  2:57 ` Ouyang Changchun
  2014-12-15  2:57 ` [dpdk-dev] [PATCH 2/6] ixgbe: Negotiate VF API version Ouyang Changchun
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 144+ messages in thread
From: Ouyang Changchun @ 2014-12-15  2:57 UTC (permalink / raw)
  To: dev

Move the global register configuration out of the per-queue loop; also fix a typo and indentation.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index 5c36bff..f58f98e 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -3985,7 +3985,7 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 	struct igb_rx_queue *rxq;
 	struct rte_pktmbuf_pool_private *mbp_priv;
 	uint64_t bus_addr;
-	uint32_t srrctl;
+	uint32_t srrctl, psrtype = 0;
 	uint16_t buf_size;
 	uint16_t i;
 	int ret;
@@ -4039,20 +4039,10 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 		 * Configure Header Split
 		 */
 		if (dev->data->dev_conf.rxmode.header_split) {
-
-			/* Must setup the PSRTYPE register */
-			uint32_t psrtype;
-			psrtype = IXGBE_PSRTYPE_TCPHDR |
-				IXGBE_PSRTYPE_UDPHDR   |
-				IXGBE_PSRTYPE_IPV4HDR  |
-				IXGBE_PSRTYPE_IPV6HDR;
-
-			IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE(i), psrtype);
-
 			srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
-				   IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
-				  IXGBE_SRRCTL_BSIZEHDR_MASK);
-			srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
+				IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
+				IXGBE_SRRCTL_BSIZEHDR_MASK);
+			srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
 		} else
 #endif
 			srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
@@ -4095,6 +4085,17 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 		}
 	}
 
+#ifdef RTE_HEADER_SPLIT_ENABLE
+	if (dev->data->dev_conf.rxmode.header_split) {
+		/* Must setup the PSRTYPE register */
+		psrtype = IXGBE_PSRTYPE_TCPHDR |
+			IXGBE_PSRTYPE_UDPHDR   |
+			IXGBE_PSRTYPE_IPV4HDR  |
+			IXGBE_PSRTYPE_IPV6HDR;
+#endif
+
+	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
+
 	if (dev->data->dev_conf.rxmode.enable_scatter) {
 		if (!dev->data->scattered_rx)
 			PMD_INIT_LOG(DEBUG, "forcing scatter mode");
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH 2/6] ixgbe: Negotiate VF API version
  2014-12-15  2:56 [dpdk-dev] [PATCH 0/6] Enable VF RSS for Niantic Ouyang Changchun
  2014-12-15  2:57 ` [dpdk-dev] [PATCH 1/6] ixgbe: Code cleanup Ouyang Changchun
@ 2014-12-15  2:57 ` Ouyang Changchun
  2014-12-15  2:57 ` [dpdk-dev] [PATCH 3/6] ixgbe: Get VF queue number Ouyang Changchun
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 144+ messages in thread
From: Ouyang Changchun @ 2014-12-15  2:57 UTC (permalink / raw)
  To: dev

Negotiate API version with VF when receiving the IXGBE_VF_API_NEGOTIATE message.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_pmd_ixgbe/ixgbe_ethdev.h |  1 +
 lib/librte_pmd_ixgbe/ixgbe_pf.c     | 25 +++++++++++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.h b/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
index ca99170..730098d 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
+++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
@@ -159,6 +159,7 @@ struct ixgbe_vf_info {
 	uint16_t tx_rate[IXGBE_MAX_QUEUE_NUM_PER_VF];
 	uint16_t vlan_count;
 	uint8_t spoofchk_enabled;
+	uint8_t api_version;
 };
 
 /*
diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c b/lib/librte_pmd_ixgbe/ixgbe_pf.c
index 51da1fd..495aff5 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
@@ -469,6 +469,28 @@ ixgbe_set_vf_lpe(struct rte_eth_dev *dev, __rte_unused uint32_t vf, uint32_t *ms
 }
 
 static int
+ixgbe_negotiate_vf_api(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf)
+{
+	uint32_t api_version = msgbuf[1];
+	struct ixgbe_vf_info *vfinfo =
+		*IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private);
+
+	switch (api_version) {
+	case ixgbe_mbox_api_10:
+	case ixgbe_mbox_api_11:
+		vfinfo[vf].api_version = (uint8_t)api_version;
+		return 0;
+	default:
+		break;
+	}
+
+	RTE_LOG(ERR, PMD, "Negotiate invalid api version %u from VF %d\n",
+		api_version, vf);
+
+	return -1;
+}
+
+static int
 ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
 {
 	uint16_t mbx_size = IXGBE_VFMAILBOX_SIZE;
@@ -512,6 +534,9 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
 	case IXGBE_VF_SET_VLAN:
 		retval = ixgbe_vf_set_vlan(dev, vf, msgbuf);
 		break;
+	case IXGBE_VF_API_NEGOTIATE:
+		retval = ixgbe_negotiate_vf_api(dev, vf, msgbuf);
+		break;
 	default:
 		PMD_DRV_LOG(DEBUG, "Unhandled Msg %8.8x", (unsigned)msgbuf[0]);
 		retval = IXGBE_ERR_MBX;
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH 3/6] ixgbe: Get VF queue number
  2014-12-15  2:56 [dpdk-dev] [PATCH 0/6] Enable VF RSS for Niantic Ouyang Changchun
  2014-12-15  2:57 ` [dpdk-dev] [PATCH 1/6] ixgbe: Code cleanup Ouyang Changchun
  2014-12-15  2:57 ` [dpdk-dev] [PATCH 2/6] ixgbe: Negotiate VF API version Ouyang Changchun
@ 2014-12-15  2:57 ` Ouyang Changchun
  2014-12-15  2:57 ` [dpdk-dev] [PATCH 4/6] ether: Check VMDq RSS mode Ouyang Changchun
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 144+ messages in thread
From: Ouyang Changchun @ 2014-12-15  2:57 UTC (permalink / raw)
  To: dev

Get the number of available Rx and Tx queues when receiving the IXGBE_VF_GET_QUEUES message from a VF.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_pmd_ixgbe/ixgbe_pf.c | 35 ++++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c b/lib/librte_pmd_ixgbe/ixgbe_pf.c
index 495aff5..cbb0145 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
@@ -53,6 +53,8 @@
 #include "ixgbe_ethdev.h"
 
 #define IXGBE_MAX_VFTA     (128)
+#define IXGBE_VF_MSG_SIZE_DEFAULT 1
+#define IXGBE_VF_GET_QUEUE_MSG_SIZE 5
 
 static inline uint16_t
 dev_num_vf(struct rte_eth_dev *eth_dev)
@@ -491,9 +493,36 @@ ixgbe_negotiate_vf_api(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf)
 }
 
 static int
+ixgbe_get_vf_queues(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf)
+{
+	struct ixgbe_vf_info *vfinfo =
+		*IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private);
+	uint32_t default_q = vf * RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
+
+	/* Verify if the PF supports the mbox APIs version or not */
+	switch (vfinfo[vf].api_version) {
+	case ixgbe_mbox_api_20:
+	case ixgbe_mbox_api_11:
+		break;
+	default:
+		return -1;
+	}
+
+	/* Notify VF of Rx and Tx queue number */
+	msgbuf[IXGBE_VF_RX_QUEUES] = RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
+	msgbuf[IXGBE_VF_TX_QUEUES] = RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
+
+	/* Notify VF of default queue */
+	msgbuf[IXGBE_VF_DEF_QUEUE] = default_q;
+
+	return 0;
+}
+
+static int
 ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
 {
 	uint16_t mbx_size = IXGBE_VFMAILBOX_SIZE;
+	uint16_t msg_size = IXGBE_VF_MSG_SIZE_DEFAULT;
 	uint32_t msgbuf[IXGBE_VFMAILBOX_SIZE];
 	int32_t retval;
 	struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
@@ -537,6 +566,10 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
 	case IXGBE_VF_API_NEGOTIATE:
 		retval = ixgbe_negotiate_vf_api(dev, vf, msgbuf);
 		break;
+	case IXGBE_VF_GET_QUEUES:
+		retval = ixgbe_get_vf_queues(dev, vf, msgbuf);
+		msg_size = IXGBE_VF_GET_QUEUE_MSG_SIZE;
+		break;
 	default:
 		PMD_DRV_LOG(DEBUG, "Unhandled Msg %8.8x", (unsigned)msgbuf[0]);
 		retval = IXGBE_ERR_MBX;
@@ -551,7 +584,7 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
 
 	msgbuf[0] |= IXGBE_VT_MSGTYPE_CTS;
 
-	ixgbe_write_mbx(hw, msgbuf, 1, vf);
+	ixgbe_write_mbx(hw, msgbuf, msg_size, vf);
 
 	return retval;
 }
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH 4/6] ether: Check VMDq RSS mode
  2014-12-15  2:56 [dpdk-dev] [PATCH 0/6] Enable VF RSS for Niantic Ouyang Changchun
                   ` (2 preceding siblings ...)
  2014-12-15  2:57 ` [dpdk-dev] [PATCH 3/6] ixgbe: Get VF queue number Ouyang Changchun
@ 2014-12-15  2:57 ` Ouyang Changchun
  2014-12-15  2:57 ` [dpdk-dev] [PATCH 5/6] ixgbe: Config VF RSS Ouyang Changchun
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 144+ messages in thread
From: Ouyang Changchun @ 2014-12-15  2:57 UTC (permalink / raw)
  To: dev

Check the mq mode for VMDq RSS and handle it correctly instead of returning an error.
Also remove the limitation that the per-pool queue number has a maximum value of 1,
because the per-pool queue number can be 2 or 4 in VMDq RSS mode.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_ether/rte_ethdev.c | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 95f2ceb..f34d6b2 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -510,8 +510,7 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 
 	if (RTE_ETH_DEV_SRIOV(dev).active != 0) {
 		/* check multi-queue mode */
-		if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_RSS) ||
-		    (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
+		if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
 		    (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB_RSS) ||
 		    (dev_conf->txmode.mq_mode == ETH_MQ_TX_DCB)) {
 			/* SRIOV only works in VMDq enable mode */
@@ -525,7 +524,6 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 		}
 
 		switch (dev_conf->rxmode.mq_mode) {
-		case ETH_MQ_RX_VMDQ_RSS:
 		case ETH_MQ_RX_VMDQ_DCB:
 		case ETH_MQ_RX_VMDQ_DCB_RSS:
 			/* DCB/RSS VMDQ in SRIOV mode, not implement yet */
@@ -534,6 +532,21 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 					"unsupported VMDQ mq_mode rx %u\n",
 					port_id, dev_conf->rxmode.mq_mode);
 			return (-EINVAL);
+		case ETH_MQ_RX_VMDQ_RSS:
+			dev->data->dev_conf.rxmode.mq_mode =
+				dev_conf->rxmode.mq_mode;
+			break;
+		case ETH_MQ_RX_RSS:
+			dev->data->dev_conf.rxmode.mq_mode =
+				ETH_MQ_RX_VMDQ_RSS;
+			PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8
+					" SRIOV active, "
+					"Rx mq mode is changed from:"
+					"mq_mode %u into VMDQ mq_mode %u\n",
+					port_id,
+					dev_conf->rxmode.mq_mode,
+					dev->data->dev_conf.rxmode.mq_mode);
+			break;
 		default: /* ETH_MQ_RX_VMDQ_ONLY or ETH_MQ_RX_NONE */
 			/* if nothing mq mode configure, use default scheme */
 			dev->data->dev_conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_ONLY;
@@ -553,8 +566,6 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 		default: /* ETH_MQ_TX_VMDQ_ONLY or ETH_MQ_TX_NONE */
 			/* if nothing mq mode configure, use default scheme */
 			dev->data->dev_conf.txmode.mq_mode = ETH_MQ_TX_VMDQ_ONLY;
-			if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)
-				RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;
 			break;
 		}
 
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH 5/6] ixgbe: Config VF RSS
  2014-12-15  2:56 [dpdk-dev] [PATCH 0/6] Enable VF RSS for Niantic Ouyang Changchun
                   ` (3 preceding siblings ...)
  2014-12-15  2:57 ` [dpdk-dev] [PATCH 4/6] ether: Check VMDq RSS mode Ouyang Changchun
@ 2014-12-15  2:57 ` Ouyang Changchun
  2014-12-15  2:57 ` [dpdk-dev] [PATCH 6/6] testpmd: Set Rx VMDq RSS mode Ouyang Changchun
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 144+ messages in thread
From: Ouyang Changchun @ 2014-12-15  2:57 UTC (permalink / raw)
  To: dev

RSS, IXGBE_MRQC and IXGBE_VFPSRTYPE need to be configured to enable VF RSS.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_pmd_ixgbe/ixgbe_pf.c   | 15 +++++++++
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 66 +++++++++++++++++++++++++++++++++------
 2 files changed, 71 insertions(+), 10 deletions(-)

diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c b/lib/librte_pmd_ixgbe/ixgbe_pf.c
index cbb0145..9c9dad8 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
@@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev *eth_dev)
 	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw->mac.num_rar_entries), 0);
 	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw->mac.num_rar_entries), 0);
 
+	/*
+	 * VF RSS can support at most 4 queues for each VF, even if
+	 * 8 queues are available for each VF, it need refine to 4
+	 * queues here due to this limitation, otherwise no queue
+	 * will receive any packet even RSS is enabled.
+	 */
+	if (eth_dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_VMDQ_RSS) {
+		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
+			RTE_ETH_DEV_SRIOV(eth_dev).active = ETH_32_POOLS;
+			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
+			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
+				dev_num_vf(eth_dev) * 4;
+		}
+	}
+
 	/* set VMDq map to default PF pool */
 	hw->mac.ops.set_vmdq(hw, 0, RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
 
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index f58f98e..5d071b4 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -3327,6 +3327,39 @@ ixgbe_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
 }
 
 static int
+ixgbe_config_vf_rss(struct rte_eth_dev *dev)
+{
+	struct ixgbe_hw *hw;
+	uint32_t mrqc;
+
+	ixgbe_rss_configure(dev);
+
+	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+	/* MRQC: enable VF RSS */
+	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
+	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
+	switch (RTE_ETH_DEV_SRIOV(dev).active) {
+	case ETH_64_POOLS:
+		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
+		break;
+
+	case ETH_32_POOLS:
+	case ETH_16_POOLS:
+		mrqc |= IXGBE_MRQC_VMDQRSS32EN;
+		break;
+
+	default:
+		PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode");
+		return -EINVAL;
+	}
+
+	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
+
+	return 0;
+}
+
+static int
 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
 {
 	struct ixgbe_hw *hw =
@@ -3358,24 +3391,34 @@ ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
 			default: ixgbe_rss_disable(dev);
 		}
 	} else {
-		switch (RTE_ETH_DEV_SRIOV(dev).active) {
 		/*
 		 * SRIOV active scheme
 		 * FIXME if support DCB/RSS together with VMDq & SRIOV
 		 */
-		case ETH_64_POOLS:
-			IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQEN);
+		switch (dev->data->dev_conf.rxmode.mq_mode) {
+		case ETH_MQ_RX_RSS:
+		case ETH_MQ_RX_VMDQ_RSS:
+			ixgbe_config_vf_rss(dev);
 			break;
 
-		case ETH_32_POOLS:
-			IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQRT4TCEN);
-			break;
+		default:
+			switch (RTE_ETH_DEV_SRIOV(dev).active) {
+			case ETH_64_POOLS:
+				IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQEN);
+				break;
 
-		case ETH_16_POOLS:
-			IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQRT8TCEN);
+			case ETH_32_POOLS:
+				IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQRT4TCEN);
+				break;
+
+			case ETH_16_POOLS:
+				IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQRT8TCEN);
+				break;
+			default:
+				PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
+				break;
+			}
 			break;
-		default:
-			PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
 		}
 	}
 
@@ -4094,6 +4137,9 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 			IXGBE_PSRTYPE_IPV6HDR;
 #endif
 
+	/* Set RQPL for VF RSS according to max Rx queue */
+	psrtype |= (hw->mac.max_rx_queues >> 1) <<
+			IXGBE_PSRTYPE_RQPL_SHIFT;
 	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
 
 	if (dev->data->dev_conf.rxmode.enable_scatter) {
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH 6/6] testpmd: Set Rx VMDq RSS mode
  2014-12-15  2:56 [dpdk-dev] [PATCH 0/6] Enable VF RSS for Niantic Ouyang Changchun
                   ` (4 preceding siblings ...)
  2014-12-15  2:57 ` [dpdk-dev] [PATCH 5/6] ixgbe: Config VF RSS Ouyang Changchun
@ 2014-12-15  2:57 ` Ouyang Changchun
  2014-12-15 10:55 ` [dpdk-dev] [PATCH 0/6] Enable VF RSS for Niantic Bruce Richardson
  2014-12-24  2:56 ` [dpdk-dev] [PATCH v2 " Ouyang Changchun
  7 siblings, 0 replies; 144+ messages in thread
From: Ouyang Changchun @ 2014-12-15  2:57 UTC (permalink / raw)
  To: dev

Set VMDq RSS mode if the port has VFs (the number of VFs is at least 1) and has RSS information.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 app/test-pmd/testpmd.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 8c69756..90a3c64 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -1708,6 +1708,15 @@ init_port_config(void)
 				port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
 		}
 
+		if (port->dev_info.max_vfs != 0) {
+			if( port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0)
+				port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_RSS;
+			else {
+				port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
+				port->dev_conf.txmode.mq_mode = ETH_MQ_TX_NONE;
+			}
+		}
+
 		port->rx_conf.rx_thresh = rx_thresh;
 		port->rx_conf.rx_free_thresh = rx_free_thresh;
 		port->rx_conf.rx_drop_en = rx_drop_en;
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH 0/6] Enable VF RSS for Niantic
  2014-12-15  2:56 [dpdk-dev] [PATCH 0/6] Enable VF RSS for Niantic Ouyang Changchun
                   ` (5 preceding siblings ...)
  2014-12-15  2:57 ` [dpdk-dev] [PATCH 6/6] testpmd: Set Rx VMDq RSS mode Ouyang Changchun
@ 2014-12-15 10:55 ` Bruce Richardson
  2014-12-16  0:58   ` Ouyang, Changchun
  2014-12-24  2:56 ` [dpdk-dev] [PATCH v2 " Ouyang Changchun
  7 siblings, 1 reply; 144+ messages in thread
From: Bruce Richardson @ 2014-12-15 10:55 UTC (permalink / raw)
  To: Ouyang Changchun; +Cc: dev

On Mon, Dec 15, 2014 at 10:56:59AM +0800, Ouyang Changchun wrote:
> This patch enables VF RSS for Niantic, which allow each VF having at most 4 queues.
> The actual queue number per VF depends on the number of VF:
> VF number from 1~32: 4 queues per VF;
> VF number from 33~max vf num: 2 queues per VF;
>  
> On host, to enable VF RSS functionality, mq mode should be set as ETH_MQ_RX_VMDQ_RSS
> or ETH_MQ_RX_RSS mode, and SRIOV mode should be activated.
> It also needs config VF RSS information like hash function, RSS key, RSS key length.
> 

Hi Changchun,

are there limitations to this support, as I understood that RSS support for
VFs was not fully available in Niantic-based hardware?

/Bruce


> Changchun Ouyang (6):
>   ixgbe: Code cleanup
>   ixgbe: Negotiate VF API version
>   ixgbe: Get VF queue number
>   ether: Check VMDq RSS mode
>   ixgbe: Config VF RSS
>   testpmd: Set Rx VMDq RSS mode
> 
>  app/test-pmd/testpmd.c              |  9 ++++
>  lib/librte_ether/rte_ethdev.c       | 21 ++++++--
>  lib/librte_pmd_ixgbe/ixgbe_ethdev.h |  1 +
>  lib/librte_pmd_ixgbe/ixgbe_pf.c     | 75 ++++++++++++++++++++++++++++-
>  lib/librte_pmd_ixgbe/ixgbe_rxtx.c   | 95 +++++++++++++++++++++++++++----------
>  5 files changed, 171 insertions(+), 30 deletions(-)
> 
> -- 
> 1.8.4.2
> 

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH 0/6] Enable VF RSS for Niantic
  2014-12-15 10:55 ` [dpdk-dev] [PATCH 0/6] Enable VF RSS for Niantic Bruce Richardson
@ 2014-12-16  0:58   ` Ouyang, Changchun
  0 siblings, 0 replies; 144+ messages in thread
From: Ouyang, Changchun @ 2014-12-16  0:58 UTC (permalink / raw)
  To: Richardson, Bruce; +Cc: dev

Hi Bruce,

> -----Original Message-----
> From: Richardson, Bruce
> Sent: Monday, December 15, 2014 6:55 PM
> To: Ouyang, Changchun
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH 0/6] Enable VF RSS for Niantic
> 
> On Mon, Dec 15, 2014 at 10:56:59AM +0800, Ouyang Changchun wrote:
> > This patch enables VF RSS for Niantic, which allow each VF having at most 4
> queues.
> > The actual queue number per VF depends on the number of VF:
> > VF number from 1~32: 4 queues per VF;
> > VF number from 33~max vf num: 2 queues per VF;
> >
> > On host, to enable VF RSS functionality, mq mode should be set as
> > ETH_MQ_RX_VMDQ_RSS or ETH_MQ_RX_RSS mode, and SRIOV mode
> should be activated.
> > It also needs config VF RSS information like hash function, RSS key, RSS key
> length.
> >
> 
> Hi Changchun,
> 
> are there limitations to this support, as I understood that that RSS support for
> VFs was not fully available in Niantic-based hardware?
> 

The limitation here is that the RETA table, RSS key and hash function are shared by the PF and the VFs.
This is unlike FVL, where each VF may have its own RETA.

DPDK can set up 4 queues per VF, while the Linux ixgbe driver sets up only 2 queues per VF.
This is a small advantage of DPDK over the Linux driver.

Thanks
Changchun

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH v2 0/6] Enable VF RSS for Niantic
  2014-12-15  2:56 [dpdk-dev] [PATCH 0/6] Enable VF RSS for Niantic Ouyang Changchun
                   ` (6 preceding siblings ...)
  2014-12-15 10:55 ` [dpdk-dev] [PATCH 0/6] Enable VF RSS for Niantic Bruce Richardson
@ 2014-12-24  2:56 ` Ouyang Changchun
  2014-12-24  2:56   ` [dpdk-dev] [PATCH v2 1/6] ixgbe: Code cleanup Ouyang Changchun
                     ` (6 more replies)
  7 siblings, 7 replies; 144+ messages in thread
From: Ouyang Changchun @ 2014-12-24  2:56 UTC (permalink / raw)
  To: dev

This patch set enables VF RSS for Niantic, which allows each VF to have at most 4 queues.
The actual number of queues per VF depends on the total number of pools, which is
determined by the total number of VFs at the PF initialization stage and the number of
queues specified in the configuration:
1) If the number of VFs is in the range from 1 to 32 and the number of rxq is 4 ('--rxq 4' in testpmd),
then there are 32 pools in total (ETH_32_POOLS), and each VF has 4 queues;

2) If the number of VFs is in the range from 33 to 64 and the number of rxq is 2 ('--rxq 2' in testpmd),
then there are 64 pools in total (ETH_64_POOLS), and each VF has 2 queues.
 
On the host, to enable VF RSS functionality, the rx mq mode should be set to ETH_MQ_RX_VMDQ_RSS
or ETH_MQ_RX_RSS, and SRIOV mode should be activated (max_vfs >= 1).
The VF RSS information (hash function, RSS key, RSS key length) also needs to be configured.

The limitation for Niantic VF RSS is:
the hash and key are shared among the PF and all VFs, and the RETA table with 128 entries is
also shared among the PF and all VFs. So it is not a good idea to query the hash and RETA content
per VF on the guest; instead, it makes sense to query them on the host (PF).

v2 change:
  - Update the description;
  - Use the number of receive queues ('--rxq <q-num>') specified in the configuration to determine
    the number of pools and the number of queues per VF;

v1 change:
  - Config VF RSS;

Changchun Ouyang (6):
  ixgbe: Code cleanup
  ixgbe: Negotiate VF API version
  ixgbe: Get VF queue number
  ether: Check VMDq RSS mode
  ixgbe: Config VF RSS
  testpmd: Set Rx VMDq RSS mode

 app/test-pmd/testpmd.c              |  10 +++
 lib/librte_ether/rte_ethdev.c       |  39 ++++++++++--
 lib/librte_pmd_ixgbe/ixgbe_ethdev.h |   1 +
 lib/librte_pmd_ixgbe/ixgbe_pf.c     |  75 +++++++++++++++++++++-
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c   | 121 +++++++++++++++++++++++++++++-------
 5 files changed, 216 insertions(+), 30 deletions(-)

-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH v2 1/6] ixgbe: Code cleanup
  2014-12-24  2:56 ` [dpdk-dev] [PATCH v2 " Ouyang Changchun
@ 2014-12-24  2:56   ` Ouyang Changchun
  2014-12-24  3:08     ` Zhang, Helin
  2014-12-24  2:56   ` [dpdk-dev] [PATCH v2 2/6] ixgbe: Negotiate VF API version Ouyang Changchun
                     ` (5 subsequent siblings)
  6 siblings, 1 reply; 144+ messages in thread
From: Ouyang Changchun @ 2014-12-24  2:56 UTC (permalink / raw)
  To: dev

Move the global register configuration out of the per-queue loop; also fix a typo and indentation.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index 5c36bff..f58f98e 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -3985,7 +3985,7 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 	struct igb_rx_queue *rxq;
 	struct rte_pktmbuf_pool_private *mbp_priv;
 	uint64_t bus_addr;
-	uint32_t srrctl;
+	uint32_t srrctl, psrtype = 0;
 	uint16_t buf_size;
 	uint16_t i;
 	int ret;
@@ -4039,20 +4039,10 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 		 * Configure Header Split
 		 */
 		if (dev->data->dev_conf.rxmode.header_split) {
-
-			/* Must setup the PSRTYPE register */
-			uint32_t psrtype;
-			psrtype = IXGBE_PSRTYPE_TCPHDR |
-				IXGBE_PSRTYPE_UDPHDR   |
-				IXGBE_PSRTYPE_IPV4HDR  |
-				IXGBE_PSRTYPE_IPV6HDR;
-
-			IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE(i), psrtype);
-
 			srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
-				   IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
-				  IXGBE_SRRCTL_BSIZEHDR_MASK);
-			srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
+				IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
+				IXGBE_SRRCTL_BSIZEHDR_MASK);
+			srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
 		} else
 #endif
 			srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
@@ -4095,6 +4085,17 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 		}
 	}
 
+#ifdef RTE_HEADER_SPLIT_ENABLE
+	if (dev->data->dev_conf.rxmode.header_split) {
+		/* Must setup the PSRTYPE register */
+		psrtype = IXGBE_PSRTYPE_TCPHDR |
+			IXGBE_PSRTYPE_UDPHDR   |
+			IXGBE_PSRTYPE_IPV4HDR  |
+			IXGBE_PSRTYPE_IPV6HDR;
+#endif
+
+	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
+
 	if (dev->data->dev_conf.rxmode.enable_scatter) {
 		if (!dev->data->scattered_rx)
 			PMD_INIT_LOG(DEBUG, "forcing scatter mode");
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH v2 2/6] ixgbe: Negotiate VF API version
  2014-12-24  2:56 ` [dpdk-dev] [PATCH v2 " Ouyang Changchun
  2014-12-24  2:56   ` [dpdk-dev] [PATCH v2 1/6] ixgbe: Code cleanup Ouyang Changchun
@ 2014-12-24  2:56   ` Ouyang Changchun
  2014-12-24  2:56   ` [dpdk-dev] [PATCH v2 3/6] ixgbe: Get VF queue number Ouyang Changchun
                     ` (4 subsequent siblings)
  6 siblings, 0 replies; 144+ messages in thread
From: Ouyang Changchun @ 2014-12-24  2:56 UTC (permalink / raw)
  To: dev

Negotiate API version with VF when receiving the IXGBE_VF_API_NEGOTIATE message.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_pmd_ixgbe/ixgbe_ethdev.h |  1 +
 lib/librte_pmd_ixgbe/ixgbe_pf.c     | 25 +++++++++++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.h b/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
index ca99170..730098d 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
+++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
@@ -159,6 +159,7 @@ struct ixgbe_vf_info {
 	uint16_t tx_rate[IXGBE_MAX_QUEUE_NUM_PER_VF];
 	uint16_t vlan_count;
 	uint8_t spoofchk_enabled;
+	uint8_t api_version;
 };
 
 /*
diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c b/lib/librte_pmd_ixgbe/ixgbe_pf.c
index 51da1fd..495aff5 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
@@ -469,6 +469,28 @@ ixgbe_set_vf_lpe(struct rte_eth_dev *dev, __rte_unused uint32_t vf, uint32_t *ms
 }
 
 static int
+ixgbe_negotiate_vf_api(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf)
+{
+	uint32_t api_version = msgbuf[1];
+	struct ixgbe_vf_info *vfinfo =
+		*IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private);
+
+	switch (api_version) {
+	case ixgbe_mbox_api_10:
+	case ixgbe_mbox_api_11:
+		vfinfo[vf].api_version = (uint8_t)api_version;
+		return 0;
+	default:
+		break;
+	}
+
+	RTE_LOG(ERR, PMD, "Negotiate invalid api version %u from VF %d\n",
+		api_version, vf);
+
+	return -1;
+}
+
+static int
 ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
 {
 	uint16_t mbx_size = IXGBE_VFMAILBOX_SIZE;
@@ -512,6 +534,9 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
 	case IXGBE_VF_SET_VLAN:
 		retval = ixgbe_vf_set_vlan(dev, vf, msgbuf);
 		break;
+	case IXGBE_VF_API_NEGOTIATE:
+		retval = ixgbe_negotiate_vf_api(dev, vf, msgbuf);
+		break;
 	default:
 		PMD_DRV_LOG(DEBUG, "Unhandled Msg %8.8x", (unsigned)msgbuf[0]);
 		retval = IXGBE_ERR_MBX;
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH v2 3/6] ixgbe: Get VF queue number
  2014-12-24  2:56 ` [dpdk-dev] [PATCH v2 " Ouyang Changchun
  2014-12-24  2:56   ` [dpdk-dev] [PATCH v2 1/6] ixgbe: Code cleanup Ouyang Changchun
  2014-12-24  2:56   ` [dpdk-dev] [PATCH v2 2/6] ixgbe: Negotiate VF API version Ouyang Changchun
@ 2014-12-24  2:56   ` Ouyang Changchun
  2014-12-24  2:56   ` [dpdk-dev] [PATCH v2 4/6] ether: Check VMDq RSS mode Ouyang Changchun
                     ` (3 subsequent siblings)
  6 siblings, 0 replies; 144+ messages in thread
From: Ouyang Changchun @ 2014-12-24  2:56 UTC (permalink / raw)
  To: dev

Get the number of available Rx and Tx queues when receiving the IXGBE_VF_GET_QUEUES message from a VF.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_pmd_ixgbe/ixgbe_pf.c | 35 ++++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c b/lib/librte_pmd_ixgbe/ixgbe_pf.c
index 495aff5..cbb0145 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
@@ -53,6 +53,8 @@
 #include "ixgbe_ethdev.h"
 
 #define IXGBE_MAX_VFTA     (128)
+#define IXGBE_VF_MSG_SIZE_DEFAULT 1
+#define IXGBE_VF_GET_QUEUE_MSG_SIZE 5
 
 static inline uint16_t
 dev_num_vf(struct rte_eth_dev *eth_dev)
@@ -491,9 +493,36 @@ ixgbe_negotiate_vf_api(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf)
 }
 
 static int
+ixgbe_get_vf_queues(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf)
+{
+	struct ixgbe_vf_info *vfinfo =
+		*IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private);
+	uint32_t default_q = vf * RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
+
+	/* Verify if the PF supports the mbox APIs version or not */
+	switch (vfinfo[vf].api_version) {
+	case ixgbe_mbox_api_20:
+	case ixgbe_mbox_api_11:
+		break;
+	default:
+		return -1;
+	}
+
+	/* Notify VF of Rx and Tx queue number */
+	msgbuf[IXGBE_VF_RX_QUEUES] = RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
+	msgbuf[IXGBE_VF_TX_QUEUES] = RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
+
+	/* Notify VF of default queue */
+	msgbuf[IXGBE_VF_DEF_QUEUE] = default_q;
+
+	return 0;
+}
+
+static int
 ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
 {
 	uint16_t mbx_size = IXGBE_VFMAILBOX_SIZE;
+	uint16_t msg_size = IXGBE_VF_MSG_SIZE_DEFAULT;
 	uint32_t msgbuf[IXGBE_VFMAILBOX_SIZE];
 	int32_t retval;
 	struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
@@ -537,6 +566,10 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
 	case IXGBE_VF_API_NEGOTIATE:
 		retval = ixgbe_negotiate_vf_api(dev, vf, msgbuf);
 		break;
+	case IXGBE_VF_GET_QUEUES:
+		retval = ixgbe_get_vf_queues(dev, vf, msgbuf);
+		msg_size = IXGBE_VF_GET_QUEUE_MSG_SIZE;
+		break;
 	default:
 		PMD_DRV_LOG(DEBUG, "Unhandled Msg %8.8x", (unsigned)msgbuf[0]);
 		retval = IXGBE_ERR_MBX;
@@ -551,7 +584,7 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
 
 	msgbuf[0] |= IXGBE_VT_MSGTYPE_CTS;
 
-	ixgbe_write_mbx(hw, msgbuf, 1, vf);
+	ixgbe_write_mbx(hw, msgbuf, msg_size, vf);
 
 	return retval;
 }
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH v2 4/6] ether: Check VMDq RSS mode
  2014-12-24  2:56 ` [dpdk-dev] [PATCH v2 " Ouyang Changchun
                     ` (2 preceding siblings ...)
  2014-12-24  2:56   ` [dpdk-dev] [PATCH v2 3/6] ixgbe: Get VF queue number Ouyang Changchun
@ 2014-12-24  2:56   ` Ouyang Changchun
  2014-12-24  2:56   ` [dpdk-dev] [PATCH v2 5/6] ixgbe: Config VF RSS Ouyang Changchun
                     ` (2 subsequent siblings)
  6 siblings, 0 replies; 144+ messages in thread
From: Ouyang Changchun @ 2014-12-24  2:56 UTC (permalink / raw)
  To: dev

Check the multiple-queue mode for VMDq RSS and handle it correctly instead of returning an error.
Also remove the limitation that the per-pool queue number has a maximum value of 1, because
the per-pool queue number can be 2 or 4 in VMDq RSS mode.

The number of rxq specified in the config determines the multiple-queue mode for VMDq RSS.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_ether/rte_ethdev.c | 39 ++++++++++++++++++++++++++++++++++-----
 1 file changed, 34 insertions(+), 5 deletions(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 95f2ceb..59ff325 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -510,8 +510,7 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 
 	if (RTE_ETH_DEV_SRIOV(dev).active != 0) {
 		/* check multi-queue mode */
-		if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_RSS) ||
-		    (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
+		if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
 		    (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB_RSS) ||
 		    (dev_conf->txmode.mq_mode == ETH_MQ_TX_DCB)) {
 			/* SRIOV only works in VMDq enable mode */
@@ -525,7 +524,6 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 		}
 
 		switch (dev_conf->rxmode.mq_mode) {
-		case ETH_MQ_RX_VMDQ_RSS:
 		case ETH_MQ_RX_VMDQ_DCB:
 		case ETH_MQ_RX_VMDQ_DCB_RSS:
 			/* DCB/RSS VMDQ in SRIOV mode, not implement yet */
@@ -534,6 +532,39 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 					"unsupported VMDQ mq_mode rx %u\n",
 					port_id, dev_conf->rxmode.mq_mode);
 			return (-EINVAL);
+		case ETH_MQ_RX_RSS:
+			PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8
+					" SRIOV active, "
+					"Rx mq mode is changed from:"
+					"mq_mode %u into VMDQ mq_mode %u\n",
+					port_id,
+					dev_conf->rxmode.mq_mode,
+					dev->data->dev_conf.rxmode.mq_mode);
+		case ETH_MQ_RX_VMDQ_RSS:
+			dev->data->dev_conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_RSS;
+			if (nb_rx_q < RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool) {
+				switch (nb_rx_q) {
+				case 1:
+				case 2:
+					RTE_ETH_DEV_SRIOV(dev).active =
+						ETH_64_POOLS;
+					break;
+				case 4:
+					RTE_ETH_DEV_SRIOV(dev).active =
+						ETH_32_POOLS;
+					break;
+				default:
+					PMD_DEBUG_TRACE("ethdev port_id=%d"
+						" SRIOV active, "
+						"queue number invalid\n",
+						port_id);
+					return -EINVAL;
+				}
+				RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = nb_rx_q;
+				RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx =
+					dev->pci_dev->max_vfs * nb_rx_q;
+			}
+			break;
 		default: /* ETH_MQ_RX_VMDQ_ONLY or ETH_MQ_RX_NONE */
 			/* if nothing mq mode configure, use default scheme */
 			dev->data->dev_conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_ONLY;
@@ -553,8 +584,6 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 		default: /* ETH_MQ_TX_VMDQ_ONLY or ETH_MQ_TX_NONE */
 			/* if nothing mq mode configure, use default scheme */
 			dev->data->dev_conf.txmode.mq_mode = ETH_MQ_TX_VMDQ_ONLY;
-			if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)
-				RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;
 			break;
 		}
 
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH v2 5/6] ixgbe: Config VF RSS
  2014-12-24  2:56 ` [dpdk-dev] [PATCH v2 " Ouyang Changchun
                     ` (3 preceding siblings ...)
  2014-12-24  2:56   ` [dpdk-dev] [PATCH v2 4/6] ether: Check VMDq RSS mode Ouyang Changchun
@ 2014-12-24  2:56   ` Ouyang Changchun
  2014-12-24  2:56   ` [dpdk-dev] [PATCH v2 6/6] testpmd: Set Rx VMDq RSS mode Ouyang Changchun
  2014-12-24  5:22   ` [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic Ouyang Changchun
  6 siblings, 0 replies; 144+ messages in thread
From: Ouyang Changchun @ 2014-12-24  2:56 UTC (permalink / raw)
  To: dev

RSS, IXGBE_MRQC and IXGBE_VFPSRTYPE need to be configured to enable VF RSS.

The psrtype determines how many queues the received packets are distributed to,
and the value of psrtype depends on two factors: the max VF rxq number which
has been negotiated with the PF, and the number of rxq specified in the config on the guest.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_pmd_ixgbe/ixgbe_pf.c   | 15 +++++++
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 92 ++++++++++++++++++++++++++++++++++-----
 2 files changed, 97 insertions(+), 10 deletions(-)

diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c b/lib/librte_pmd_ixgbe/ixgbe_pf.c
index cbb0145..9c9dad8 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
@@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev *eth_dev)
 	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw->mac.num_rar_entries), 0);
 	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw->mac.num_rar_entries), 0);
 
+	/*
+	 * VF RSS can support at most 4 queues for each VF, even if
+	 * 8 queues are available for each VF, it need refine to 4
+	 * queues here due to this limitation, otherwise no queue
+	 * will receive any packet even RSS is enabled.
+	 */
+	if (eth_dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_VMDQ_RSS) {
+		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
+			RTE_ETH_DEV_SRIOV(eth_dev).active = ETH_32_POOLS;
+			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
+			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
+				dev_num_vf(eth_dev) * 4;
+		}
+	}
+
 	/* set VMDq map to default PF pool */
 	hw->mac.ops.set_vmdq(hw, 0, RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
 
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index f58f98e..5a3f528 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -3327,6 +3327,39 @@ ixgbe_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
 }
 
 static int
+ixgbe_config_vf_rss(struct rte_eth_dev *dev)
+{
+	struct ixgbe_hw *hw;
+	uint32_t mrqc;
+
+	ixgbe_rss_configure(dev);
+
+	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+	/* MRQC: enable VF RSS */
+	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
+	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
+	switch (RTE_ETH_DEV_SRIOV(dev).active) {
+	case ETH_64_POOLS:
+		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
+		break;
+
+	case ETH_32_POOLS:
+	case ETH_16_POOLS:
+		mrqc |= IXGBE_MRQC_VMDQRSS32EN;
+		break;
+
+	default:
+		PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode");
+		return -EINVAL;
+	}
+
+	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
+
+	return 0;
+}
+
+static int
 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
 {
 	struct ixgbe_hw *hw =
@@ -3358,24 +3391,38 @@ ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
 			default: ixgbe_rss_disable(dev);
 		}
 	} else {
-		switch (RTE_ETH_DEV_SRIOV(dev).active) {
 		/*
 		 * SRIOV active scheme
 		 * FIXME if support DCB/RSS together with VMDq & SRIOV
 		 */
-		case ETH_64_POOLS:
-			IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQEN);
+		switch (dev->data->dev_conf.rxmode.mq_mode) {
+		case ETH_MQ_RX_RSS:
+		case ETH_MQ_RX_VMDQ_RSS:
+			ixgbe_config_vf_rss(dev);
 			break;
 
-		case ETH_32_POOLS:
-			IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQRT4TCEN);
-			break;
+		default:
+			switch (RTE_ETH_DEV_SRIOV(dev).active) {
+			case ETH_64_POOLS:
+				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
+					IXGBE_MRQC_VMDQEN);
+				break;
 
-		case ETH_16_POOLS:
-			IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQRT8TCEN);
+			case ETH_32_POOLS:
+				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
+					IXGBE_MRQC_VMDQRT4TCEN);
+				break;
+
+			case ETH_16_POOLS:
+				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
+					IXGBE_MRQC_VMDQRT8TCEN);
+				break;
+			default:
+				PMD_INIT_LOG(ERR,
+					"invalid pool number in IOV mode");
+				break;
+			}
 			break;
-		default:
-			PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
 		}
 	}
 
@@ -3989,10 +4036,32 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 	uint16_t buf_size;
 	uint16_t i;
 	int ret;
+	uint16_t valid_rxq_num;
 
 	PMD_INIT_FUNC_TRACE();
 	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
+	valid_rxq_num = RTE_MIN(dev->data->nb_rx_queues, hw->mac.max_rx_queues);
+
+	/*
+	 * VMDq RSS can't support 3 queues, so config it into 4 queues,
+	 * and give user a hint that some packets may loss if it doesn't
+	 * poll the queue where those packets are distributed to.
+	 */
+	if (valid_rxq_num == 3)
+		valid_rxq_num = 4;
+
+	if (dev->data->nb_rx_queues > valid_rxq_num) {
+		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
+			"it should be equal to or less than %d",
+			valid_rxq_num);
+		return -1;
+	} else if (dev->data->nb_rx_queues < valid_rxq_num)
+		PMD_INIT_LOG(ERR, "The number of Rx queue is less "
+			"than the number of available Rx queues:%d, "
+			"packets in Rx queues(q_id >= %d) may loss.",
+			valid_rxq_num, dev->data->nb_rx_queues);
+
 	/*
 	 * When the VF driver issues a IXGBE_VF_RESET request, the PF driver
 	 * disables the VF receipt of packets if the PF MTU is > 1500.
@@ -4094,6 +4163,9 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 			IXGBE_PSRTYPE_IPV6HDR;
 #endif
 
+	/* Set RQPL for VF RSS according to max Rx queue */
+	psrtype |= (valid_rxq_num >> 1) <<
+		IXGBE_PSRTYPE_RQPL_SHIFT;
 	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
 
 	if (dev->data->dev_conf.rxmode.enable_scatter) {
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH v2 6/6] testpmd: Set Rx VMDq RSS mode
  2014-12-24  2:56 ` [dpdk-dev] [PATCH v2 " Ouyang Changchun
                     ` (4 preceding siblings ...)
  2014-12-24  2:56   ` [dpdk-dev] [PATCH v2 5/6] ixgbe: Config VF RSS Ouyang Changchun
@ 2014-12-24  2:56   ` Ouyang Changchun
  2014-12-24  5:22   ` [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic Ouyang Changchun
  6 siblings, 0 replies; 144+ messages in thread
From: Ouyang Changchun @ 2014-12-24  2:56 UTC (permalink / raw)
  To: dev

Set VMDq RSS mode if the port has VFs (VF number is at least 1) and has RSS information.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 app/test-pmd/testpmd.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 8c69756..6230f8b 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -1708,6 +1708,16 @@ init_port_config(void)
 				port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
 		}
 
+		if (port->dev_info.max_vfs != 0) {
+			if (port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0)
+				port->dev_conf.rxmode.mq_mode =
+					ETH_MQ_RX_VMDQ_RSS;
+			else {
+				port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
+				port->dev_conf.txmode.mq_mode = ETH_MQ_TX_NONE;
+			}
+		}
+
 		port->rx_conf.rx_thresh = rx_thresh;
 		port->rx_conf.rx_free_thresh = rx_free_thresh;
 		port->rx_conf.rx_drop_en = rx_drop_en;
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/6] ixgbe: Code cleanup
  2014-12-24  2:56   ` [dpdk-dev] [PATCH v2 1/6] ixgbe: Code cleanup Ouyang Changchun
@ 2014-12-24  3:08     ` Zhang, Helin
  2014-12-24  3:22       ` Ouyang, Changchun
  0 siblings, 1 reply; 144+ messages in thread
From: Zhang, Helin @ 2014-12-24  3:08 UTC (permalink / raw)
  To: Ouyang, Changchun, dev

Is header split really supported in ixgbe? I guess not. If not, this code changes are not needed at all.

Regards,
Helin

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ouyang Changchun
> Sent: Wednesday, December 24, 2014 10:57 AM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH v2 1/6] ixgbe: Code cleanup
> 
> Put global register configuring out of loop for queue; also fix typo and indent.
> 
> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> ---
>  lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 29 +++++++++++++++--------------
>  1 file changed, 15 insertions(+), 14 deletions(-)
> 
> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> index 5c36bff..f58f98e 100644
> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> @@ -3985,7 +3985,7 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
>  	struct igb_rx_queue *rxq;
>  	struct rte_pktmbuf_pool_private *mbp_priv;
>  	uint64_t bus_addr;
> -	uint32_t srrctl;
> +	uint32_t srrctl, psrtype = 0;
>  	uint16_t buf_size;
>  	uint16_t i;
>  	int ret;
> @@ -4039,20 +4039,10 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
>  		 * Configure Header Split
>  		 */
>  		if (dev->data->dev_conf.rxmode.header_split) {
> -
> -			/* Must setup the PSRTYPE register */
> -			uint32_t psrtype;
> -			psrtype = IXGBE_PSRTYPE_TCPHDR |
> -				IXGBE_PSRTYPE_UDPHDR   |
> -				IXGBE_PSRTYPE_IPV4HDR  |
> -				IXGBE_PSRTYPE_IPV6HDR;
> -
> -			IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE(i), psrtype);
> -
>  			srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
> -				   IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
> -				  IXGBE_SRRCTL_BSIZEHDR_MASK);
> -			srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
> +				IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
> +				IXGBE_SRRCTL_BSIZEHDR_MASK);
> +			srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
>  		} else
>  #endif
>  			srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; @@ -4095,6
> +4085,17 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
>  		}
>  	}
> 
> +#ifdef RTE_HEADER_SPLIT_ENABLE
> +	if (dev->data->dev_conf.rxmode.header_split) {
> +		/* Must setup the PSRTYPE register */
> +		psrtype = IXGBE_PSRTYPE_TCPHDR |
> +			IXGBE_PSRTYPE_UDPHDR   |
> +			IXGBE_PSRTYPE_IPV4HDR  |
> +			IXGBE_PSRTYPE_IPV6HDR;
> +#endif
> +
> +	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
> +
>  	if (dev->data->dev_conf.rxmode.enable_scatter) {
>  		if (!dev->data->scattered_rx)
>  			PMD_INIT_LOG(DEBUG, "forcing scatter mode");
> --
> 1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/6] ixgbe: Code cleanup
  2014-12-24  3:08     ` Zhang, Helin
@ 2014-12-24  3:22       ` Ouyang, Changchun
  2014-12-24  3:41         ` Zhang, Helin
  0 siblings, 1 reply; 144+ messages in thread
From: Ouyang, Changchun @ 2014-12-24  3:22 UTC (permalink / raw)
  To: Zhang, Helin, dev

Hi Helin,

> -----Original Message-----
> From: Zhang, Helin
> Sent: Wednesday, December 24, 2014 11:08 AM
> To: Ouyang, Changchun; dev@dpdk.org
> Subject: RE: [dpdk-dev] [PATCH v2 1/6] ixgbe: Code cleanup
> 
> Is header split really supported in ixgbe? I guess not. If not, this code changes
> are not needed at all.
> 

I don't try to modify any logic here, as you know vf rss don't have nothing to do with header split.
You mean " guess not ", can you 'make sure of it' rather than 'guess' if you want to remove those codes?

Thanks
Changchun

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/6] ixgbe: Code cleanup
  2014-12-24  3:22       ` Ouyang, Changchun
@ 2014-12-24  3:41         ` Zhang, Helin
  2014-12-24  3:50           ` Ouyang, Changchun
  0 siblings, 1 reply; 144+ messages in thread
From: Zhang, Helin @ 2014-12-24  3:41 UTC (permalink / raw)
  To: Ouyang, Changchun; +Cc: dev

Hi Changchun

> -----Original Message-----
> From: Ouyang, Changchun
> Sent: Wednesday, December 24, 2014 11:22 AM
> To: Zhang, Helin; dev@dpdk.org
> Cc: Ouyang, Changchun
> Subject: RE: [dpdk-dev] [PATCH v2 1/6] ixgbe: Code cleanup
> 
> Hi Helin,
> 
> > -----Original Message-----
> > From: Zhang, Helin
> > Sent: Wednesday, December 24, 2014 11:08 AM
> > To: Ouyang, Changchun; dev@dpdk.org
> > Subject: RE: [dpdk-dev] [PATCH v2 1/6] ixgbe: Code cleanup
> >
> > Is header split really supported in ixgbe? I guess not. If not, this
> > code changes are not needed at all.
> >
> 
> I don't try to modify any logic here, as you know vf rss don't have nothing to do
> with header split.
But you modified code for header split. Have you validated it?

> You mean " guess not ", can you 'make sure of it' rather than 'guess' if you want
> to remove those codes?
I did not see anywhere can enable ixgbe header split. It is your turn to make sure if your
code changes are needed and correct. I don't want to remove any code, just want to know if your code have been validated for header split.

> 
> Thanks
> Changchun

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/6] ixgbe: Code cleanup
  2014-12-24  3:41         ` Zhang, Helin
@ 2014-12-24  3:50           ` Ouyang, Changchun
  2014-12-24  3:53             ` Zhang, Helin
  0 siblings, 1 reply; 144+ messages in thread
From: Ouyang, Changchun @ 2014-12-24  3:50 UTC (permalink / raw)
  To: Zhang, Helin; +Cc: dev

Hi Helin,

> -----Original Message-----
> From: Zhang, Helin
> Sent: Wednesday, December 24, 2014 11:41 AM
> To: Ouyang, Changchun
> Cc: dev@dpdk.org
> Subject: RE: [dpdk-dev] [PATCH v2 1/6] ixgbe: Code cleanup
> 
> Hi Changchun
> 
> > -----Original Message-----
> > From: Ouyang, Changchun
> > Sent: Wednesday, December 24, 2014 11:22 AM
> > To: Zhang, Helin; dev@dpdk.org
> > Cc: Ouyang, Changchun
> > Subject: RE: [dpdk-dev] [PATCH v2 1/6] ixgbe: Code cleanup
> >
> > Hi Helin,
> >
> > > -----Original Message-----
> > > From: Zhang, Helin
> > > Sent: Wednesday, December 24, 2014 11:08 AM
> > > To: Ouyang, Changchun; dev@dpdk.org
> > > Subject: RE: [dpdk-dev] [PATCH v2 1/6] ixgbe: Code cleanup
> > >
> > > Is header split really supported in ixgbe? I guess not. If not, this
> > > code changes are not needed at all.
> > >
> >
> > I don't try to modify any logic here, as you know vf rss don't have
> > nothing to do with header split.
> But you modified code for header split. Have you validated it?
> 
> > You mean " guess not ", can you 'make sure of it' rather than 'guess'
> > if you want to remove those codes?
> I did not see anywhere can enable ixgbe header split. It is your turn to make
> sure if your code changes are needed and correct. I don't want to remove
> any code, just want to know if your code have been validated for header split.

Yes, I make sure the code change is correct, and already validate the code change on my platform, besides that, 
we also have validation team to do further validation. any other concern then?

Thanks
Changchun

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/6] ixgbe: Code cleanup
  2014-12-24  3:50           ` Ouyang, Changchun
@ 2014-12-24  3:53             ` Zhang, Helin
  2014-12-24  4:46               ` Ouyang, Changchun
  0 siblings, 1 reply; 144+ messages in thread
From: Zhang, Helin @ 2014-12-24  3:53 UTC (permalink / raw)
  To: Ouyang, Changchun; +Cc: dev



> -----Original Message-----
> From: Ouyang, Changchun
> Sent: Wednesday, December 24, 2014 11:50 AM
> To: Zhang, Helin
> Cc: dev@dpdk.org; Ouyang, Changchun
> Subject: RE: [dpdk-dev] [PATCH v2 1/6] ixgbe: Code cleanup
> 
> Hi Helin,
> 
> > -----Original Message-----
> > From: Zhang, Helin
> > Sent: Wednesday, December 24, 2014 11:41 AM
> > To: Ouyang, Changchun
> > Cc: dev@dpdk.org
> > Subject: RE: [dpdk-dev] [PATCH v2 1/6] ixgbe: Code cleanup
> >
> > Hi Changchun
> >
> > > -----Original Message-----
> > > From: Ouyang, Changchun
> > > Sent: Wednesday, December 24, 2014 11:22 AM
> > > To: Zhang, Helin; dev@dpdk.org
> > > Cc: Ouyang, Changchun
> > > Subject: RE: [dpdk-dev] [PATCH v2 1/6] ixgbe: Code cleanup
> > >
> > > Hi Helin,
> > >
> > > > -----Original Message-----
> > > > From: Zhang, Helin
> > > > Sent: Wednesday, December 24, 2014 11:08 AM
> > > > To: Ouyang, Changchun; dev@dpdk.org
> > > > Subject: RE: [dpdk-dev] [PATCH v2 1/6] ixgbe: Code cleanup
> > > >
> > > > Is header split really supported in ixgbe? I guess not. If not,
> > > > this code changes are not needed at all.
> > > >
> > >
> > > I don't try to modify any logic here, as you know vf rss don't have
> > > nothing to do with header split.
> > But you modified code for header split. Have you validated it?
> >
> > > You mean " guess not ", can you 'make sure of it' rather than 'guess'
> > > if you want to remove those codes?
> > I did not see anywhere can enable ixgbe header split. It is your turn
> > to make sure if your code changes are needed and correct. I don't want
> > to remove any code, just want to know if your code have been validated for
> header split.
> 
> Yes, I make sure the code change is correct, and already validate the code
> change on my platform, besides that, we also have validation team to do
> further validation. any other concern then?
I still don't know how to enable header split for ixgbe. Any answer from you?

Regards,
Helin

> 
> Thanks
> Changchun

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/6] ixgbe: Code cleanup
  2014-12-24  3:53             ` Zhang, Helin
@ 2014-12-24  4:46               ` Ouyang, Changchun
  0 siblings, 0 replies; 144+ messages in thread
From: Ouyang, Changchun @ 2014-12-24  4:46 UTC (permalink / raw)
  To: Zhang, Helin; +Cc: dev

Hi Helin,

> -----Original Message-----
> From: Zhang, Helin
> Sent: Wednesday, December 24, 2014 11:54 AM
> To: Ouyang, Changchun
> Cc: dev@dpdk.org
> Subject: RE: [dpdk-dev] [PATCH v2 1/6] ixgbe: Code cleanup
> 
> 
> 
> > -----Original Message-----
> > From: Ouyang, Changchun
> > Sent: Wednesday, December 24, 2014 11:50 AM
> > To: Zhang, Helin
> > Cc: dev@dpdk.org; Ouyang, Changchun
> > Subject: RE: [dpdk-dev] [PATCH v2 1/6] ixgbe: Code cleanup
> >
> > Hi Helin,
> >
> > > -----Original Message-----
> > > From: Zhang, Helin
> > > Sent: Wednesday, December 24, 2014 11:41 AM
> > > To: Ouyang, Changchun
> > > Cc: dev@dpdk.org
> > > Subject: RE: [dpdk-dev] [PATCH v2 1/6] ixgbe: Code cleanup
> > >
> > > Hi Changchun
> > >
> > > > -----Original Message-----
> > > > From: Ouyang, Changchun
> > > > Sent: Wednesday, December 24, 2014 11:22 AM
> > > > To: Zhang, Helin; dev@dpdk.org
> > > > Cc: Ouyang, Changchun
> > > > Subject: RE: [dpdk-dev] [PATCH v2 1/6] ixgbe: Code cleanup
> > > >
> > > > Hi Helin,
> > > >
> > > > > -----Original Message-----
> > > > > From: Zhang, Helin
> > > > > Sent: Wednesday, December 24, 2014 11:08 AM
> > > > > To: Ouyang, Changchun; dev@dpdk.org
> > > > > Subject: RE: [dpdk-dev] [PATCH v2 1/6] ixgbe: Code cleanup
> > > > >
> > > > > Is header split really supported in ixgbe? I guess not. If not,
> > > > > this code changes are not needed at all.
> > > > >
> > > >
> > > > I don't try to modify any logic here, as you know vf rss don't
> > > > have nothing to do with header split.
> > > But you modified code for header split. Have you validated it?
> > >
> > > > You mean " guess not ", can you 'make sure of it' rather than 'guess'
> > > > if you want to remove those codes?
> > > I did not see anywhere can enable ixgbe header split. It is your
> > > turn to make sure if your code changes are needed and correct. I
> > > don't want to remove any code, just want to know if your code have
> > > been validated for
> > header split.
> >
> > Yes, I make sure the code change is correct, and already validate the
> > code change on my platform, besides that, we also have validation team
> > to do further validation. any other concern then?
> I still don't know how to enable header split for ixgbe. Any answer from you?

As this patch aims at enabling vf rss, how about let's focus on it and don't fork to other features?
Thanks for your comments

Changchun

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic
  2014-12-24  2:56 ` [dpdk-dev] [PATCH v2 " Ouyang Changchun
                     ` (5 preceding siblings ...)
  2014-12-24  2:56   ` [dpdk-dev] [PATCH v2 6/6] testpmd: Set Rx VMDq RSS mode Ouyang Changchun
@ 2014-12-24  5:22   ` Ouyang Changchun
  2014-12-24  5:22     ` [dpdk-dev] [PATCH v3 1/6] ixgbe: Code cleanup Ouyang Changchun
                       ` (8 more replies)
  6 siblings, 9 replies; 144+ messages in thread
From: Ouyang Changchun @ 2014-12-24  5:22 UTC (permalink / raw)
  To: dev

This patch enables VF RSS for Niantic, which allows each VF to have at most 4 queues.
The actual queue number per VF depends on the total number of pools, which is
determined by the total number of VFs at the PF initialization stage and the number of
queues specified in the config:
1) If the number of VFs is in the range from 1 to 32 and the number of rxq is 4 ('--rxq 4' in testpmd),
then there are 32 pools in total (ETH_32_POOLS), and each VF has 4 queues;
 
2) If the number of VFs is in the range from 33 to 64 and the number of rxq is 2 ('--rxq 2' in testpmd),
then there are 64 pools in total (ETH_64_POOLS), and each VF has 2 queues;
 
On the host, to enable VF RSS functionality, the rx mq mode should be set to ETH_MQ_RX_VMDQ_RSS
or ETH_MQ_RX_RSS mode, and SRIOV mode should be activated (max_vfs >= 1).
VF RSS information such as the hash function, RSS key and RSS key length also needs to be configured.
 
The limitation for Niantic VF RSS is:
the hash and key are shared among the PF and all VFs, and the RETA table with 128 entries is
also shared among the PF and all VFs. So it is not a good idea to query the hash and reta content per VF on
the guest; instead, it makes sense to query them on the host (PF).
 
v3 change:
  - More cleanup;

v2 change:
  - Update the description;
  - Use receiving queue number('--rxq <q-num>') specified in config to determine the number of pool and
    the number of queue per VF;
 
v1 change:
  - Config VF RSS;

Changchun Ouyang (6):
  ixgbe: Code cleanup
  ixgbe: Negotiate VF API version
  ixgbe: Get VF queue number
  ether: Check VMDq RSS mode
  ixgbe: Config VF RSS
  testpmd: Set Rx VMDq RSS mode

 app/test-pmd/testpmd.c              |  10 +++
 lib/librte_ether/rte_ethdev.c       |  39 +++++++++--
 lib/librte_pmd_ixgbe/ixgbe_ethdev.h |   1 +
 lib/librte_pmd_ixgbe/ixgbe_pf.c     |  75 ++++++++++++++++++++-
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c   | 127 ++++++++++++++++++++++++++++--------
 5 files changed, 219 insertions(+), 33 deletions(-)

-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH v3 1/6] ixgbe: Code cleanup
  2014-12-24  5:22   ` [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic Ouyang Changchun
@ 2014-12-24  5:22     ` Ouyang Changchun
  2014-12-24  5:23     ` [dpdk-dev] [PATCH v3 2/6] ixgbe: Negotiate VF API version Ouyang Changchun
                       ` (7 subsequent siblings)
  8 siblings, 0 replies; 144+ messages in thread
From: Ouyang Changchun @ 2014-12-24  5:22 UTC (permalink / raw)
  To: dev

Move the global register configuration out of the per-queue loop to where it belongs;
also fix a typo and the indentation.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 35 ++++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index 5c36bff..f69abda 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -3548,9 +3548,9 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
 				IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
 			}
 			srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
-				   IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
-				  IXGBE_SRRCTL_BSIZEHDR_MASK);
-			srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
+				IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
+				IXGBE_SRRCTL_BSIZEHDR_MASK);
+			srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
 		} else
 #endif
 			srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
@@ -3985,7 +3985,7 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 	struct igb_rx_queue *rxq;
 	struct rte_pktmbuf_pool_private *mbp_priv;
 	uint64_t bus_addr;
-	uint32_t srrctl;
+	uint32_t srrctl, psrtype = 0;
 	uint16_t buf_size;
 	uint16_t i;
 	int ret;
@@ -4039,20 +4039,10 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 		 * Configure Header Split
 		 */
 		if (dev->data->dev_conf.rxmode.header_split) {
-
-			/* Must setup the PSRTYPE register */
-			uint32_t psrtype;
-			psrtype = IXGBE_PSRTYPE_TCPHDR |
-				IXGBE_PSRTYPE_UDPHDR   |
-				IXGBE_PSRTYPE_IPV4HDR  |
-				IXGBE_PSRTYPE_IPV6HDR;
-
-			IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE(i), psrtype);
-
 			srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
-				   IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
-				  IXGBE_SRRCTL_BSIZEHDR_MASK);
-			srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
+				IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
+				IXGBE_SRRCTL_BSIZEHDR_MASK);
+			srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
 		} else
 #endif
 			srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
@@ -4095,6 +4085,17 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 		}
 	}
 
+#ifdef RTE_HEADER_SPLIT_ENABLE
+	if (dev->data->dev_conf.rxmode.header_split)
+		/* Must setup the PSRTYPE register */
+		psrtype = IXGBE_PSRTYPE_TCPHDR |
+			IXGBE_PSRTYPE_UDPHDR   |
+			IXGBE_PSRTYPE_IPV4HDR  |
+			IXGBE_PSRTYPE_IPV6HDR;
+#endif
+
+	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
+
 	if (dev->data->dev_conf.rxmode.enable_scatter) {
 		if (!dev->data->scattered_rx)
 			PMD_INIT_LOG(DEBUG, "forcing scatter mode");
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH v3 2/6] ixgbe: Negotiate VF API version
  2014-12-24  5:22   ` [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic Ouyang Changchun
  2014-12-24  5:22     ` [dpdk-dev] [PATCH v3 1/6] ixgbe: Code cleanup Ouyang Changchun
@ 2014-12-24  5:23     ` Ouyang Changchun
  2014-12-24  5:23     ` [dpdk-dev] [PATCH v3 3/6] ixgbe: Get VF queue number Ouyang Changchun
                       ` (6 subsequent siblings)
  8 siblings, 0 replies; 144+ messages in thread
From: Ouyang Changchun @ 2014-12-24  5:23 UTC (permalink / raw)
  To: dev

Negotiate API version with VF when receiving the IXGBE_VF_API_NEGOTIATE message.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_pmd_ixgbe/ixgbe_ethdev.h |  1 +
 lib/librte_pmd_ixgbe/ixgbe_pf.c     | 25 +++++++++++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.h b/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
index ca99170..730098d 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
+++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
@@ -159,6 +159,7 @@ struct ixgbe_vf_info {
 	uint16_t tx_rate[IXGBE_MAX_QUEUE_NUM_PER_VF];
 	uint16_t vlan_count;
 	uint8_t spoofchk_enabled;
+	uint8_t api_version;
 };
 
 /*
diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c b/lib/librte_pmd_ixgbe/ixgbe_pf.c
index 51da1fd..495aff5 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
@@ -469,6 +469,28 @@ ixgbe_set_vf_lpe(struct rte_eth_dev *dev, __rte_unused uint32_t vf, uint32_t *ms
 }
 
 static int
+ixgbe_negotiate_vf_api(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf)
+{
+	uint32_t api_version = msgbuf[1];
+	struct ixgbe_vf_info *vfinfo =
+		*IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private);
+
+	switch (api_version) {
+	case ixgbe_mbox_api_10:
+	case ixgbe_mbox_api_11:
+		vfinfo[vf].api_version = (uint8_t)api_version;
+		return 0;
+	default:
+		break;
+	}
+
+	RTE_LOG(ERR, PMD, "Negotiate invalid api version %u from VF %d\n",
+		api_version, vf);
+
+	return -1;
+}
+
+static int
 ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
 {
 	uint16_t mbx_size = IXGBE_VFMAILBOX_SIZE;
@@ -512,6 +534,9 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
 	case IXGBE_VF_SET_VLAN:
 		retval = ixgbe_vf_set_vlan(dev, vf, msgbuf);
 		break;
+	case IXGBE_VF_API_NEGOTIATE:
+		retval = ixgbe_negotiate_vf_api(dev, vf, msgbuf);
+		break;
 	default:
 		PMD_DRV_LOG(DEBUG, "Unhandled Msg %8.8x", (unsigned)msgbuf[0]);
 		retval = IXGBE_ERR_MBX;
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH v3 3/6] ixgbe: Get VF queue number
  2014-12-24  5:22   ` [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic Ouyang Changchun
  2014-12-24  5:22     ` [dpdk-dev] [PATCH v3 1/6] ixgbe: Code cleanup Ouyang Changchun
  2014-12-24  5:23     ` [dpdk-dev] [PATCH v3 2/6] ixgbe: Negotiate VF API version Ouyang Changchun
@ 2014-12-24  5:23     ` Ouyang Changchun
  2014-12-24  5:23     ` [dpdk-dev] [PATCH v3 4/6] ether: Check VMDq RSS mode Ouyang Changchun
                       ` (5 subsequent siblings)
  8 siblings, 0 replies; 144+ messages in thread
From: Ouyang Changchun @ 2014-12-24  5:23 UTC (permalink / raw)
  To: dev

Get the available Rx and Tx queue number when receiving IXGBE_VF_GET_QUEUES message from VF.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_pmd_ixgbe/ixgbe_pf.c | 35 ++++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c b/lib/librte_pmd_ixgbe/ixgbe_pf.c
index 495aff5..cbb0145 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
@@ -53,6 +53,8 @@
 #include "ixgbe_ethdev.h"
 
 #define IXGBE_MAX_VFTA     (128)
+#define IXGBE_VF_MSG_SIZE_DEFAULT 1
+#define IXGBE_VF_GET_QUEUE_MSG_SIZE 5
 
 static inline uint16_t
 dev_num_vf(struct rte_eth_dev *eth_dev)
@@ -491,9 +493,36 @@ ixgbe_negotiate_vf_api(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf)
 }
 
 static int
+ixgbe_get_vf_queues(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf)
+{
+	struct ixgbe_vf_info *vfinfo =
+		*IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private);
+	uint32_t default_q = vf * RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
+
+	/* Verify if the PF supports the mbox APIs version or not */
+	switch (vfinfo[vf].api_version) {
+	case ixgbe_mbox_api_20:
+	case ixgbe_mbox_api_11:
+		break;
+	default:
+		return -1;
+	}
+
+	/* Notify VF of Rx and Tx queue number */
+	msgbuf[IXGBE_VF_RX_QUEUES] = RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
+	msgbuf[IXGBE_VF_TX_QUEUES] = RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
+
+	/* Notify VF of default queue */
+	msgbuf[IXGBE_VF_DEF_QUEUE] = default_q;
+
+	return 0;
+}
+
+static int
 ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
 {
 	uint16_t mbx_size = IXGBE_VFMAILBOX_SIZE;
+	uint16_t msg_size = IXGBE_VF_MSG_SIZE_DEFAULT;
 	uint32_t msgbuf[IXGBE_VFMAILBOX_SIZE];
 	int32_t retval;
 	struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
@@ -537,6 +566,10 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
 	case IXGBE_VF_API_NEGOTIATE:
 		retval = ixgbe_negotiate_vf_api(dev, vf, msgbuf);
 		break;
+	case IXGBE_VF_GET_QUEUES:
+		retval = ixgbe_get_vf_queues(dev, vf, msgbuf);
+		msg_size = IXGBE_VF_GET_QUEUE_MSG_SIZE;
+		break;
 	default:
 		PMD_DRV_LOG(DEBUG, "Unhandled Msg %8.8x", (unsigned)msgbuf[0]);
 		retval = IXGBE_ERR_MBX;
@@ -551,7 +584,7 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
 
 	msgbuf[0] |= IXGBE_VT_MSGTYPE_CTS;
 
-	ixgbe_write_mbx(hw, msgbuf, 1, vf);
+	ixgbe_write_mbx(hw, msgbuf, msg_size, vf);
 
 	return retval;
 }
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH v3 4/6] ether: Check VMDq RSS mode
  2014-12-24  5:22   ` [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic Ouyang Changchun
                       ` (2 preceding siblings ...)
  2014-12-24  5:23     ` [dpdk-dev] [PATCH v3 3/6] ixgbe: Get VF queue number Ouyang Changchun
@ 2014-12-24  5:23     ` Ouyang Changchun
  2014-12-24  5:23     ` [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS Ouyang Changchun
                       ` (4 subsequent siblings)
  8 siblings, 0 replies; 144+ messages in thread
From: Ouyang Changchun @ 2014-12-24  5:23 UTC (permalink / raw)
  To: dev

Check the mq mode for VMDq RSS and handle it correctly instead of returning an
error. Also remove the limitation that caps the per-pool queue number at 1,
because the per-pool queue number can be 2 or 4 in VMDq RSS mode.

The number of rxq specified in the config determines the mq mode for VMDq RSS.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_ether/rte_ethdev.c | 39 ++++++++++++++++++++++++++++++++++-----
 1 file changed, 34 insertions(+), 5 deletions(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 95f2ceb..59ff325 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -510,8 +510,7 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 
 	if (RTE_ETH_DEV_SRIOV(dev).active != 0) {
 		/* check multi-queue mode */
-		if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_RSS) ||
-		    (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
+		if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
 		    (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB_RSS) ||
 		    (dev_conf->txmode.mq_mode == ETH_MQ_TX_DCB)) {
 			/* SRIOV only works in VMDq enable mode */
@@ -525,7 +524,6 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 		}
 
 		switch (dev_conf->rxmode.mq_mode) {
-		case ETH_MQ_RX_VMDQ_RSS:
 		case ETH_MQ_RX_VMDQ_DCB:
 		case ETH_MQ_RX_VMDQ_DCB_RSS:
 			/* DCB/RSS VMDQ in SRIOV mode, not implement yet */
@@ -534,6 +532,39 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 					"unsupported VMDQ mq_mode rx %u\n",
 					port_id, dev_conf->rxmode.mq_mode);
 			return (-EINVAL);
+		case ETH_MQ_RX_RSS:
+			PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8
+					" SRIOV active, "
+					"Rx mq mode is changed from:"
+					"mq_mode %u into VMDQ mq_mode %u\n",
+					port_id,
+					dev_conf->rxmode.mq_mode,
+					dev->data->dev_conf.rxmode.mq_mode);
+		case ETH_MQ_RX_VMDQ_RSS:
+			dev->data->dev_conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_RSS;
+			if (nb_rx_q < RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool) {
+				switch (nb_rx_q) {
+				case 1:
+				case 2:
+					RTE_ETH_DEV_SRIOV(dev).active =
+						ETH_64_POOLS;
+					break;
+				case 4:
+					RTE_ETH_DEV_SRIOV(dev).active =
+						ETH_32_POOLS;
+					break;
+				default:
+					PMD_DEBUG_TRACE("ethdev port_id=%d"
+						" SRIOV active, "
+						"queue number invalid\n",
+						port_id);
+					return -EINVAL;
+				}
+				RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = nb_rx_q;
+				RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx =
+					dev->pci_dev->max_vfs * nb_rx_q;
+			}
+			break;
 		default: /* ETH_MQ_RX_VMDQ_ONLY or ETH_MQ_RX_NONE */
 			/* if nothing mq mode configure, use default scheme */
 			dev->data->dev_conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_ONLY;
@@ -553,8 +584,6 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 		default: /* ETH_MQ_TX_VMDQ_ONLY or ETH_MQ_TX_NONE */
 			/* if nothing mq mode configure, use default scheme */
 			dev->data->dev_conf.txmode.mq_mode = ETH_MQ_TX_VMDQ_ONLY;
-			if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)
-				RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;
 			break;
 		}
 
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
  2014-12-24  5:22   ` [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic Ouyang Changchun
                       ` (3 preceding siblings ...)
  2014-12-24  5:23     ` [dpdk-dev] [PATCH v3 4/6] ether: Check VMDq RSS mode Ouyang Changchun
@ 2014-12-24  5:23     ` Ouyang Changchun
  2014-12-24 10:39       ` Vlad Zolotarov
  2015-01-04  2:10       ` Liang, Cunming
  2014-12-24  5:23     ` [dpdk-dev] [PATCH v3 6/6] testpmd: Set Rx VMDq RSS mode Ouyang Changchun
                       ` (3 subsequent siblings)
  8 siblings, 2 replies; 144+ messages in thread
From: Ouyang Changchun @ 2014-12-24  5:23 UTC (permalink / raw)
  To: dev

Enabling VF RSS requires configuring RSS, IXGBE_MRQC and IXGBE_VFPSRTYPE.

The psrtype determines how many queues the received packets are distributed to,
and its value should depend on two factors: the max VF rxq number which has
been negotiated with the PF, and the number of rxq specified in the config on the guest.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_pmd_ixgbe/ixgbe_pf.c   | 15 +++++++
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 92 ++++++++++++++++++++++++++++++++++-----
 2 files changed, 97 insertions(+), 10 deletions(-)

diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c b/lib/librte_pmd_ixgbe/ixgbe_pf.c
index cbb0145..9c9dad8 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
@@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev *eth_dev)
 	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw->mac.num_rar_entries), 0);
 	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw->mac.num_rar_entries), 0);
 
+	/*
+	 * VF RSS can support at most 4 queues for each VF, even if
+	 * 8 queues are available for each VF, it need refine to 4
+	 * queues here due to this limitation, otherwise no queue
+	 * will receive any packet even RSS is enabled.
+	 */
+	if (eth_dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_VMDQ_RSS) {
+		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
+			RTE_ETH_DEV_SRIOV(eth_dev).active = ETH_32_POOLS;
+			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
+			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
+				dev_num_vf(eth_dev) * 4;
+		}
+	}
+
 	/* set VMDq map to default PF pool */
 	hw->mac.ops.set_vmdq(hw, 0, RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
 
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index f69abda..a7c17a4 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -3327,6 +3327,39 @@ ixgbe_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
 }
 
 static int
+ixgbe_config_vf_rss(struct rte_eth_dev *dev)
+{
+	struct ixgbe_hw *hw;
+	uint32_t mrqc;
+
+	ixgbe_rss_configure(dev);
+
+	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+	/* MRQC: enable VF RSS */
+	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
+	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
+	switch (RTE_ETH_DEV_SRIOV(dev).active) {
+	case ETH_64_POOLS:
+		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
+		break;
+
+	case ETH_32_POOLS:
+	case ETH_16_POOLS:
+		mrqc |= IXGBE_MRQC_VMDQRSS32EN;
+		break;
+
+	default:
+		PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode");
+		return -EINVAL;
+	}
+
+	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
+
+	return 0;
+}
+
+static int
 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
 {
 	struct ixgbe_hw *hw =
@@ -3358,24 +3391,38 @@ ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
 			default: ixgbe_rss_disable(dev);
 		}
 	} else {
-		switch (RTE_ETH_DEV_SRIOV(dev).active) {
 		/*
 		 * SRIOV active scheme
 		 * FIXME if support DCB/RSS together with VMDq & SRIOV
 		 */
-		case ETH_64_POOLS:
-			IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQEN);
+		switch (dev->data->dev_conf.rxmode.mq_mode) {
+		case ETH_MQ_RX_RSS:
+		case ETH_MQ_RX_VMDQ_RSS:
+			ixgbe_config_vf_rss(dev);
 			break;
 
-		case ETH_32_POOLS:
-			IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQRT4TCEN);
-			break;
+		default:
+			switch (RTE_ETH_DEV_SRIOV(dev).active) {
+			case ETH_64_POOLS:
+				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
+					IXGBE_MRQC_VMDQEN);
+				break;
 
-		case ETH_16_POOLS:
-			IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQRT8TCEN);
+			case ETH_32_POOLS:
+				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
+					IXGBE_MRQC_VMDQRT4TCEN);
+				break;
+
+			case ETH_16_POOLS:
+				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
+					IXGBE_MRQC_VMDQRT8TCEN);
+				break;
+			default:
+				PMD_INIT_LOG(ERR,
+					"invalid pool number in IOV mode");
+				break;
+			}
 			break;
-		default:
-			PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
 		}
 	}
 
@@ -3989,10 +4036,32 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 	uint16_t buf_size;
 	uint16_t i;
 	int ret;
+	uint16_t valid_rxq_num;
 
 	PMD_INIT_FUNC_TRACE();
 	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
+	valid_rxq_num = RTE_MIN(dev->data->nb_rx_queues, hw->mac.max_rx_queues);
+
+	/*
+	 * VMDq RSS can't support 3 queues, so config it into 4 queues,
+	 * and give user a hint that some packets may loss if it doesn't
+	 * poll the queue where those packets are distributed to.
+	 */
+	if (valid_rxq_num == 3)
+		valid_rxq_num = 4;
+
+	if (dev->data->nb_rx_queues > valid_rxq_num) {
+		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
+			"it should be equal to or less than %d",
+			valid_rxq_num);
+		return -1;
+	} else if (dev->data->nb_rx_queues < valid_rxq_num)
+		PMD_INIT_LOG(ERR, "The number of Rx queue is less "
+			"than the number of available Rx queues:%d, "
+			"packets in Rx queues(q_id >= %d) may loss.",
+			valid_rxq_num, dev->data->nb_rx_queues);
+
 	/*
 	 * When the VF driver issues a IXGBE_VF_RESET request, the PF driver
 	 * disables the VF receipt of packets if the PF MTU is > 1500.
@@ -4094,6 +4163,9 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 			IXGBE_PSRTYPE_IPV6HDR;
 #endif
 
+	/* Set RQPL for VF RSS according to max Rx queue */
+	psrtype |= (valid_rxq_num >> 1) <<
+		IXGBE_PSRTYPE_RQPL_SHIFT;
 	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
 
 	if (dev->data->dev_conf.rxmode.enable_scatter) {
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH v3 6/6] testpmd: Set Rx VMDq RSS mode
  2014-12-24  5:22   ` [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic Ouyang Changchun
                       ` (4 preceding siblings ...)
  2014-12-24  5:23     ` [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS Ouyang Changchun
@ 2014-12-24  5:23     ` Ouyang Changchun
  2014-12-24  9:59     ` [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic Vlad Zolotarov
                       ` (2 subsequent siblings)
  8 siblings, 0 replies; 144+ messages in thread
From: Ouyang Changchun @ 2014-12-24  5:23 UTC (permalink / raw)
  To: dev

Set VMDq RSS mode if the port has VFs (the VF number is at least 1) and has RSS information.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 app/test-pmd/testpmd.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 8c69756..6230f8b 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -1708,6 +1708,16 @@ init_port_config(void)
 				port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
 		}
 
+		if (port->dev_info.max_vfs != 0) {
+			if (port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0)
+				port->dev_conf.rxmode.mq_mode =
+					ETH_MQ_RX_VMDQ_RSS;
+			else {
+				port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
+				port->dev_conf.txmode.mq_mode = ETH_MQ_TX_NONE;
+			}
+		}
+
 		port->rx_conf.rx_thresh = rx_thresh;
 		port->rx_conf.rx_free_thresh = rx_free_thresh;
 		port->rx_conf.rx_drop_en = rx_drop_en;
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic
  2014-12-24  5:22   ` [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic Ouyang Changchun
                       ` (5 preceding siblings ...)
  2014-12-24  5:23     ` [dpdk-dev] [PATCH v3 6/6] testpmd: Set Rx VMDq RSS mode Ouyang Changchun
@ 2014-12-24  9:59     ` Vlad Zolotarov
  2014-12-25  1:46       ` Ouyang, Changchun
  2014-12-24 10:49     ` Vlad Zolotarov
  2015-01-04  7:18     ` [dpdk-dev] [PATCH v4 " Ouyang Changchun
  8 siblings, 1 reply; 144+ messages in thread
From: Vlad Zolotarov @ 2014-12-24  9:59 UTC (permalink / raw)
  To: Ouyang Changchun, dev


On 12/24/14 07:22, Ouyang Changchun wrote:
> This patch enables VF RSS for Niantic, which allow each VF having at most 4 queues.
> The actual queue number per VF depends on the total number of pool, which is
> determined by the total number of VF at PF initialization stage and the number of
> queue specified in config:
> 1) If the number of VF is in the range from 1 to 32 and the number of rxq is 4('--rxq 4' in testpmd),
> then there is totally 32 pools(ETH_32_POOLS), and each VF have 4 queues;
>   
> 2)If the number of VF is in the range from 33 to 64 and the number of rxq is 2('--rxq 2' in testpmd),
> then there is totally 64 pools(ETH_64_POOLS), and each VF have 2 queues;
>   
> On host, to enable VF RSS functionality, rx mq mode should be set as ETH_MQ_RX_VMDQ_RSS
> or ETH_MQ_RX_RSS mode, and SRIOV mode should be activated(max_vfs >= 1).
> It also needs config VF RSS information like hash function, RSS key, RSS key length.
>   
> The limitation for Niantic VF RSS is:
> the hash and key are shared among PF and all VF

Hmmm... This kinda contradicts the previous sentence where u say that VF 
on the host should configure hash and RSS key. If PF and VF share the 
same hash and key what's the use of configuring it in VF? Could u 
clarify, please?

> , the RETA table with 128 entries are
> also shared among PF and all VF. So it is not good idea to query the hash and reta content per VF on
> guest, instead, it makes sense to query them on host(PF).

On the contrary - it's a very good idea! We use DPDK on Amazon's guests 
with enhanced networking and we have no access to the PF. We still need 
to know the RSS redirection rules for our VF pool. From the 82599 spec, 
chapter 4.6.10.1.1: "redirection table is common to all the pools and 
only indicates the queue inside the
pool to use once the pool is chosen". In that case we need to get the 
whole 128 entries of the RETA. Is there a reason why we can't have it?

>   
> v3 change:
>    - More cleanup;
>
> v2 change:
>    - Update the description;
>    - Use receiving queue number('--rxq <q-num>') specified in config to determine the number of pool and
>      the number of queue per VF;
>   
> v1 change:
>    - Config VF RSS;
>
> Changchun Ouyang (6):
>    ixgbe: Code cleanup
>    ixgbe: Negotiate VF API version
>    ixgbe: Get VF queue number
>    ether: Check VMDq RSS mode
>    ixgbe: Config VF RSS
>    testpmd: Set Rx VMDq RSS mode
>
>   app/test-pmd/testpmd.c              |  10 +++
>   lib/librte_ether/rte_ethdev.c       |  39 +++++++++--
>   lib/librte_pmd_ixgbe/ixgbe_ethdev.h |   1 +
>   lib/librte_pmd_ixgbe/ixgbe_pf.c     |  75 ++++++++++++++++++++-
>   lib/librte_pmd_ixgbe/ixgbe_rxtx.c   | 127 ++++++++++++++++++++++++++++--------
>   5 files changed, 219 insertions(+), 33 deletions(-)
>

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
  2014-12-24  5:23     ` [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS Ouyang Changchun
@ 2014-12-24 10:39       ` Vlad Zolotarov
  2014-12-25  2:14         ` Ouyang, Changchun
  2014-12-25  2:43         ` Ouyang, Changchun
  2015-01-04  2:10       ` Liang, Cunming
  1 sibling, 2 replies; 144+ messages in thread
From: Vlad Zolotarov @ 2014-12-24 10:39 UTC (permalink / raw)
  To: Ouyang Changchun, dev


On 12/24/14 07:23, Ouyang Changchun wrote:
> It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable VF RSS.
>
> The psrtype will determine how many queues the received packets will distribute to,
> and the value of psrtype should depends on both facet: max VF rxq number which
> has been negotiated with PF, and the number of rxq specified in config on guest.
>
> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> ---
>   lib/librte_pmd_ixgbe/ixgbe_pf.c   | 15 +++++++
>   lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 92 ++++++++++++++++++++++++++++++++++-----
>   2 files changed, 97 insertions(+), 10 deletions(-)
>
> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> index cbb0145..9c9dad8 100644
> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev *eth_dev)
>   	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw->mac.num_rar_entries), 0);
>   	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw->mac.num_rar_entries), 0);
>   
> +	/*
> +	 * VF RSS can support at most 4 queues for each VF, even if
> +	 * 8 queues are available for each VF, it need refine to 4
> +	 * queues here due to this limitation, otherwise no queue
> +	 * will receive any packet even RSS is enabled.

According to Table 7-3 in the 82599 spec RSS is not available when port 
is configured to have 8 queues per pool. This means that if u see this 
configuration u may immediately disable RSS flow in your code.

> +	 */
> +	if (eth_dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_VMDQ_RSS) {
> +		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
> +			RTE_ETH_DEV_SRIOV(eth_dev).active = ETH_32_POOLS;
> +			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
> +			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
> +				dev_num_vf(eth_dev) * 4;

According to the 82599 spec u can't do that since RSS is not allowed when 
the port is configured to have 8 queues per VF. Have u verified that this 
works? If yes, then the spec should be updated.

> +		}
> +	}
> +
>   	/* set VMDq map to default PF pool */
>   	hw->mac.ops.set_vmdq(hw, 0, RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
>   
> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> index f69abda..a7c17a4 100644
> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> @@ -3327,6 +3327,39 @@ ixgbe_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
>   }
>   
>   static int
> +ixgbe_config_vf_rss(struct rte_eth_dev *dev)
> +{
> +	struct ixgbe_hw *hw;
> +	uint32_t mrqc;
> +
> +	ixgbe_rss_configure(dev);
> +
> +	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> +
> +	/* MRQC: enable VF RSS */
> +	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
> +	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
> +	switch (RTE_ETH_DEV_SRIOV(dev).active) {
> +	case ETH_64_POOLS:
> +		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
> +		break;
> +
> +	case ETH_32_POOLS:
> +	case ETH_16_POOLS:
> +		mrqc |= IXGBE_MRQC_VMDQRSS32EN;

Again, this contradicts with the spec.

> +		break;
> +
> +	default:
> +		PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode");
> +		return -EINVAL;
> +	}
> +
> +	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
> +
> +	return 0;
> +}
> +
> +static int
>   ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
>   {
>   	struct ixgbe_hw *hw =
> @@ -3358,24 +3391,38 @@ ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
>   			default: ixgbe_rss_disable(dev);
>   		}
>   	} else {
> -		switch (RTE_ETH_DEV_SRIOV(dev).active) {
>   		/*
>   		 * SRIOV active scheme
>   		 * FIXME if support DCB/RSS together with VMDq & SRIOV
>   		 */
> -		case ETH_64_POOLS:
> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQEN);
> +		switch (dev->data->dev_conf.rxmode.mq_mode) {
> +		case ETH_MQ_RX_RSS:
> +		case ETH_MQ_RX_VMDQ_RSS:
> +			ixgbe_config_vf_rss(dev);
>   			break;
>   
> -		case ETH_32_POOLS:
> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQRT4TCEN);
> -			break;
> +		default:
> +			switch (RTE_ETH_DEV_SRIOV(dev).active) {

Sorry for nitpicking but have u considered taking this encapsulated 
"switch-case" block into a separate function? This could make the code 
look a lot nicer. ;)

> +			case ETH_64_POOLS:
> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> +					IXGBE_MRQC_VMDQEN);
> +				break;
>   
> -		case ETH_16_POOLS:
> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQRT8TCEN);
> +			case ETH_32_POOLS:
> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> +					IXGBE_MRQC_VMDQRT4TCEN);
> +				break;
> +
> +			case ETH_16_POOLS:
> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> +					IXGBE_MRQC_VMDQRT8TCEN);
> +				break;
> +			default:
> +				PMD_INIT_LOG(ERR,
> +					"invalid pool number in IOV mode");
> +				break;
> +			}
>   			break;
> -		default:
> -			PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
>   		}
>   	}
>   
> @@ -3989,10 +4036,32 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
>   	uint16_t buf_size;
>   	uint16_t i;
>   	int ret;
> +	uint16_t valid_rxq_num;
>   
>   	PMD_INIT_FUNC_TRACE();
>   	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>   
> +	valid_rxq_num = RTE_MIN(dev->data->nb_rx_queues, hw->mac.max_rx_queues);
> +
> +	/*
> +	 * VMDq RSS can't support 3 queues, so config it into 4 queues,
> +	 * and give user a hint that some packets may loss if it doesn't
> +	 * poll the queue where those packets are distributed to.
> +	 */
> +	if (valid_rxq_num == 3)
> +		valid_rxq_num = 4;

Why to configure more queues that requested and not less (2)? Why to 
configure anything at all and not return an error?

> +
> +	if (dev->data->nb_rx_queues > valid_rxq_num) {
> +		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
> +			"it should be equal to or less than %d",
> +			valid_rxq_num);
> +		return -1;
> +	} else if (dev->data->nb_rx_queues < valid_rxq_num)
> +		PMD_INIT_LOG(ERR, "The number of Rx queue is less "
> +			"than the number of available Rx queues:%d, "
> +			"packets in Rx queues(q_id >= %d) may loss.",
> +			valid_rxq_num, dev->data->nb_rx_queues);

Whoever looks in the "INIT_LOG" if everything "works well"? And u make it 
look so by allowing this call to succeed. And then some packets will 
just silently not arrive?! And what should the user somehow guess to do? 
- Look in the "INIT_LOG"?! This is a nightmare!

> +
>   	/*
>   	 * When the VF driver issues a IXGBE_VF_RESET request, the PF driver
>   	 * disables the VF receipt of packets if the PF MTU is > 1500.
> @@ -4094,6 +4163,9 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
>   			IXGBE_PSRTYPE_IPV6HDR;
>   #endif
>   
> +	/* Set RQPL for VF RSS according to max Rx queue */
> +	psrtype |= (valid_rxq_num >> 1) <<
> +		IXGBE_PSRTYPE_RQPL_SHIFT;
>   	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
>   
>   	if (dev->data->dev_conf.rxmode.enable_scatter) {

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic
  2014-12-24  5:22   ` [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic Ouyang Changchun
                       ` (6 preceding siblings ...)
  2014-12-24  9:59     ` [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic Vlad Zolotarov
@ 2014-12-24 10:49     ` Vlad Zolotarov
  2014-12-25  2:26       ` Ouyang, Changchun
  2015-01-04  7:18     ` [dpdk-dev] [PATCH v4 " Ouyang Changchun
  8 siblings, 1 reply; 144+ messages in thread
From: Vlad Zolotarov @ 2014-12-24 10:49 UTC (permalink / raw)
  To: Ouyang Changchun, dev


On 12/24/14 07:22, Ouyang Changchun wrote:
> This patch enables VF RSS for Niantic, which allow each VF having at most 4 queues.
> The actual queue number per VF depends on the total number of pool, which is
> determined by the total number of VF at PF initialization stage and the number of
> queue specified in config:
> 1) If the number of VF is in the range from 1 to 32 and the number of rxq is 4('--rxq 4' in testpmd),
> then there is totally 32 pools(ETH_32_POOLS), and each VF have 4 queues;
>   
> 2)If the number of VF is in the range from 33 to 64 and the number of rxq is 2('--rxq 2' in testpmd),
> then there is totally 64 pools(ETH_64_POOLS), and each VF have 2 queues;
>   
> On host, to enable VF RSS functionality, rx mq mode should be set as ETH_MQ_RX_VMDQ_RSS
> or ETH_MQ_RX_RSS mode, and SRIOV mode should be activated(max_vfs >= 1).
> It also needs config VF RSS information like hash function, RSS key, RSS key length.
>   
> The limitation for Niantic VF RSS is:
> the hash and key are shared among PF and all VF, the RETA table with 128 entries are
> also shared among PF and all VF. So it is not good idea to query the hash and reta content per VF on
> guest, instead, it makes sense to query them on host(PF).
>   
> v3 change:
>    - More cleanup;

This series is still missing the appropriate patches in the 
rte_eth_dev_info_get() flow to return a reta_size for a VF device; and 
to rte_eth_dev_rss_reta_query() in the context of
a VF device (I haven't noticed the initialization of a 
dev->dev_ops->reta_query for the VF device in this series).

Without these code bits it's impossible to work with the VF devices in 
the RSS context the same way we work with the PF devices. It means that 
we'll have to do some special branching to handle the VF device and this 
voids the whole meaning of the framework which in turn is very unfortunate.

>
> v2 change:
>    - Update the description;
>    - Use receiving queue number('--rxq <q-num>') specified in config to determine the number of pool and
>      the number of queue per VF;
>   
> v1 change:
>    - Config VF RSS;
>
> Changchun Ouyang (6):
>    ixgbe: Code cleanup
>    ixgbe: Negotiate VF API version
>    ixgbe: Get VF queue number
>    ether: Check VMDq RSS mode
>    ixgbe: Config VF RSS
>    testpmd: Set Rx VMDq RSS mode
>
>   app/test-pmd/testpmd.c              |  10 +++
>   lib/librte_ether/rte_ethdev.c       |  39 +++++++++--
>   lib/librte_pmd_ixgbe/ixgbe_ethdev.h |   1 +
>   lib/librte_pmd_ixgbe/ixgbe_pf.c     |  75 ++++++++++++++++++++-
>   lib/librte_pmd_ixgbe/ixgbe_rxtx.c   | 127 ++++++++++++++++++++++++++++--------
>   5 files changed, 219 insertions(+), 33 deletions(-)
>

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic
  2014-12-24  9:59     ` [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic Vlad Zolotarov
@ 2014-12-25  1:46       ` Ouyang, Changchun
  2015-01-05 10:38         ` Bruce Richardson
  0 siblings, 1 reply; 144+ messages in thread
From: Ouyang, Changchun @ 2014-12-25  1:46 UTC (permalink / raw)
  To: Vlad Zolotarov, dev

Hi,

> -----Original Message-----
> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> Sent: Wednesday, December 24, 2014 5:59 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic
> 
> 
> On 12/24/14 07:22, Ouyang Changchun wrote:
> > This patch enables VF RSS for Niantic, which allow each VF having at most 4
> queues.
> > The actual queue number per VF depends on the total number of pool,
> > which is determined by the total number of VF at PF initialization
> > stage and the number of queue specified in config:
> > 1) If the number of VF is in the range from 1 to 32 and the number of
> > rxq is 4('--rxq 4' in testpmd), then there is totally 32
> > pools(ETH_32_POOLS), and each VF have 4 queues;
> >
> > 2)If the number of VF is in the range from 33 to 64 and the number of
> > rxq is 2('--rxq 2' in testpmd), then there is totally 64
> > pools(ETH_64_POOLS), and each VF have 2 queues;
> >
> > On host, to enable VF RSS functionality, rx mq mode should be set as
> > ETH_MQ_RX_VMDQ_RSS or ETH_MQ_RX_RSS mode, and SRIOV mode
> should be activated(max_vfs >= 1).
> > It also needs config VF RSS information like hash function, RSS key, RSS key
> length.
> >
> > The limitation for Niantic VF RSS is:
> > the hash and key are shared among PF and all VF
> 
> Hmmm... This kinda contradicts the previous sentence where u say that VF
> on the host should configure hash and RSS key. If PF and VF share the same
> hash and key what's the use of configuring it in VF? Could u clarify, please?

What makes you think there is a contradiction? To be clearer, would you please copy and paste the 2 sentences that you think contradict each other?
I can correct them if they do, but currently I don't see any contradiction.
"Shared" means that a VF doesn't have its own hash function, hash key, or RETA table.
 
> > , the RETA table with 128 entries are
> > also shared among PF and all VF. So it is not good idea to query the
> > hash and reta content per VF on guest, instead, it makes sense to query
> them on host(PF).
> 
> On the contrary - it's a very good idea! We use DPDK on Amazon's guests
> with enhanced networking and we have no access to the PF. We still need to
> know the RSS redirection rules for our VF pool. From the 82599 spec, chapter
> 4.6.10.1.1: "redirection table is common to all the pools and only indicates the
> queue inside the pool to use once the pool is chosen". In that case we need
> to get the whole 128 entries of the RETA. Is there a reason why we can't have
> it?
>
Due to a hardware limitation, a VF cannot query its own RETA table, because it has no RETA of its own:
the RETA table is shared by the PF and all VFs.
If you need to know its content, querying it on the PF is a feasible way to do so.

> >
> > v3 change:
> >    - More cleanup;
> >
> > v2 change:
> >    - Update the description;
> >    - Use receiving queue number('--rxq <q-num>') specified in config to
> determine the number of pool and
> >      the number of queue per VF;
> >
> > v1 change:
> >    - Config VF RSS;
> >
> > Changchun Ouyang (6):
> >    ixgbe: Code cleanup
> >    ixgbe: Negotiate VF API version
> >    ixgbe: Get VF queue number
> >    ether: Check VMDq RSS mode
> >    ixgbe: Config VF RSS
> >    testpmd: Set Rx VMDq RSS mode
> >
> >   app/test-pmd/testpmd.c              |  10 +++
> >   lib/librte_ether/rte_ethdev.c       |  39 +++++++++--
> >   lib/librte_pmd_ixgbe/ixgbe_ethdev.h |   1 +
> >   lib/librte_pmd_ixgbe/ixgbe_pf.c     |  75 ++++++++++++++++++++-
> >   lib/librte_pmd_ixgbe/ixgbe_rxtx.c   | 127
> ++++++++++++++++++++++++++++--------
> >   5 files changed, 219 insertions(+), 33 deletions(-)
> >

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
  2014-12-24 10:39       ` Vlad Zolotarov
@ 2014-12-25  2:14         ` Ouyang, Changchun
  2014-12-25 13:13           ` Vlad Zolotarov
  2014-12-25  2:43         ` Ouyang, Changchun
  1 sibling, 1 reply; 144+ messages in thread
From: Ouyang, Changchun @ 2014-12-25  2:14 UTC (permalink / raw)
  To: Vlad Zolotarov, dev

Hi,

> -----Original Message-----
> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> Sent: Wednesday, December 24, 2014 6:40 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
> 
> 
> On 12/24/14 07:23, Ouyang Changchun wrote:
> > It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable VF
> RSS.
> >
> > The psrtype will determine how many queues the received packets will
> > distribute to, and the value of psrtype should depends on both facet:
> > max VF rxq number which has been negotiated with PF, and the number of
> rxq specified in config on guest.
> >
> > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> > ---
> >   lib/librte_pmd_ixgbe/ixgbe_pf.c   | 15 +++++++
> >   lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 92
> ++++++++++++++++++++++++++++++++++-----
> >   2 files changed, 97 insertions(+), 10 deletions(-)
> >
> > diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > b/lib/librte_pmd_ixgbe/ixgbe_pf.c index cbb0145..9c9dad8 100644
> > --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev
> *eth_dev)
> >   	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw-
> >mac.num_rar_entries), 0);
> >   	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw-
> >mac.num_rar_entries), 0);
> >
> > +	/*
> > +	 * VF RSS can support at most 4 queues for each VF, even if
> > +	 * 8 queues are available for each VF, it need refine to 4
> > +	 * queues here due to this limitation, otherwise no queue
> > +	 * will receive any packet even RSS is enabled.
> 
> According to Table 7-3 in the 82599 spec RSS is not available when port is
> configured to have 8 queues per pool. This means that if u see this
> configuration u may immediately disable RSS flow in your code.
> 
"8 queues" here means the number of queues available per VF; it is calculated from max vfs,
e.g. if max vfs is 16 (or less), then each VF 'COULD' have 8 queues evenly. The PF estimates this value at an early init stage,
but the estimate is not precise, so it needs to be refined here.
The user doesn't know this estimated value; it is an internal value and doesn't come from the user's input/configuration.
Hope it is clear to you.
> > +	 */
> > +	if (eth_dev->data->dev_conf.rxmode.mq_mode ==
> ETH_MQ_RX_VMDQ_RSS) {
> > +		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
> > +			RTE_ETH_DEV_SRIOV(eth_dev).active =
> ETH_32_POOLS;
> > +			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
> > +			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
> > +				dev_num_vf(eth_dev) * 4;
> 
> According to 82599 spec u can't do that since RSS is not allowed when port is
> configured to have 8 function per-VF. Have u verified that this works? If yes,
> then spec should be updated.
>
See my response above.
Of course I have validated this; it works well.

> > +		}
> > +	}
> > +
> >   	/* set VMDq map to default PF pool */
> >   	hw->mac.ops.set_vmdq(hw, 0,
> > RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
> >
> > diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > index f69abda..a7c17a4 100644
> > --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > @@ -3327,6 +3327,39 @@ ixgbe_alloc_rx_queue_mbufs(struct
> igb_rx_queue *rxq)
> >   }
> >
> >   static int
> > +ixgbe_config_vf_rss(struct rte_eth_dev *dev) {
> > +	struct ixgbe_hw *hw;
> > +	uint32_t mrqc;
> > +
> > +	ixgbe_rss_configure(dev);
> > +
> > +	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> > +
> > +	/* MRQC: enable VF RSS */
> > +	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
> > +	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
> > +	switch (RTE_ETH_DEV_SRIOV(dev).active) {
> > +	case ETH_64_POOLS:
> > +		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
> > +		break;
> > +
> > +	case ETH_32_POOLS:
> > +	case ETH_16_POOLS:
> > +		mrqc |= IXGBE_MRQC_VMDQRSS32EN;
> 
> Again, this contradicts with the spec.
> 
> > +		break;
> > +
> > +	default:
> > +		PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode");
> > +		return -EINVAL;
> > +	}
> > +
> > +	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
> > +
> > +	return 0;
> > +}
> > +
> > +static int
> >   ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
> >   {
> >   	struct ixgbe_hw *hw =
> > @@ -3358,24 +3391,38 @@ ixgbe_dev_mq_rx_configure(struct
> rte_eth_dev *dev)
> >   			default: ixgbe_rss_disable(dev);
> >   		}
> >   	} else {
> > -		switch (RTE_ETH_DEV_SRIOV(dev).active) {
> >   		/*
> >   		 * SRIOV active scheme
> >   		 * FIXME if support DCB/RSS together with VMDq & SRIOV
> >   		 */
> > -		case ETH_64_POOLS:
> > -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> IXGBE_MRQC_VMDQEN);
> > +		switch (dev->data->dev_conf.rxmode.mq_mode) {
> > +		case ETH_MQ_RX_RSS:
> > +		case ETH_MQ_RX_VMDQ_RSS:
> > +			ixgbe_config_vf_rss(dev);
> >   			break;
> >
> > -		case ETH_32_POOLS:
> > -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> IXGBE_MRQC_VMDQRT4TCEN);
> > -			break;
> > +		default:
> > +			switch (RTE_ETH_DEV_SRIOV(dev).active) {
> 
> Sorry for nitpicking but have u considered taking this encapsulated "switch-
> case" block into a separate function? This could make the code look a lot
> nicer. ;)

It is used in only one place, so there is no need to make it a function,
and I prefer the current code.

> 
> > +			case ETH_64_POOLS:
> > +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > +					IXGBE_MRQC_VMDQEN);
> > +				break;
> >
> > -		case ETH_16_POOLS:
> > -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> IXGBE_MRQC_VMDQRT8TCEN);
> > +			case ETH_32_POOLS:
> > +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > +					IXGBE_MRQC_VMDQRT4TCEN);
> > +				break;
> > +
> > +			case ETH_16_POOLS:
> > +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > +					IXGBE_MRQC_VMDQRT8TCEN);
> > +				break;
> > +			default:
> > +				PMD_INIT_LOG(ERR,
> > +					"invalid pool number in IOV mode");
> > +				break;
> > +			}
> >   			break;
> > -		default:
> > -			PMD_INIT_LOG(ERR, "invalid pool number in IOV
> mode");
> >   		}
> >   	}
> >
> > @@ -3989,10 +4036,32 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
> >   	uint16_t buf_size;
> >   	uint16_t i;
> >   	int ret;
> > +	uint16_t valid_rxq_num;
> >
> >   	PMD_INIT_FUNC_TRACE();
> >   	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> >
> > +	valid_rxq_num = RTE_MIN(dev->data->nb_rx_queues,
> > +hw->mac.max_rx_queues);
> > +
> > +	/*
> > +	 * VMDq RSS can't support 3 queues, so config it into 4 queues,
> > +	 * and give user a hint that some packets may loss if it doesn't
> > +	 * poll the queue where those packets are distributed to.
> > +	 */
> > +	if (valid_rxq_num == 3)
> > +		valid_rxq_num = 4;
> 
> Why to configure more queues that requested and not less (2)? Why to
> configure anything at all and not return an error?
> 
> > +
> > +	if (dev->data->nb_rx_queues > valid_rxq_num) {
> > +		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
> > +			"it should be equal to or less than %d",
> > +			valid_rxq_num);
> > +		return -1;
> > +	} else if (dev->data->nb_rx_queues < valid_rxq_num)
> > +		PMD_INIT_LOG(ERR, "The number of Rx queue is less "
> > +			"than the number of available Rx queues:%d, "
> > +			"packets in Rx queues(q_id >= %d) may loss.",
> > +			valid_rxq_num, dev->data->nb_rx_queues);
> 
> Who ever looks in the "INIT_LOG" if everything "work well" and u make it
> look so by allowing this call to succeed. And then some packets will just
> silently not arrive?! And what the used should somehow guess to do?
> - Look in the "INIT_LOG"?! This is a nightmare!

Sorry, again I don't think so. If the user notices any packet loss, he will look at the log,
find this message there, and can then correct the wrong rxq number.
Why is it a nightmare?

I don't agree with you about "silently not arrive", because we have a hint in the log there.

Returning an error here is also a possible way;
again, we need other people's insight here.

> > +
> >   	/*
> >   	 * When the VF driver issues a IXGBE_VF_RESET request, the PF
> driver
> >   	 * disables the VF receipt of packets if the PF MTU is > 1500.
> > @@ -4094,6 +4163,9 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
> >   			IXGBE_PSRTYPE_IPV6HDR;
> >   #endif
> >
> > +	/* Set RQPL for VF RSS according to max Rx queue */
> > +	psrtype |= (valid_rxq_num >> 1) <<
> > +		IXGBE_PSRTYPE_RQPL_SHIFT;
> >   	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
> >
> >   	if (dev->data->dev_conf.rxmode.enable_scatter) {

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic
  2014-12-24 10:49     ` Vlad Zolotarov
@ 2014-12-25  2:26       ` Ouyang, Changchun
  2014-12-25 12:46         ` Vlad Zolotarov
  0 siblings, 1 reply; 144+ messages in thread
From: Ouyang, Changchun @ 2014-12-25  2:26 UTC (permalink / raw)
  To: Vlad Zolotarov, dev

Hi,

> -----Original Message-----
> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> Sent: Wednesday, December 24, 2014 6:49 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic
> 
> 
> On 12/24/14 07:22, Ouyang Changchun wrote:
> > This patch enables VF RSS for Niantic, which allow each VF having at most 4
> queues.
> > The actual queue number per VF depends on the total number of pool,
> > which is determined by the total number of VF at PF initialization
> > stage and the number of queue specified in config:
> > 1) If the number of VF is in the range from 1 to 32 and the number of
> > rxq is 4('--rxq 4' in testpmd), then there is totally 32
> > pools(ETH_32_POOLS), and each VF have 4 queues;
> >
> > 2)If the number of VF is in the range from 33 to 64 and the number of
> > rxq is 2('--rxq 2' in testpmd), then there is totally 64
> > pools(ETH_64_POOLS), and each VF have 2 queues;
> >
> > On host, to enable VF RSS functionality, rx mq mode should be set as
> > ETH_MQ_RX_VMDQ_RSS or ETH_MQ_RX_RSS mode, and SRIOV mode
> should be activated(max_vfs >= 1).
> > It also needs config VF RSS information like hash function, RSS key, RSS key
> length.
> >
> > The limitation for Niantic VF RSS is:
> > the hash and key are shared among PF and all VF, the RETA table with
> > 128 entries are also shared among PF and all VF. So it is not good
> > idea to query the hash and reta content per VF on guest, instead, it makes
> sense to query them on host(PF).
> >
> > v3 change:
> >    - More cleanup;
> 
> This series is still missing the appropriate patches in the
> rte_eth_dev_info_get() flow to return a reta_size for a VF device; and to
> rte_eth_dev_rss_reta_query() in the context of a VF device (I haven't
> noticed the initialization of a
> dev->dev_ops->reta_query for the VF device in this series).
> 
> Without these code bits it's impossible to work with the VF devices in the RSS
> context the same way we work with the PF devices. It means that we'll have
> to do some special branching to handle the VF device and this voids the
> whole meaning of the framework which in turn is very unfortunate.
> 
Again, please try to query the RETA content on the PF/host; this is due to a HW limitation.
It doesn't affect any functionality; only the querying is special.
Before this patch, customers were often told that Niantic can't support VF RSS,
but after lots of experiments we found that it still has limited VF RSS functionality.
Even so, the Linux ixgbe driver has at most 2 queues per VF,
while DPDK can enable 4 queues per VF.
In summary, DPDK can support VF RSS on Niantic with at most 4 queues per VF,
but the querying of the RETA is very limited due to the HW limitation.
Hope you are on the same page now.

Thanks
Changchun

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
  2014-12-24 10:39       ` Vlad Zolotarov
  2014-12-25  2:14         ` Ouyang, Changchun
@ 2014-12-25  2:43         ` Ouyang, Changchun
  2014-12-25 13:20           ` Vlad Zolotarov
  2014-12-25 13:38           ` Vlad Zolotarov
  1 sibling, 2 replies; 144+ messages in thread
From: Ouyang, Changchun @ 2014-12-25  2:43 UTC (permalink / raw)
  To: Vlad Zolotarov, dev

Hi,
Sorry, I missed some comments, so I continue my response below.

> -----Original Message-----
> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> Sent: Wednesday, December 24, 2014 6:40 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
> 
> 
> On 12/24/14 07:23, Ouyang Changchun wrote:
> > It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable VF
> RSS.
> >
> > The psrtype will determine how many queues the received packets will
> > distribute to, and the value of psrtype should depends on both facet:
> > max VF rxq number which has been negotiated with PF, and the number of
> rxq specified in config on guest.
> >
> > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> > ---
> >   lib/librte_pmd_ixgbe/ixgbe_pf.c   | 15 +++++++
> >   lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 92
> ++++++++++++++++++++++++++++++++++-----
> >   2 files changed, 97 insertions(+), 10 deletions(-)
> >
> > diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > b/lib/librte_pmd_ixgbe/ixgbe_pf.c index cbb0145..9c9dad8 100644
> > --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev
> *eth_dev)
> >   	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw-
> >mac.num_rar_entries), 0);
> >   	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw-
> >mac.num_rar_entries), 0);
> >
> > +	/*
> > +	 * VF RSS can support at most 4 queues for each VF, even if
> > +	 * 8 queues are available for each VF, it need refine to 4
> > +	 * queues here due to this limitation, otherwise no queue
> > +	 * will receive any packet even RSS is enabled.
> 
> According to Table 7-3 in the 82599 spec RSS is not available when port is
> configured to have 8 queues per pool. This means that if u see this
> configuration u may immediately disable RSS flow in your code.
> 
> > +	 */
> > +	if (eth_dev->data->dev_conf.rxmode.mq_mode ==
> ETH_MQ_RX_VMDQ_RSS) {
> > +		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
> > +			RTE_ETH_DEV_SRIOV(eth_dev).active =
> ETH_32_POOLS;
> > +			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
> > +			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
> > +				dev_num_vf(eth_dev) * 4;
> 
> According to 82599 spec u can't do that since RSS is not allowed when port is
> configured to have 8 function per-VF. Have u verified that this works? If yes,
> then spec should be updated.
> 
> > +		}
> > +	}
> > +
> >   	/* set VMDq map to default PF pool */
> >   	hw->mac.ops.set_vmdq(hw, 0,
> > RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
> >
> > diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > index f69abda..a7c17a4 100644
> > --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > @@ -3327,6 +3327,39 @@ ixgbe_alloc_rx_queue_mbufs(struct
> igb_rx_queue *rxq)
> >   }
> >
> >   static int
> > +ixgbe_config_vf_rss(struct rte_eth_dev *dev) {
> > +	struct ixgbe_hw *hw;
> > +	uint32_t mrqc;
> > +
> > +	ixgbe_rss_configure(dev);
> > +
> > +	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> > +
> > +	/* MRQC: enable VF RSS */
> > +	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
> > +	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
> > +	switch (RTE_ETH_DEV_SRIOV(dev).active) {
> > +	case ETH_64_POOLS:
> > +		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
> > +		break;
> > +
> > +	case ETH_32_POOLS:
> > +	case ETH_16_POOLS:
> > +		mrqc |= IXGBE_MRQC_VMDQRSS32EN;
> 
> Again, this contradicts with the spec.
Yes, the spec says the HW can't support VF RSS at all, but experiments show that it can be done.
We can focus on discussing the implementation first.
 
> > +		break;
> > +
> > +	default:
> > +		PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode");
> > +		return -EINVAL;
> > +	}
> > +
> > +	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
> > +
> > +	return 0;
> > +}
> > +
> > +static int
> >   ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
> >   {
> >   	struct ixgbe_hw *hw =
> > @@ -3358,24 +3391,38 @@ ixgbe_dev_mq_rx_configure(struct
> rte_eth_dev *dev)
> >   			default: ixgbe_rss_disable(dev);
> >   		}
> >   	} else {
> > -		switch (RTE_ETH_DEV_SRIOV(dev).active) {
> >   		/*
> >   		 * SRIOV active scheme
> >   		 * FIXME if support DCB/RSS together with VMDq & SRIOV
> >   		 */
> > -		case ETH_64_POOLS:
> > -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> IXGBE_MRQC_VMDQEN);
> > +		switch (dev->data->dev_conf.rxmode.mq_mode) {
> > +		case ETH_MQ_RX_RSS:
> > +		case ETH_MQ_RX_VMDQ_RSS:
> > +			ixgbe_config_vf_rss(dev);
> >   			break;
> >
> > -		case ETH_32_POOLS:
> > -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> IXGBE_MRQC_VMDQRT4TCEN);
> > -			break;
> > +		default:
> > +			switch (RTE_ETH_DEV_SRIOV(dev).active) {
> 
> Sorry for nitpicking but have u considered taking this encapsulated "switch-
> case" block into a separate function? This could make the code look a lot
> nicer. ;)
> 
> > +			case ETH_64_POOLS:
> > +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > +					IXGBE_MRQC_VMDQEN);
> > +				break;
> >
> > -		case ETH_16_POOLS:
> > -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> IXGBE_MRQC_VMDQRT8TCEN);
> > +			case ETH_32_POOLS:
> > +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > +					IXGBE_MRQC_VMDQRT4TCEN);
> > +				break;
> > +
> > +			case ETH_16_POOLS:
> > +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > +					IXGBE_MRQC_VMDQRT8TCEN);
> > +				break;
> > +			default:
> > +				PMD_INIT_LOG(ERR,
> > +					"invalid pool number in IOV mode");
> > +				break;
> > +			}
> >   			break;
> > -		default:
> > -			PMD_INIT_LOG(ERR, "invalid pool number in IOV
> mode");
> >   		}
> >   	}
> >
> > @@ -3989,10 +4036,32 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
> >   	uint16_t buf_size;
> >   	uint16_t i;
> >   	int ret;
> > +	uint16_t valid_rxq_num;
> >
> >   	PMD_INIT_FUNC_TRACE();
> >   	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> >
> > +	valid_rxq_num = RTE_MIN(dev->data->nb_rx_queues,
> > +hw->mac.max_rx_queues);
> > +
> > +	/*
> > +	 * VMDq RSS can't support 3 queues, so config it into 4 queues,
> > +	 * and give user a hint that some packets may loss if it doesn't
> > +	 * poll the queue where those packets are distributed to.
> > +	 */
> > +	if (valid_rxq_num == 3)
> > +		valid_rxq_num = 4;
> 
> Why to configure more queues that requested and not less (2)? Why to
> configure anything at all and not return an error?

Sorry, I don't agree that this is configuring "anything", as you say, because I don't use 5, 6, 7, 8, ..., 16, 2014, 2015, ... etc.
Choosing between 2 and 4,
I prefer 4. The reason is that if the user needs more than 3 queues per VF to do something,
and the PF also has the capability to set up 4 queues per VF, then confining it to 2 queues is not a good thing either.
So here I try to enable 4 queues and give the user a hint.
Btw, changing it to 2 is another way; it depends on further insight from others here.

> 
> > +
> > +	if (dev->data->nb_rx_queues > valid_rxq_num) {
> > +		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
> > +			"it should be equal to or less than %d",
> > +			valid_rxq_num);
> > +		return -1;
> > +	} else if (dev->data->nb_rx_queues < valid_rxq_num)
> > +		PMD_INIT_LOG(ERR, "The number of Rx queue is less "
> > +			"than the number of available Rx queues:%d, "
> > +			"packets in Rx queues(q_id >= %d) may loss.",
> > +			valid_rxq_num, dev->data->nb_rx_queues);
> 
> Who ever looks in the "INIT_LOG" if everything "work well" and u make it
> look so by allowing this call to succeed. And then some packets will just
> silently not arrive?! And what the used should somehow guess to do?
> - Look in the "INIT_LOG"?! This is a nightmare!
> 
> > +
> >   	/*
> >   	 * When the VF driver issues a IXGBE_VF_RESET request, the PF
> driver
> >   	 * disables the VF receipt of packets if the PF MTU is > 1500.
> > @@ -4094,6 +4163,9 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
> >   			IXGBE_PSRTYPE_IPV6HDR;
> >   #endif
> >
> > +	/* Set RQPL for VF RSS according to max Rx queue */
> > +	psrtype |= (valid_rxq_num >> 1) <<
> > +		IXGBE_PSRTYPE_RQPL_SHIFT;
> >   	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
> >
> >   	if (dev->data->dev_conf.rxmode.enable_scatter) {

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic
  2014-12-25  2:26       ` Ouyang, Changchun
@ 2014-12-25 12:46         ` Vlad Zolotarov
  2014-12-26  2:37           ` Ouyang, Changchun
  0 siblings, 1 reply; 144+ messages in thread
From: Vlad Zolotarov @ 2014-12-25 12:46 UTC (permalink / raw)
  To: Ouyang, Changchun, dev


On 12/25/14 04:26, Ouyang, Changchun wrote:
> Hi,
>
>> -----Original Message-----
>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>> Sent: Wednesday, December 24, 2014 6:49 PM
>> To: Ouyang, Changchun;dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic
>>
>>
>> On 12/24/14 07:22, Ouyang Changchun wrote:
>>> This patch enables VF RSS for Niantic, which allow each VF having at most 4
>> queues.
>>> The actual queue number per VF depends on the total number of pool,
>>> which is determined by the total number of VF at PF initialization
>>> stage and the number of queue specified in config:
>>> 1) If the number of VF is in the range from 1 to 32 and the number of
>>> rxq is 4('--rxq 4' in testpmd), then there is totally 32
>>> pools(ETH_32_POOLS), and each VF have 4 queues;
>>>
>>> 2)If the number of VF is in the range from 33 to 64 and the number of
>>> rxq is 2('--rxq 2' in testpmd), then there is totally 64
>>> pools(ETH_64_POOLS), and each VF have 2 queues;
>>>
>>> On host, to enable VF RSS functionality, rx mq mode should be set as
>>> ETH_MQ_RX_VMDQ_RSS or ETH_MQ_RX_RSS mode, and SRIOV mode
>> should be activated(max_vfs >= 1).
>>> It also needs config VF RSS information like hash function, RSS key, RSS key
>> length.
>>> The limitation for Niantic VF RSS is:
>>> the hash and key are shared among PF and all VF, the RETA table with
>>> 128 entries are also shared among PF and all VF. So it is not good
>>> idea to query the hash and reta content per VF on guest, instead, it makes
>> sense to query them on host(PF).
>>> v3 change:
>>>     - More cleanup;
>> This series is still missing the appropriate patches in the
>> rte_eth_dev_info_get() flow to return a reta_size for a VF device; and to
>> rte_eth_dev_rss_reta_query() in the context of a VF device (I haven't
>> noticed the initialization of a
>> dev->dev_ops->reta_query for the VF device in this series).
>>
>> Without these code bits it's impossible to work with the VF devices in the RSS
>> context the same way we work with the PF devices. It means that we'll have
>> to do some special branching to handle the VF device and this voids the
>> whole meaning of the framework which in turn is very unfortunate.
>>
> Again pls try to query reta content on pf/host, this is due to hw limitation,

Again, I'm using DPDK from inside a Guest OS on Amazon Cloud. I have no
access to the PF, and never will, for obvious reasons; thus I
can't query it.
Which HW limitations are you referring to? It's clearly a software issue - the
VF-PF channel protocol should have a message to negotiate it but it
looks like Intel hasn't cared to implement it yet, unless I'm missing
something here.
The problems don't end with the RETA. What about the hash key, which is 
also shared? There isn't an appropriate message to query it either. This 
is not a pure DPDK issue - it's a general issue with Linux 82599 drivers.

> It don't affect any functionality, just the querying is special.

How can you describe the fact that some of the DPDK API functionality is missing
as "it don't affect any functionality"? Of course it affects it. Just
like I said, it may cause us to treat the VF in a special way while there
is no real reason to do so.

> Before this patch, customer often was notified Niantic can't support vf rss,
> But with lots of experiments and find that it still has limited vf rss functionality.
> Even on that, linux ixgbe driver has at most 2 queues per vf,
> But the dpdk could enable 4 queues per vf.
> In summary, dpdk could support vf rss on Niantic with at most 4 queues per vf,
> but the querying of reta is very limited due to the HW limitation.

Limited? You meant missing, right?

> Hope you are on the same page now.
>
> Thanks
> Changchun
>

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
  2014-12-25  2:14         ` Ouyang, Changchun
@ 2014-12-25 13:13           ` Vlad Zolotarov
  2014-12-26  2:07             ` Ouyang, Changchun
  0 siblings, 1 reply; 144+ messages in thread
From: Vlad Zolotarov @ 2014-12-25 13:13 UTC (permalink / raw)
  To: Ouyang, Changchun, dev


On 12/25/14 04:14, Ouyang, Changchun wrote:
> Hi,
>
>> -----Original Message-----
>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>> Sent: Wednesday, December 24, 2014 6:40 PM
>> To: Ouyang, Changchun; dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
>>
>>
>> On 12/24/14 07:23, Ouyang Changchun wrote:
>>> It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable VF
>> RSS.
>>> The psrtype will determine how many queues the received packets will
>>> distribute to, and the value of psrtype should depends on both facet:
>>> max VF rxq number which has been negotiated with PF, and the number of
>> rxq specified in config on guest.
>>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
>>> ---
>>>    lib/librte_pmd_ixgbe/ixgbe_pf.c   | 15 +++++++
>>>    lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 92
>> ++++++++++++++++++++++++++++++++++-----
>>>    2 files changed, 97 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>> b/lib/librte_pmd_ixgbe/ixgbe_pf.c index cbb0145..9c9dad8 100644
>>> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>> @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev
>> *eth_dev)
>>>    	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw-
>>> mac.num_rar_entries), 0);
>>>    	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw-
>>> mac.num_rar_entries), 0);
>>>
>>> +	/*
>>> +	 * VF RSS can support at most 4 queues for each VF, even if
>>> +	 * 8 queues are available for each VF, it need refine to 4
>>> +	 * queues here due to this limitation, otherwise no queue
>>> +	 * will receive any packet even RSS is enabled.
>> According to Table 7-3 in the 82599 spec RSS is not available when port is
>> configured to have 8 queues per pool. This means that if u see this
>> configuration u may immediately disable RSS flow in your code.
>>
> 8 queues here means the available number queue per vf, it is calculated according to max vfs,
> e.g. if max vfs is 16(or less than), then each vf 'COULD' have 8 queues evenly, pf early init stage estimate this value,
> but that is not precise, so need refine this.
> User don't know this estimated value, it is internal value, not come from user's input/configure.
> Hope it is clear to you.
>>> +	 */
>>> +	if (eth_dev->data->dev_conf.rxmode.mq_mode ==
>> ETH_MQ_RX_VMDQ_RSS) {
>>> +		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
>>> +			RTE_ETH_DEV_SRIOV(eth_dev).active =
>> ETH_32_POOLS;
>>> +			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
>>> +			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
>>> +				dev_num_vf(eth_dev) * 4;
>> According to 82599 spec u can't do that since RSS is not allowed when port is
>> configured to have 8 function per-VF. Have u verified that this works? If yes,
>> then spec should be updated.
>>
> Response as above,
> Of course I have validated this. It works well.
>
>>> +		}
>>> +	}
>>> +
>>>    	/* set VMDq map to default PF pool */
>>>    	hw->mac.ops.set_vmdq(hw, 0,
>>> RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
>>>
>>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>>> b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>>> index f69abda..a7c17a4 100644
>>> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>>> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>>> @@ -3327,6 +3327,39 @@ ixgbe_alloc_rx_queue_mbufs(struct
>> igb_rx_queue *rxq)
>>>    }
>>>
>>>    static int
>>> +ixgbe_config_vf_rss(struct rte_eth_dev *dev) {
>>> +	struct ixgbe_hw *hw;
>>> +	uint32_t mrqc;
>>> +
>>> +	ixgbe_rss_configure(dev);
>>> +
>>> +	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>>> +
>>> +	/* MRQC: enable VF RSS */
>>> +	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
>>> +	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
>>> +	switch (RTE_ETH_DEV_SRIOV(dev).active) {
>>> +	case ETH_64_POOLS:
>>> +		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
>>> +		break;
>>> +
>>> +	case ETH_32_POOLS:
>>> +	case ETH_16_POOLS:
>>> +		mrqc |= IXGBE_MRQC_VMDQRSS32EN;
>> Again, this contradicts with the spec.
>>
>>> +		break;
>>> +
>>> +	default:
>>> +		PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode");
>>> +		return -EINVAL;
>>> +	}
>>> +
>>> +	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static int
>>>    ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
>>>    {
>>>    	struct ixgbe_hw *hw =
>>> @@ -3358,24 +3391,38 @@ ixgbe_dev_mq_rx_configure(struct
>> rte_eth_dev *dev)
>>>    			default: ixgbe_rss_disable(dev);
>>>    		}
>>>    	} else {
>>> -		switch (RTE_ETH_DEV_SRIOV(dev).active) {
>>>    		/*
>>>    		 * SRIOV active scheme
>>>    		 * FIXME if support DCB/RSS together with VMDq & SRIOV
>>>    		 */
>>> -		case ETH_64_POOLS:
>>> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
>> IXGBE_MRQC_VMDQEN);
>>> +		switch (dev->data->dev_conf.rxmode.mq_mode) {
>>> +		case ETH_MQ_RX_RSS:
>>> +		case ETH_MQ_RX_VMDQ_RSS:
>>> +			ixgbe_config_vf_rss(dev);
>>>    			break;
>>>
>>> -		case ETH_32_POOLS:
>>> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
>> IXGBE_MRQC_VMDQRT4TCEN);
>>> -			break;
>>> +		default:
>>> +			switch (RTE_ETH_DEV_SRIOV(dev).active) {
>> Sorry for nitpicking but have u considered taking this encapsulated "switch-
>> case" block into a separate function? This could make the code look a lot
>> nicer. ;)
> Only one place use it, so don't need make it a function,
> And I prefer to the current code.

Functions may be used not only to hold repeatedly called code but also
to make the calling code more readable. An encapsulated (nested) switch-case
is one example of a *not* readable code construct that should be avoided.

>
>>> +			case ETH_64_POOLS:
>>> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
>>> +					IXGBE_MRQC_VMDQEN);
>>> +				break;
>>>
>>> -		case ETH_16_POOLS:
>>> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
>> IXGBE_MRQC_VMDQRT8TCEN);
>>> +			case ETH_32_POOLS:
>>> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
>>> +					IXGBE_MRQC_VMDQRT4TCEN);
>>> +				break;
>>> +
>>> +			case ETH_16_POOLS:
>>> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
>>> +					IXGBE_MRQC_VMDQRT8TCEN);
>>> +				break;
>>> +			default:
>>> +				PMD_INIT_LOG(ERR,
>>> +					"invalid pool number in IOV mode");
>>> +				break;
>>> +			}
>>>    			break;
>>> -		default:
>>> -			PMD_INIT_LOG(ERR, "invalid pool number in IOV
>> mode");
>>>    		}
>>>    	}
>>>
>>> @@ -3989,10 +4036,32 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
>>>    	uint16_t buf_size;
>>>    	uint16_t i;
>>>    	int ret;
>>> +	uint16_t valid_rxq_num;
>>>
>>>    	PMD_INIT_FUNC_TRACE();
>>>    	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>>>
>>> +	valid_rxq_num = RTE_MIN(dev->data->nb_rx_queues,
>>> +hw->mac.max_rx_queues);
>>> +
>>> +	/*
>>> +	 * VMDq RSS can't support 3 queues, so config it into 4 queues,
>>> +	 * and give user a hint that some packets may loss if it doesn't
>>> +	 * poll the queue where those packets are distributed to.
>>> +	 */
>>> +	if (valid_rxq_num == 3)
>>> +		valid_rxq_num = 4;
>> Why to configure more queues that requested and not less (2)? Why to
>> configure anything at all and not return an error?
>>
>>> +
>>> +	if (dev->data->nb_rx_queues > valid_rxq_num) {
>>> +		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
>>> +			"it should be equal to or less than %d",
>>> +			valid_rxq_num);
>>> +		return -1;
>>> +	} else if (dev->data->nb_rx_queues < valid_rxq_num)
>>> +		PMD_INIT_LOG(ERR, "The number of Rx queue is less "
>>> +			"than the number of available Rx queues:%d, "
>>> +			"packets in Rx queues(q_id >= %d) may loss.",
>>> +			valid_rxq_num, dev->data->nb_rx_queues);
>> Who ever looks in the "INIT_LOG" if everything "work well" and u make it
>> look so by allowing this call to succeed. And then some packets will just
>> silently not arrive?! And what the used should somehow guess to do?
>> - Look in the "INIT_LOG"?! This is a nightmare!
> Sorry, I don't think so again, if user find any packets loss, he will care for log,
> Then he can find that log there, then user can refine its rxq number due the wrong rxq number,
> Why is it a nightmare?

Because usually u expect that if a function call returns
success it means success. Why should a user have to learn from packet
loss that a device configuration function was given wrong
parameters? If parameters are not allowed u expect to get an error as a
return value. Since when are errors returned in the form of a log message?
Why do u think there is a living person running a DPDK based
application? How do u expect somebody to build an automated environment
when some of the errors are returned in some log? Should he/she add a log
parser?
On the other hand, why do u think 4 queues is a better option for a user
than 2 queues when he asked for 3 queues? What kind of heuristic is that?

To summarize - it would be much better if u just returned an EINVAL 
error in that case.

>
> I don't agree with you about "silently not arrive", because we have hint/log there.
>
> Return error here is also possible way,

It's the only possible way! ;)

> Again need other guys' insight here.
>
>>> +
>>>    	/*
>>>    	 * When the VF driver issues a IXGBE_VF_RESET request, the PF
>> driver
>>>    	 * disables the VF receipt of packets if the PF MTU is > 1500.
>>> @@ -4094,6 +4163,9 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
>>>    			IXGBE_PSRTYPE_IPV6HDR;
>>>    #endif
>>>
>>> +	/* Set RQPL for VF RSS according to max Rx queue */
>>> +	psrtype |= (valid_rxq_num >> 1) <<
>>> +		IXGBE_PSRTYPE_RQPL_SHIFT;
>>>    	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
>>>
>>>    	if (dev->data->dev_conf.rxmode.enable_scatter) {

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
  2014-12-25  2:43         ` Ouyang, Changchun
@ 2014-12-25 13:20           ` Vlad Zolotarov
  2014-12-26  1:52             ` Ouyang, Changchun
  2014-12-25 13:38           ` Vlad Zolotarov
  1 sibling, 1 reply; 144+ messages in thread
From: Vlad Zolotarov @ 2014-12-25 13:20 UTC (permalink / raw)
  To: Ouyang, Changchun, dev


On 12/25/14 04:43, Ouyang, Changchun wrote:
> Hi,
> Sorry miss some comments, so continue my response below,
>
>> -----Original Message-----
>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>> Sent: Wednesday, December 24, 2014 6:40 PM
>> To: Ouyang, Changchun; dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
>>
>>
>> On 12/24/14 07:23, Ouyang Changchun wrote:
>>> It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable VF
>> RSS.
>>> The psrtype will determine how many queues the received packets will
>>> distribute to, and the value of psrtype should depends on both facet:
>>> max VF rxq number which has been negotiated with PF, and the number of
>> rxq specified in config on guest.
>>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
>>> ---
>>>    lib/librte_pmd_ixgbe/ixgbe_pf.c   | 15 +++++++
>>>    lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 92
>> ++++++++++++++++++++++++++++++++++-----
>>>    2 files changed, 97 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>> b/lib/librte_pmd_ixgbe/ixgbe_pf.c index cbb0145..9c9dad8 100644
>>> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>> @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev
>> *eth_dev)
>>>    	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw-
>>> mac.num_rar_entries), 0);
>>>    	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw-
>>> mac.num_rar_entries), 0);
>>>
>>> +	/*
>>> +	 * VF RSS can support at most 4 queues for each VF, even if
>>> +	 * 8 queues are available for each VF, it need refine to 4
>>> +	 * queues here due to this limitation, otherwise no queue
>>> +	 * will receive any packet even RSS is enabled.
>> According to Table 7-3 in the 82599 spec RSS is not available when port is
>> configured to have 8 queues per pool. This means that if u see this
>> configuration u may immediately disable RSS flow in your code.
>>
>>> +	 */
>>> +	if (eth_dev->data->dev_conf.rxmode.mq_mode ==
>> ETH_MQ_RX_VMDQ_RSS) {
>>> +		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
>>> +			RTE_ETH_DEV_SRIOV(eth_dev).active =
>> ETH_32_POOLS;
>>> +			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
>>> +			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
>>> +				dev_num_vf(eth_dev) * 4;
>> According to 82599 spec u can't do that since RSS is not allowed when port is
>> configured to have 8 function per-VF. Have u verified that this works? If yes,
>> then spec should be updated.
>>
>>> +		}
>>> +	}
>>> +
>>>    	/* set VMDq map to default PF pool */
>>>    	hw->mac.ops.set_vmdq(hw, 0,
>>> RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
>>>
>>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>>> b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>>> index f69abda..a7c17a4 100644
>>> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>>> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>>> @@ -3327,6 +3327,39 @@ ixgbe_alloc_rx_queue_mbufs(struct
>> igb_rx_queue *rxq)
>>>    }
>>>
>>>    static int
>>> +ixgbe_config_vf_rss(struct rte_eth_dev *dev) {
>>> +	struct ixgbe_hw *hw;
>>> +	uint32_t mrqc;
>>> +
>>> +	ixgbe_rss_configure(dev);
>>> +
>>> +	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>>> +
>>> +	/* MRQC: enable VF RSS */
>>> +	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
>>> +	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
>>> +	switch (RTE_ETH_DEV_SRIOV(dev).active) {
>>> +	case ETH_64_POOLS:
>>> +		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
>>> +		break;
>>> +
>>> +	case ETH_32_POOLS:
>>> +	case ETH_16_POOLS:
>>> +		mrqc |= IXGBE_MRQC_VMDQRSS32EN;
>> Again, this contradicts with the spec.
> Yes, the spec say the hw can't support vf rss at all, but experiment find that could be done.

The spec explicitly says that VF RSS *is* supported, in particular in the
table mentioned above.
What your code is doing is that in the case of 16 VFs u set up a 32-pool
configuration and use only 16 of them.

> We can focus on discussing the implementation firstly.

>   
>>> +		break;
>>> +
>>> +	default:
>>> +		PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode");
>>> +		return -EINVAL;
>>> +	}
>>> +
>>> +	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static int
>>>    ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
>>>    {
>>>    	struct ixgbe_hw *hw =
>>> @@ -3358,24 +3391,38 @@ ixgbe_dev_mq_rx_configure(struct
>> rte_eth_dev *dev)
>>>    			default: ixgbe_rss_disable(dev);
>>>    		}
>>>    	} else {
>>> -		switch (RTE_ETH_DEV_SRIOV(dev).active) {
>>>    		/*
>>>    		 * SRIOV active scheme
>>>    		 * FIXME if support DCB/RSS together with VMDq & SRIOV
>>>    		 */
>>> -		case ETH_64_POOLS:
>>> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
>> IXGBE_MRQC_VMDQEN);
>>> +		switch (dev->data->dev_conf.rxmode.mq_mode) {
>>> +		case ETH_MQ_RX_RSS:
>>> +		case ETH_MQ_RX_VMDQ_RSS:
>>> +			ixgbe_config_vf_rss(dev);
>>>    			break;
>>>
>>> -		case ETH_32_POOLS:
>>> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
>> IXGBE_MRQC_VMDQRT4TCEN);
>>> -			break;
>>> +		default:
>>> +			switch (RTE_ETH_DEV_SRIOV(dev).active) {
>> Sorry for nitpicking but have u considered taking this encapsulated "switch-
>> case" block into a separate function? This could make the code look a lot
>> nicer. ;)
>>
>>> +			case ETH_64_POOLS:
>>> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
>>> +					IXGBE_MRQC_VMDQEN);
>>> +				break;
>>>
>>> -		case ETH_16_POOLS:
>>> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
>> IXGBE_MRQC_VMDQRT8TCEN);
>>> +			case ETH_32_POOLS:
>>> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
>>> +					IXGBE_MRQC_VMDQRT4TCEN);
>>> +				break;
>>> +
>>> +			case ETH_16_POOLS:
>>> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
>>> +					IXGBE_MRQC_VMDQRT8TCEN);
>>> +				break;
>>> +			default:
>>> +				PMD_INIT_LOG(ERR,
>>> +					"invalid pool number in IOV mode");
>>> +				break;
>>> +			}
>>>    			break;
>>> -		default:
>>> -			PMD_INIT_LOG(ERR, "invalid pool number in IOV
>> mode");
>>>    		}
>>>    	}
>>>
>>> @@ -3989,10 +4036,32 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
>>>    	uint16_t buf_size;
>>>    	uint16_t i;
>>>    	int ret;
>>> +	uint16_t valid_rxq_num;
>>>
>>>    	PMD_INIT_FUNC_TRACE();
>>>    	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>>>
>>> +	valid_rxq_num = RTE_MIN(dev->data->nb_rx_queues,
>>> +hw->mac.max_rx_queues);
>>> +
>>> +	/*
>>> +	 * VMDq RSS can't support 3 queues, so config it into 4 queues,
>>> +	 * and give user a hint that some packets may loss if it doesn't
>>> +	 * poll the queue where those packets are distributed to.
>>> +	 */
>>> +	if (valid_rxq_num == 3)
>>> +		valid_rxq_num = 4;
>> Why to configure more queues that requested and not less (2)? Why to
>> configure anything at all and not return an error?
> Sorry, I don't agree this is "anything" you say, because I don't use 5,6,7, 8, ..., 16, 2014, 2015,... etc.
> By considering 2 or 4,
> I prefer 4, the reason is if user need more than 3 queues per vf to do something,
> And pf has also the capability to setup 4 queues per vf, confining to 2 queues is also not good thing,
> So here try to enable 4 queues, and give user hints here.
> Btw, change it into 2 is another way, depends on other guys' more insight here.

Like I said before, trying to guess what the user wants is a way of making
code that is very hard to use and to maintain. Pls., just return an
error and let the user code deal with it the way he/she really wants and
not the way u *think* he/she wants.

>
>>> +
>>> +	if (dev->data->nb_rx_queues > valid_rxq_num) {
>>> +		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
>>> +			"it should be equal to or less than %d",
>>> +			valid_rxq_num);
>>> +		return -1;
>>> +	} else if (dev->data->nb_rx_queues < valid_rxq_num)
>>> +		PMD_INIT_LOG(ERR, "The number of Rx queue is less "
>>> +			"than the number of available Rx queues:%d, "
>>> +			"packets in Rx queues(q_id >= %d) may loss.",
>>> +			valid_rxq_num, dev->data->nb_rx_queues);
>> Who ever looks in the "INIT_LOG" if everything "work well" and u make it
>> look so by allowing this call to succeed. And then some packets will just
>> silently not arrive?! And what the used should somehow guess to do?
>> - Look in the "INIT_LOG"?! This is a nightmare!
>>
>>> +
>>>    	/*
>>>    	 * When the VF driver issues a IXGBE_VF_RESET request, the PF
>> driver
>>>    	 * disables the VF receipt of packets if the PF MTU is > 1500.
>>> @@ -4094,6 +4163,9 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
>>>    			IXGBE_PSRTYPE_IPV6HDR;
>>>    #endif
>>>
>>> +	/* Set RQPL for VF RSS according to max Rx queue */
>>> +	psrtype |= (valid_rxq_num >> 1) <<
>>> +		IXGBE_PSRTYPE_RQPL_SHIFT;
>>>    	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
>>>
>>>    	if (dev->data->dev_conf.rxmode.enable_scatter) {

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
  2014-12-25  2:43         ` Ouyang, Changchun
  2014-12-25 13:20           ` Vlad Zolotarov
@ 2014-12-25 13:38           ` Vlad Zolotarov
  2014-12-26  1:26             ` Ouyang, Changchun
  1 sibling, 1 reply; 144+ messages in thread
From: Vlad Zolotarov @ 2014-12-25 13:38 UTC (permalink / raw)
  To: Ouyang, Changchun, dev


On 12/25/14 04:43, Ouyang, Changchun wrote:
> Hi,
> Sorry miss some comments, so continue my response below,
>
>> -----Original Message-----
>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>> Sent: Wednesday, December 24, 2014 6:40 PM
>> To: Ouyang, Changchun; dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
>>
>>
>> On 12/24/14 07:23, Ouyang Changchun wrote:
>>> It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable VF
>> RSS.
>>> The psrtype will determine how many queues the received packets will
>>> distribute to, and the value of psrtype should depends on both facet:
>>> max VF rxq number which has been negotiated with PF, and the number of
>> rxq specified in config on guest.
>>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
>>> ---
>>>    lib/librte_pmd_ixgbe/ixgbe_pf.c   | 15 +++++++
>>>    lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 92
>> ++++++++++++++++++++++++++++++++++-----
>>>    2 files changed, 97 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>> b/lib/librte_pmd_ixgbe/ixgbe_pf.c index cbb0145..9c9dad8 100644
>>> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>> @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev
>> *eth_dev)
>>>    	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw-
>>> mac.num_rar_entries), 0);
>>>    	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw-
>>> mac.num_rar_entries), 0);
>>>
>>> +	/*
>>> +	 * VF RSS can support at most 4 queues for each VF, even if
>>> +	 * 8 queues are available for each VF, it need refine to 4
>>> +	 * queues here due to this limitation, otherwise no queue
>>> +	 * will receive any packet even RSS is enabled.
>> According to Table 7-3 in the 82599 spec RSS is not available when port is
>> configured to have 8 queues per pool. This means that if u see this
>> configuration u may immediately disable RSS flow in your code.
>>
>>> +	 */
>>> +	if (eth_dev->data->dev_conf.rxmode.mq_mode ==
>> ETH_MQ_RX_VMDQ_RSS) {
>>> +		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
>>> +			RTE_ETH_DEV_SRIOV(eth_dev).active =
>> ETH_32_POOLS;
>>> +			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
>>> +			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
>>> +				dev_num_vf(eth_dev) * 4;
>> According to 82599 spec u can't do that since RSS is not allowed when port is
>> configured to have 8 function per-VF. Have u verified that this works? If yes,
>> then spec should be updated.
>>
>>> +		}
>>> +	}
>>> +
>>>    	/* set VMDq map to default PF pool */
>>>    	hw->mac.ops.set_vmdq(hw, 0,
>>> RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
>>>
>>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>>> b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>>> index f69abda..a7c17a4 100644
>>> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>>> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>>> @@ -3327,6 +3327,39 @@ ixgbe_alloc_rx_queue_mbufs(struct
>> igb_rx_queue *rxq)
>>>    }
>>>
>>>    static int
>>> +ixgbe_config_vf_rss(struct rte_eth_dev *dev) {
>>> +	struct ixgbe_hw *hw;
>>> +	uint32_t mrqc;
>>> +
>>> +	ixgbe_rss_configure(dev);
>>> +
>>> +	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>>> +
>>> +	/* MRQC: enable VF RSS */
>>> +	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
>>> +	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
>>> +	switch (RTE_ETH_DEV_SRIOV(dev).active) {
>>> +	case ETH_64_POOLS:
>>> +		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
>>> +		break;
>>> +
>>> +	case ETH_32_POOLS:
>>> +	case ETH_16_POOLS:
>>> +		mrqc |= IXGBE_MRQC_VMDQRSS32EN;
>> Again, this contradicts with the spec.
> Yes, the spec say the hw can't support vf rss at all, but experiment find that could be done.

I have just realized something - why did u have to experiment at all? U
work at Intel, don't u? Can't u just ask a HW engineer who
designed this NIC? What do u mean by an "experiment" here?
From my experience u can't just write some random values to the
registers and conclude that if it worked for like 5 minutes it will
continue to work for the next minute... There is always a clear
procedure for how HW should be initialized and used and that's the only
way it may be used, since this is the way the HW has been tested. U
can't assume anything in regard to reliability if u don't follow the specs
and programmer manuals of the HW provider.

Could u clarify, please?

> We can focus on discussing the implementation firstly.
>   
>>> +		break;
>>> +
>>> +	default:
>>> +		PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode");
>>> +		return -EINVAL;
>>> +	}
>>> +
>>> +	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static int
>>>    ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
>>>    {
>>>    	struct ixgbe_hw *hw =
>>> @@ -3358,24 +3391,38 @@ ixgbe_dev_mq_rx_configure(struct
>> rte_eth_dev *dev)
>>>    			default: ixgbe_rss_disable(dev);
>>>    		}
>>>    	} else {
>>> -		switch (RTE_ETH_DEV_SRIOV(dev).active) {
>>>    		/*
>>>    		 * SRIOV active scheme
>>>    		 * FIXME if support DCB/RSS together with VMDq & SRIOV
>>>    		 */
>>> -		case ETH_64_POOLS:
>>> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
>> IXGBE_MRQC_VMDQEN);
>>> +		switch (dev->data->dev_conf.rxmode.mq_mode) {
>>> +		case ETH_MQ_RX_RSS:
>>> +		case ETH_MQ_RX_VMDQ_RSS:
>>> +			ixgbe_config_vf_rss(dev);
>>>    			break;
>>>
>>> -		case ETH_32_POOLS:
>>> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
>> IXGBE_MRQC_VMDQRT4TCEN);
>>> -			break;
>>> +		default:
>>> +			switch (RTE_ETH_DEV_SRIOV(dev).active) {
>> Sorry for nitpicking but have u considered taking this encapsulated "switch-
>> case" block into a separate function? This could make the code look a lot
>> nicer. ;)
>>
>>> +			case ETH_64_POOLS:
>>> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
>>> +					IXGBE_MRQC_VMDQEN);
>>> +				break;
>>>
>>> -		case ETH_16_POOLS:
>>> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
>> IXGBE_MRQC_VMDQRT8TCEN);
>>> +			case ETH_32_POOLS:
>>> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
>>> +					IXGBE_MRQC_VMDQRT4TCEN);
>>> +				break;
>>> +
>>> +			case ETH_16_POOLS:
>>> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
>>> +					IXGBE_MRQC_VMDQRT8TCEN);
>>> +				break;
>>> +			default:
>>> +				PMD_INIT_LOG(ERR,
>>> +					"invalid pool number in IOV mode");
>>> +				break;
>>> +			}
>>>    			break;
>>> -		default:
>>> -			PMD_INIT_LOG(ERR, "invalid pool number in IOV
>> mode");
>>>    		}
>>>    	}
>>>
>>> @@ -3989,10 +4036,32 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
>>>    	uint16_t buf_size;
>>>    	uint16_t i;
>>>    	int ret;
>>> +	uint16_t valid_rxq_num;
>>>
>>>    	PMD_INIT_FUNC_TRACE();
>>>    	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>>>
>>> +	valid_rxq_num = RTE_MIN(dev->data->nb_rx_queues,
>>> +hw->mac.max_rx_queues);
>>> +
>>> +	/*
>>> +	 * VMDq RSS can't support 3 queues, so config it into 4 queues,
>>> +	 * and give user a hint that some packets may loss if it doesn't
>>> +	 * poll the queue where those packets are distributed to.
>>> +	 */
>>> +	if (valid_rxq_num == 3)
>>> +		valid_rxq_num = 4;
>> Why to configure more queues that requested and not less (2)? Why to
>> configure anything at all and not return an error?
> Sorry, I don't agree this is "anything" you say, because I don't use 5,6,7, 8, ..., 16, 2014, 2015,... etc.
> By considering 2 or 4,
> I prefer 4, the reason is if user need more than 3 queues per vf to do something,
> And pf has also the capability to setup 4 queues per vf, confining to 2 queues is also not good thing,
> So here try to enable 4 queues, and give user hints here.
> Btw, change it into 2 is another way, depends on other guys' more insight here.
>
>>> +
>>> +	if (dev->data->nb_rx_queues > valid_rxq_num) {
>>> +		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
>>> +			"it should be equal to or less than %d",
>>> +			valid_rxq_num);
>>> +		return -1;
>>> +	} else if (dev->data->nb_rx_queues < valid_rxq_num)
>>> +		PMD_INIT_LOG(ERR, "The number of Rx queue is less "
>>> +			"than the number of available Rx queues:%d, "
>>> +			"packets in Rx queues(q_id >= %d) may loss.",
>>> +			valid_rxq_num, dev->data->nb_rx_queues);
>> Who ever looks in the "INIT_LOG" if everything "work well" and u make it
>> look so by allowing this call to succeed. And then some packets will just
>> silently not arrive?! And what the used should somehow guess to do?
>> - Look in the "INIT_LOG"?! This is a nightmare!
>>
>>> +
>>>    	/*
>>>    	 * When the VF driver issues a IXGBE_VF_RESET request, the PF
>> driver
>>>    	 * disables the VF receipt of packets if the PF MTU is > 1500.
>>> @@ -4094,6 +4163,9 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
>>>    			IXGBE_PSRTYPE_IPV6HDR;
>>>    #endif
>>>
>>> +	/* Set RQPL for VF RSS according to max Rx queue */
>>> +	psrtype |= (valid_rxq_num >> 1) <<
>>> +		IXGBE_PSRTYPE_RQPL_SHIFT;
>>>    	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
>>>
>>>    	if (dev->data->dev_conf.rxmode.enable_scatter) {

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
  2014-12-25 13:38           ` Vlad Zolotarov
@ 2014-12-26  1:26             ` Ouyang, Changchun
  0 siblings, 0 replies; 144+ messages in thread
From: Ouyang, Changchun @ 2014-12-26  1:26 UTC (permalink / raw)
  To: Vlad Zolotarov, dev



> -----Original Message-----
> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> Sent: Thursday, December 25, 2014 9:39 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
> 
> 
> On 12/25/14 04:43, Ouyang, Changchun wrote:
> > Hi,
> > Sorry miss some comments, so continue my response below,
> >
> >> -----Original Message-----
> >> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> >> Sent: Wednesday, December 24, 2014 6:40 PM
> >> To: Ouyang, Changchun; dev@dpdk.org
> >> Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
> >>
> >>
> >> On 12/24/14 07:23, Ouyang Changchun wrote:
> >>> It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable
> VF
> >> RSS.
> >>> The psrtype will determine how many queues the received packets will
> >>> distribute to, and the value of psrtype should depends on both facet:
> >>> max VF rxq number which has been negotiated with PF, and the number
> >>> of
> >> rxq specified in config on guest.
> >>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> >>> ---
> >>>    lib/librte_pmd_ixgbe/ixgbe_pf.c   | 15 +++++++
> >>>    lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 92
> >> ++++++++++++++++++++++++++++++++++-----
> >>>    2 files changed, 97 insertions(+), 10 deletions(-)
> >>>
> >>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> >>> b/lib/librte_pmd_ixgbe/ixgbe_pf.c index cbb0145..9c9dad8 100644
> >>> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> >>> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> >>> @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev
> >> *eth_dev)
> >>>    	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw-
> mac.num_rar_entries), 0);
> >>>    	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw-
> mac.num_rar_entries), 0);
> >>>
> >>> +	/*
> >>> +	 * VF RSS can support at most 4 queues for each VF, even if
> >>> +	 * 8 queues are available for each VF, it need refine to 4
> >>> +	 * queues here due to this limitation, otherwise no queue
> >>> +	 * will receive any packet even RSS is enabled.
> >> According to Table 7-3 in the 82599 spec RSS is not available when
> >> port is configured to have 8 queues per pool. This means that if u
> >> see this configuration u may immediately disable RSS flow in your code.
> >>
> >>> +	 */
> >>> +	if (eth_dev->data->dev_conf.rxmode.mq_mode ==
> >> ETH_MQ_RX_VMDQ_RSS) {
> >>> +		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
> >>> +			RTE_ETH_DEV_SRIOV(eth_dev).active =
> >> ETH_32_POOLS;
> >>> +			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
> >>> +			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
> >>> +				dev_num_vf(eth_dev) * 4;
> >> According to 82599 spec u can't do that since RSS is not allowed when
> >> port is configured to have 8 function per-VF. Have u verified that
> >> this works? If yes, then spec should be updated.
> >>
> >>> +		}
> >>> +	}
> >>> +
> >>>    	/* set VMDq map to default PF pool */
> >>>    	hw->mac.ops.set_vmdq(hw, 0,
> >>> RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
> >>>
> >>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> >>> b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> >>> index f69abda..a7c17a4 100644
> >>> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> >>> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> >>> @@ -3327,6 +3327,39 @@ ixgbe_alloc_rx_queue_mbufs(struct
> >> igb_rx_queue *rxq)
> >>>    }
> >>>
> >>>    static int
> >>> +ixgbe_config_vf_rss(struct rte_eth_dev *dev) {
> >>> +	struct ixgbe_hw *hw;
> >>> +	uint32_t mrqc;
> >>> +
> >>> +	ixgbe_rss_configure(dev);
> >>> +
> >>> +	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> >>> +
> >>> +	/* MRQC: enable VF RSS */
> >>> +	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
> >>> +	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
> >>> +	switch (RTE_ETH_DEV_SRIOV(dev).active) {
> >>> +	case ETH_64_POOLS:
> >>> +		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
> >>> +		break;
> >>> +
> >>> +	case ETH_32_POOLS:
> >>> +	case ETH_16_POOLS:
> >>> +		mrqc |= IXGBE_MRQC_VMDQRSS32EN;
> >> Again, this contradicts with the spec.
> > Yes, the spec say the hw can't support vf rss at all, but experiment find that
> could be done.
> 
> I have just realized something - why did u have to experiment at all? U work
> at Intel, don't u? Can't u just ask a HW engineer that have designed this NIC?
> What do u mean by an "experiment" here?

The HW and DPDK teams are in different geographies and different divisions, so experiments are often needed,
because no spec, doc, discussion, or set of meeting minutes is perfect, absolutely correct about everything,
and missing nothing. Do you agree?
  
Anyway, let's focus on the technical questions, please.
 
>  From my experience u can't just write some random values in the registers

I don't think it is random data at all. Do you see me using a random seed to generate random data here?

> and conclude that if it worked for like 5 minutes it will continue to work for
> the next minute... There is always a clear procedure of how HW should be
> initialized and used and that's the only way it may be used since this was the
> way the HW has been tested. U can't assume anything in regards to reliability
> if u don't follow specs and programmer manuals of HW provider.
>
Sorry, I don't agree that it fails to follow the spec and programmer manuals.
If you look at the bigger picture here (I just mean the overall process for enabling VF RSS, and nothing else;
please don't read too much into it, I hope that is clear), I am sure you won't be confused.
Or is my observation right that you probably have another, perfect solution to support VF RSS in DPDK?
If you have one, you could also submit your patch here; this is open source, and I think you are free to do so.

> Could u clarify, please?
> 
> > We can focus on discussing the implementation firstly.
> >
> >>> +		break;
> >>> +
> >>> +	default:
> >>> +		PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode");
> >>> +		return -EINVAL;
> >>> +	}
> >>> +
> >>> +	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
> >>> +
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +static int
> >>>    ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
> >>>    {
> >>>    	struct ixgbe_hw *hw =
> >>> @@ -3358,24 +3391,38 @@ ixgbe_dev_mq_rx_configure(struct
> >> rte_eth_dev *dev)
> >>>    			default: ixgbe_rss_disable(dev);
> >>>    		}
> >>>    	} else {
> >>> -		switch (RTE_ETH_DEV_SRIOV(dev).active) {
> >>>    		/*
> >>>    		 * SRIOV active scheme
> >>>    		 * FIXME if support DCB/RSS together with VMDq & SRIOV
> >>>    		 */
> >>> -		case ETH_64_POOLS:
> >>> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> >> IXGBE_MRQC_VMDQEN);
> >>> +		switch (dev->data->dev_conf.rxmode.mq_mode) {
> >>> +		case ETH_MQ_RX_RSS:
> >>> +		case ETH_MQ_RX_VMDQ_RSS:
> >>> +			ixgbe_config_vf_rss(dev);
> >>>    			break;
> >>>
> >>> -		case ETH_32_POOLS:
> >>> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> >> IXGBE_MRQC_VMDQRT4TCEN);
> >>> -			break;
> >>> +		default:
> >>> +			switch (RTE_ETH_DEV_SRIOV(dev).active) {
> >> Sorry for nitpicking but have u considered taking this encapsulated
> >> "switch- case" block into a separate function? This could make the
> >> code look a lot nicer. ;)
> >>
> >>> +			case ETH_64_POOLS:
> >>> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> >>> +					IXGBE_MRQC_VMDQEN);
> >>> +				break;
> >>>
> >>> -		case ETH_16_POOLS:
> >>> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> >> IXGBE_MRQC_VMDQRT8TCEN);
> >>> +			case ETH_32_POOLS:
> >>> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> >>> +					IXGBE_MRQC_VMDQRT4TCEN);
> >>> +				break;
> >>> +
> >>> +			case ETH_16_POOLS:
> >>> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> >>> +					IXGBE_MRQC_VMDQRT8TCEN);
> >>> +				break;
> >>> +			default:
> >>> +				PMD_INIT_LOG(ERR,
> >>> +					"invalid pool number in IOV mode");
> >>> +				break;
> >>> +			}
> >>>    			break;
> >>> -		default:
> >>> -			PMD_INIT_LOG(ERR, "invalid pool number in IOV
> >> mode");
> >>>    		}
> >>>    	}
> >>>
> >>> @@ -3989,10 +4036,32 @@ ixgbevf_dev_rx_init(struct rte_eth_dev
> *dev)
> >>>    	uint16_t buf_size;
> >>>    	uint16_t i;
> >>>    	int ret;
> >>> +	uint16_t valid_rxq_num;
> >>>
> >>>    	PMD_INIT_FUNC_TRACE();
> >>>    	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> >>>
> >>> +	valid_rxq_num = RTE_MIN(dev->data->nb_rx_queues,
> >>> +hw->mac.max_rx_queues);
> >>> +
> >>> +	/*
> >>> +	 * VMDq RSS can't support 3 queues, so config it into 4 queues,
> >>> +	 * and give user a hint that some packets may loss if it doesn't
> >>> +	 * poll the queue where those packets are distributed to.
> >>> +	 */
> >>> +	if (valid_rxq_num == 3)
> >>> +		valid_rxq_num = 4;
> >> Why to configure more queues that requested and not less (2)? Why to
> >> configure anything at all and not return an error?
> > Sorry, I don't agree this is "anything" you say, because I don't use 5,6,7,
> 8, ..., 16, 2014, 2015,... etc.
> > By considering 2 or 4,
> > I prefer 4, the reason is if user need more than 3 queues per vf to do
> > something, And pf has also the capability to setup 4 queues per vf,
> > confining to 2 queues is also not good thing, So here try to enable 4 queues,
> and give user hints here.
> > Btw, change it into 2 is another way, depends on other guys' more insight
> here.
> >
> >>> +
> >>> +	if (dev->data->nb_rx_queues > valid_rxq_num) {
> >>> +		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
> >>> +			"it should be equal to or less than %d",
> >>> +			valid_rxq_num);
> >>> +		return -1;
> >>> +	} else if (dev->data->nb_rx_queues < valid_rxq_num)
> >>> +		PMD_INIT_LOG(ERR, "The number of Rx queue is less "
> >>> +			"than the number of available Rx queues:%d, "
> >>> +			"packets in Rx queues(q_id >= %d) may loss.",
> >>> +			valid_rxq_num, dev->data->nb_rx_queues);
> >> Who ever looks in the "INIT_LOG" if everything "work well" and u make
> >> it look so by allowing this call to succeed. And then some packets
> >> will just silently not arrive?! And what the used should somehow guess to
> do?
> >> - Look in the "INIT_LOG"?! This is a nightmare!
> >>
> >>> +
> >>>    	/*
> >>>    	 * When the VF driver issues a IXGBE_VF_RESET request, the PF
> >> driver
> >>>    	 * disables the VF receipt of packets if the PF MTU is > 1500.
> >>> @@ -4094,6 +4163,9 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
> >>>    			IXGBE_PSRTYPE_IPV6HDR;
> >>>    #endif
> >>>
> >>> +	/* Set RQPL for VF RSS according to max Rx queue */
> >>> +	psrtype |= (valid_rxq_num >> 1) <<
> >>> +		IXGBE_PSRTYPE_RQPL_SHIFT;
> >>>    	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
> >>>
> >>>    	if (dev->data->dev_conf.rxmode.enable_scatter) {

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
  2014-12-25 13:20           ` Vlad Zolotarov
@ 2014-12-26  1:52             ` Ouyang, Changchun
  2014-12-26  6:49               ` Vladislav Zolotarov
  2015-01-05 10:29               ` Bruce Richardson
  0 siblings, 2 replies; 144+ messages in thread
From: Ouyang, Changchun @ 2014-12-26  1:52 UTC (permalink / raw)
  To: Vlad Zolotarov, dev



> -----Original Message-----
> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> Sent: Thursday, December 25, 2014 9:20 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
> 
> 
> On 12/25/14 04:43, Ouyang, Changchun wrote:
> > Hi,
> > Sorry miss some comments, so continue my response below,
> >
> >> -----Original Message-----
> >> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> >> Sent: Wednesday, December 24, 2014 6:40 PM
> >> To: Ouyang, Changchun; dev@dpdk.org
> >> Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
> >>
> >>
> >> On 12/24/14 07:23, Ouyang Changchun wrote:
> >>> It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable
> VF
> >> RSS.
> >>> The psrtype will determine how many queues the received packets will
> >>> distribute to, and the value of psrtype should depends on both facet:
> >>> max VF rxq number which has been negotiated with PF, and the number
> >>> of
> >> rxq specified in config on guest.
> >>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> >>> ---
> >>>    lib/librte_pmd_ixgbe/ixgbe_pf.c   | 15 +++++++
> >>>    lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 92
> >> ++++++++++++++++++++++++++++++++++-----
> >>>    2 files changed, 97 insertions(+), 10 deletions(-)
> >>>
> >>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> >>> b/lib/librte_pmd_ixgbe/ixgbe_pf.c index cbb0145..9c9dad8 100644
> >>> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> >>> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> >>> @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev
> >> *eth_dev)
> >>>    	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw-
> mac.num_rar_entries), 0);
> >>>    	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw-
> mac.num_rar_entries), 0);
> >>>
> >>> +	/*
> >>> +	 * VF RSS can support at most 4 queues for each VF, even if
> >>> +	 * 8 queues are available for each VF, it need refine to 4
> >>> +	 * queues here due to this limitation, otherwise no queue
> >>> +	 * will receive any packet even RSS is enabled.
> >> According to Table 7-3 in the 82599 spec RSS is not available when
> >> port is configured to have 8 queues per pool. This means that if u
> >> see this configuration u may immediately disable RSS flow in your code.
> >>
> >>> +	 */
> >>> +	if (eth_dev->data->dev_conf.rxmode.mq_mode ==
> >> ETH_MQ_RX_VMDQ_RSS) {
> >>> +		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
> >>> +			RTE_ETH_DEV_SRIOV(eth_dev).active =
> >> ETH_32_POOLS;
> >>> +			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
> >>> +			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
> >>> +				dev_num_vf(eth_dev) * 4;
> >> According to 82599 spec u can't do that since RSS is not allowed when
> >> port is configured to have 8 function per-VF. Have u verified that
> >> this works? If yes, then spec should be updated.
> >>
> >>> +		}
> >>> +	}
> >>> +
> >>>    	/* set VMDq map to default PF pool */
> >>>    	hw->mac.ops.set_vmdq(hw, 0,
> >>> RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
> >>>
> >>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> >>> b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> >>> index f69abda..a7c17a4 100644
> >>> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> >>> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> >>> @@ -3327,6 +3327,39 @@ ixgbe_alloc_rx_queue_mbufs(struct
> >> igb_rx_queue *rxq)
> >>>    }
> >>>
> >>>    static int
> >>> +ixgbe_config_vf_rss(struct rte_eth_dev *dev) {
> >>> +	struct ixgbe_hw *hw;
> >>> +	uint32_t mrqc;
> >>> +
> >>> +	ixgbe_rss_configure(dev);
> >>> +
> >>> +	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> >>> +
> >>> +	/* MRQC: enable VF RSS */
> >>> +	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
> >>> +	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
> >>> +	switch (RTE_ETH_DEV_SRIOV(dev).active) {
> >>> +	case ETH_64_POOLS:
> >>> +		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
> >>> +		break;
> >>> +
> >>> +	case ETH_32_POOLS:
> >>> +	case ETH_16_POOLS:
> >>> +		mrqc |= IXGBE_MRQC_VMDQRSS32EN;
> >> Again, this contradicts with the spec.
> > Yes, the spec say the hw can't support vf rss at all, but experiment find that
> could be done.
> 
> The spec explicitly say that VF RSS *is* supported in particular in the table
> mentioned above.
But the spec I have at hand (January 2014, revision 2.9) says "in IOV mode, VMDq+RSS mode is not available" in a note in section 4.6.10.2.1.
> What your code is doing is that in case of 16 VFs u setup a 32 pools
> configuration and use only 16 out of them.
But I don't see any big issue here. In this case each VF COULD have 8 queues, as I said before, but that is an estimated value; actually only 4 queues
are really available to one VF. You can refer to the spec to check that this is correct.
> 
> > We can focus on discussing the implementation firstly.
> 
> >
> >>> +		break;
> >>> +
> >>> +	default:
> >>> +		PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode");
> >>> +		return -EINVAL;
> >>> +	}
> >>> +
> >>> +	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
> >>> +
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +static int
> >>>    ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
> >>>    {
> >>>    	struct ixgbe_hw *hw =
> >>> @@ -3358,24 +3391,38 @@ ixgbe_dev_mq_rx_configure(struct
> >> rte_eth_dev *dev)
> >>>    			default: ixgbe_rss_disable(dev);
> >>>    		}
> >>>    	} else {
> >>> -		switch (RTE_ETH_DEV_SRIOV(dev).active) {
> >>>    		/*
> >>>    		 * SRIOV active scheme
> >>>    		 * FIXME if support DCB/RSS together with VMDq & SRIOV
> >>>    		 */
> >>> -		case ETH_64_POOLS:
> >>> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> >> IXGBE_MRQC_VMDQEN);
> >>> +		switch (dev->data->dev_conf.rxmode.mq_mode) {
> >>> +		case ETH_MQ_RX_RSS:
> >>> +		case ETH_MQ_RX_VMDQ_RSS:
> >>> +			ixgbe_config_vf_rss(dev);
> >>>    			break;
> >>>
> >>> -		case ETH_32_POOLS:
> >>> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> >> IXGBE_MRQC_VMDQRT4TCEN);
> >>> -			break;
> >>> +		default:
> >>> +			switch (RTE_ETH_DEV_SRIOV(dev).active) {
> >> Sorry for nitpicking but have u considered taking this encapsulated
> >> "switch- case" block into a separate function? This could make the
> >> code look a lot nicer. ;)
> >>
> >>> +			case ETH_64_POOLS:
> >>> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> >>> +					IXGBE_MRQC_VMDQEN);
> >>> +				break;
> >>>
> >>> -		case ETH_16_POOLS:
> >>> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> >> IXGBE_MRQC_VMDQRT8TCEN);
> >>> +			case ETH_32_POOLS:
> >>> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> >>> +					IXGBE_MRQC_VMDQRT4TCEN);
> >>> +				break;
> >>> +
> >>> +			case ETH_16_POOLS:
> >>> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> >>> +					IXGBE_MRQC_VMDQRT8TCEN);
> >>> +				break;
> >>> +			default:
> >>> +				PMD_INIT_LOG(ERR,
> >>> +					"invalid pool number in IOV mode");
> >>> +				break;
> >>> +			}
> >>>    			break;
> >>> -		default:
> >>> -			PMD_INIT_LOG(ERR, "invalid pool number in IOV
> >> mode");
> >>>    		}
> >>>    	}
> >>>
> >>> @@ -3989,10 +4036,32 @@ ixgbevf_dev_rx_init(struct rte_eth_dev
> *dev)
> >>>    	uint16_t buf_size;
> >>>    	uint16_t i;
> >>>    	int ret;
> >>> +	uint16_t valid_rxq_num;
> >>>
> >>>    	PMD_INIT_FUNC_TRACE();
> >>>    	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> >>>
> >>> +	valid_rxq_num = RTE_MIN(dev->data->nb_rx_queues,
> >>> +hw->mac.max_rx_queues);
> >>> +
> >>> +	/*
> >>> +	 * VMDq RSS can't support 3 queues, so config it into 4 queues,
> >>> +	 * and give user a hint that some packets may loss if it doesn't
> >>> +	 * poll the queue where those packets are distributed to.
> >>> +	 */
> >>> +	if (valid_rxq_num == 3)
> >>> +		valid_rxq_num = 4;
> >> Why to configure more queues that requested and not less (2)? Why to
> >> configure anything at all and not return an error?
> > Sorry, I don't agree this is "anything" you say, because I don't use 5,6,7,
> 8, ..., 16, 2014, 2015,... etc.
> > By considering 2 or 4,
> > I prefer 4, the reason is if user need more than 3 queues per vf to do
> > something, And pf has also the capability to setup 4 queues per vf,
> > confining to 2 queues is also not good thing, So here try to enable 4 queues,
> and give user hints here.
> > Btw, change it into 2 is another way, depends on other guys' more insight
> here.
> 
> Like I said before, trying to guess what user wants is a way to making a code
> that is very hard to use and to maintain. Pls., just return an error and let the
> user code deal with it the way he/she really wants and not the way u *think*
> he/she wants.
> 
I didn't disagree with this, either :-)
If you have a strong reason for doing it this way and more people agree with it,
I will probably modify it in v4.
> >
> >>> +
> >>> +	if (dev->data->nb_rx_queues > valid_rxq_num) {
> >>> +		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
> >>> +			"it should be equal to or less than %d",
> >>> +			valid_rxq_num);
> >>> +		return -1;
> >>> +	} else if (dev->data->nb_rx_queues < valid_rxq_num)
> >>> +		PMD_INIT_LOG(ERR, "The number of Rx queue is less "
> >>> +			"than the number of available Rx queues:%d, "
> >>> +			"packets in Rx queues(q_id >= %d) may loss.",
> >>> +			valid_rxq_num, dev->data->nb_rx_queues);
> >> Who ever looks in the "INIT_LOG" if everything "work well" and u make
> >> it look so by allowing this call to succeed. And then some packets
> >> will just silently not arrive?! And what the used should somehow guess to
> do?
> >> - Look in the "INIT_LOG"?! This is a nightmare!
> >>
> >>> +
> >>>    	/*
> >>>    	 * When the VF driver issues a IXGBE_VF_RESET request, the PF
> >> driver
> >>>    	 * disables the VF receipt of packets if the PF MTU is > 1500.
> >>> @@ -4094,6 +4163,9 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
> >>>    			IXGBE_PSRTYPE_IPV6HDR;
> >>>    #endif
> >>>
> >>> +	/* Set RQPL for VF RSS according to max Rx queue */
> >>> +	psrtype |= (valid_rxq_num >> 1) <<
> >>> +		IXGBE_PSRTYPE_RQPL_SHIFT;
> >>>    	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
> >>>
> >>>    	if (dev->data->dev_conf.rxmode.enable_scatter) {

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
  2014-12-25 13:13           ` Vlad Zolotarov
@ 2014-12-26  2:07             ` Ouyang, Changchun
  0 siblings, 0 replies; 144+ messages in thread
From: Ouyang, Changchun @ 2014-12-26  2:07 UTC (permalink / raw)
  To: Vlad Zolotarov, dev



> -----Original Message-----
> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> Sent: Thursday, December 25, 2014 9:14 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
> 
> 
> On 12/25/14 04:14, Ouyang, Changchun wrote:
> > Hi,
> >
> >> -----Original Message-----
> >> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> >> Sent: Wednesday, December 24, 2014 6:40 PM
> >> To: Ouyang, Changchun; dev@dpdk.org
> >> Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
> >>
> >>
> >> On 12/24/14 07:23, Ouyang Changchun wrote:
> >>> It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable
> VF
> >> RSS.
> >>> The psrtype will determine how many queues the received packets will
> >>> distribute to, and the value of psrtype should depends on both facet:
> >>> max VF rxq number which has been negotiated with PF, and the number
> >>> of
> >> rxq specified in config on guest.
> >>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> >>> ---
> >>>    lib/librte_pmd_ixgbe/ixgbe_pf.c   | 15 +++++++
> >>>    lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 92
> >> ++++++++++++++++++++++++++++++++++-----
> >>>    2 files changed, 97 insertions(+), 10 deletions(-)
> >>>
> >>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> >>> b/lib/librte_pmd_ixgbe/ixgbe_pf.c index cbb0145..9c9dad8 100644
> >>> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> >>> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> >>> @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev
> >> *eth_dev)
> >>>    	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw-
> mac.num_rar_entries), 0);
> >>>    	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw-
> mac.num_rar_entries), 0);
> >>>
> >>> +	/*
> >>> +	 * VF RSS can support at most 4 queues for each VF, even if
> >>> +	 * 8 queues are available for each VF, it need refine to 4
> >>> +	 * queues here due to this limitation, otherwise no queue
> >>> +	 * will receive any packet even RSS is enabled.
> >> According to Table 7-3 in the 82599 spec RSS is not available when
> >> port is configured to have 8 queues per pool. This means that if u
> >> see this configuration u may immediately disable RSS flow in your code.
> >>
> > 8 queues here means the available number queue per vf, it is
> > calculated according to max vfs, e.g. if max vfs is 16(or less than),
> > then each vf 'COULD' have 8 queues evenly, pf early init stage estimate this
> value, but that is not precise, so need refine this.
> > User don't know this estimated value, it is internal value, not come from
> user's input/configure.
> > Hope it is clear to you.
> >>> +	 */
> >>> +	if (eth_dev->data->dev_conf.rxmode.mq_mode ==
> >> ETH_MQ_RX_VMDQ_RSS) {
> >>> +		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
> >>> +			RTE_ETH_DEV_SRIOV(eth_dev).active =
> >> ETH_32_POOLS;
> >>> +			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
> >>> +			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
> >>> +				dev_num_vf(eth_dev) * 4;
> >> According to 82599 spec u can't do that since RSS is not allowed when
> >> port is configured to have 8 function per-VF. Have u verified that
> >> this works? If yes, then spec should be updated.
> >>
> > Response as above,
> > Of course I have validated this. It works well.
> >
> >>> +		}
> >>> +	}
> >>> +
> >>>    	/* set VMDq map to default PF pool */
> >>>    	hw->mac.ops.set_vmdq(hw, 0,
> >>> RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
> >>>
> >>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> >>> b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> >>> index f69abda..a7c17a4 100644
> >>> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> >>> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> >>> @@ -3327,6 +3327,39 @@ ixgbe_alloc_rx_queue_mbufs(struct
> >> igb_rx_queue *rxq)
> >>>    }
> >>>
> >>>    static int
> >>> +ixgbe_config_vf_rss(struct rte_eth_dev *dev) {
> >>> +	struct ixgbe_hw *hw;
> >>> +	uint32_t mrqc;
> >>> +
> >>> +	ixgbe_rss_configure(dev);
> >>> +
> >>> +	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> >>> +
> >>> +	/* MRQC: enable VF RSS */
> >>> +	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
> >>> +	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
> >>> +	switch (RTE_ETH_DEV_SRIOV(dev).active) {
> >>> +	case ETH_64_POOLS:
> >>> +		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
> >>> +		break;
> >>> +
> >>> +	case ETH_32_POOLS:
> >>> +	case ETH_16_POOLS:
> >>> +		mrqc |= IXGBE_MRQC_VMDQRSS32EN;
> >> Again, this contradicts with the spec.
> >>
> >>> +		break;
> >>> +
> >>> +	default:
> >>> +		PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode");
> >>> +		return -EINVAL;
> >>> +	}
> >>> +
> >>> +	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
> >>> +
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +static int
> >>>    ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
> >>>    {
> >>>    	struct ixgbe_hw *hw =
> >>> @@ -3358,24 +3391,38 @@ ixgbe_dev_mq_rx_configure(struct
> >> rte_eth_dev *dev)
> >>>    			default: ixgbe_rss_disable(dev);
> >>>    		}
> >>>    	} else {
> >>> -		switch (RTE_ETH_DEV_SRIOV(dev).active) {
> >>>    		/*
> >>>    		 * SRIOV active scheme
> >>>    		 * FIXME if support DCB/RSS together with VMDq & SRIOV
> >>>    		 */
> >>> -		case ETH_64_POOLS:
> >>> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> >> IXGBE_MRQC_VMDQEN);
> >>> +		switch (dev->data->dev_conf.rxmode.mq_mode) {
> >>> +		case ETH_MQ_RX_RSS:
> >>> +		case ETH_MQ_RX_VMDQ_RSS:
> >>> +			ixgbe_config_vf_rss(dev);
> >>>    			break;
> >>>
> >>> -		case ETH_32_POOLS:
> >>> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> >> IXGBE_MRQC_VMDQRT4TCEN);
> >>> -			break;
> >>> +		default:
> >>> +			switch (RTE_ETH_DEV_SRIOV(dev).active) {
> >> Sorry for nitpicking but have u considered taking this encapsulated
> >> "switch- case" block into a separate function? This could make the
> >> code look a lot nicer. ;)
> > Only one place use it, so don't need make it a function, And I prefer
> > to the current code.
> 
> Functions may be used not only to have a repeatedly called code but also to
> make a caller code more readable. Encapsulated switch-case is one of the
> examples of a *not* readable code constructs which should be avoided.
> 
> >
> >>> +			case ETH_64_POOLS:
> >>> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> >>> +					IXGBE_MRQC_VMDQEN);
> >>> +				break;
> >>>
> >>> -		case ETH_16_POOLS:
> >>> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> >> IXGBE_MRQC_VMDQRT8TCEN);
> >>> +			case ETH_32_POOLS:
> >>> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> >>> +					IXGBE_MRQC_VMDQRT4TCEN);
> >>> +				break;
> >>> +
> >>> +			case ETH_16_POOLS:
> >>> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> >>> +					IXGBE_MRQC_VMDQRT8TCEN);
> >>> +				break;
> >>> +			default:
> >>> +				PMD_INIT_LOG(ERR,
> >>> +					"invalid pool number in IOV mode");
> >>> +				break;
> >>> +			}
> >>>    			break;
> >>> -		default:
> >>> -			PMD_INIT_LOG(ERR, "invalid pool number in IOV
> >> mode");
> >>>    		}
> >>>    	}
> >>>
> >>> @@ -3989,10 +4036,32 @@ ixgbevf_dev_rx_init(struct rte_eth_dev
> *dev)
> >>>    	uint16_t buf_size;
> >>>    	uint16_t i;
> >>>    	int ret;
> >>> +	uint16_t valid_rxq_num;
> >>>
> >>>    	PMD_INIT_FUNC_TRACE();
> >>>    	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> >>>
> >>> +	valid_rxq_num = RTE_MIN(dev->data->nb_rx_queues,
> >>> +hw->mac.max_rx_queues);
> >>> +
> >>> +	/*
> >>> +	 * VMDq RSS can't support 3 queues, so config it into 4 queues,
> >>> +	 * and give user a hint that some packets may loss if it doesn't
> >>> +	 * poll the queue where those packets are distributed to.
> >>> +	 */
> >>> +	if (valid_rxq_num == 3)
> >>> +		valid_rxq_num = 4;
> >> Why to configure more queues that requested and not less (2)? Why to
> >> configure anything at all and not return an error?
> >>
> >>> +
> >>> +	if (dev->data->nb_rx_queues > valid_rxq_num) {
> >>> +		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
> >>> +			"it should be equal to or less than %d",
> >>> +			valid_rxq_num);
> >>> +		return -1;
> >>> +	} else if (dev->data->nb_rx_queues < valid_rxq_num)
> >>> +		PMD_INIT_LOG(ERR, "The number of Rx queue is less "
> >>> +			"than the number of available Rx queues:%d, "
> >>> +			"packets in Rx queues(q_id >= %d) may loss.",
> >>> +			valid_rxq_num, dev->data->nb_rx_queues);
> >> Who ever looks in the "INIT_LOG" if everything "work well" and u make it
> >> look so by allowing this call to succeed. And then some packets will just
> >> silently not arrive?! And what the used should somehow guess to do?
> >> - Look in the "INIT_LOG"?! This is a nightmare!
> > Sorry, I don't think so again, if user find any packets loss, he will care for log,
> > Then he can find that log there, then user can refine its rxq number due
> the wrong rxq number,
> > Why is it a nightmare?
> 
> Because usually u expect that if the function call returns with a
> success it means a success. Why a user has to learn that a device
> configuration function was provided with wrong parameters from the
> packet loss? If parameters are not allowed u expect to get an error as a
> return value. Since when errors are returned in a form of a log message?
> Why do u think there is a living person running a DPDK based
> application? How do u expect somebody build an automated environment
> when part of errors are returned in some log? Should he/she add a log
> parser?
If it is an automated environment, the log parsing should also be automated;
at least it should report to a living person. But this is another topic, so we can stop here. Let's focus on VF RSS.

> On the other hand, why do u think 4 queues is a better option for a user
> than 2 queue when he asked for 3 queues? What kind of heuristics is that?
> 
> To summarize - it would be much better if u just returned an EINVAL
> error in that case.

As I said, it seems you have a strong reason to use 2 queues here and to return an error
if the user tries to use 3 queues. I will consider it in v4, but I still need insights from more people here.
> 
> >
> > I don't agree with you about "silently not arrive", because we have hint/log
> there.
> >
> > Return error here is also possible way,
> 
> It's the only possible way! ;)
> 
> > Again need other guys' insight here.
> >
> >>> +
> >>>    	/*
> >>>    	 * When the VF driver issues a IXGBE_VF_RESET request, the PF
> >> driver
> >>>    	 * disables the VF receipt of packets if the PF MTU is > 1500.
> >>> @@ -4094,6 +4163,9 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
> >>>    			IXGBE_PSRTYPE_IPV6HDR;
> >>>    #endif
> >>>
> >>> +	/* Set RQPL for VF RSS according to max Rx queue */
> >>> +	psrtype |= (valid_rxq_num >> 1) <<
> >>> +		IXGBE_PSRTYPE_RQPL_SHIFT;
> >>>    	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
> >>>
> >>>    	if (dev->data->dev_conf.rxmode.enable_scatter) {

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic
  2014-12-25 12:46         ` Vlad Zolotarov
@ 2014-12-26  2:37           ` Ouyang, Changchun
       [not found]             ` <CAOYyTHbrB-VinN5ZEd1tYTnS7_GhCT1jiHiZzNKkQUEJ1rG79w@mail.gmail.com>
  0 siblings, 1 reply; 144+ messages in thread
From: Ouyang, Changchun @ 2014-12-26  2:37 UTC (permalink / raw)
  To: Vlad Zolotarov, dev



> -----Original Message-----
> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> Sent: Thursday, December 25, 2014 8:46 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic
> 
> 
> On 12/25/14 04:26, Ouyang, Changchun wrote:
> > Hi,
> >
> >> -----Original Message-----
> >> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> >> Sent: Wednesday, December 24, 2014 6:49 PM
> >> To: Ouyang, Changchun;dev@dpdk.org
> >> Subject: Re: [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic
> >>
> >>
> >> On 12/24/14 07:22, Ouyang Changchun wrote:
> >>> This patch enables VF RSS for Niantic, which allow each VF having at
> >>> most 4
> >> queues.
> >>> The actual queue number per VF depends on the total number of pool,
> >>> which is determined by the total number of VF at PF initialization
> >>> stage and the number of queue specified in config:
> >>> 1) If the number of VF is in the range from 1 to 32 and the number
> >>> of rxq is 4('--rxq 4' in testpmd), then there is totally 32
> >>> pools(ETH_32_POOLS), and each VF have 4 queues;
> >>>
> >>> 2)If the number of VF is in the range from 33 to 64 and the number
> >>> of rxq is 2('--rxq 2' in testpmd), then there is totally 64
> >>> pools(ETH_64_POOLS), and each VF have 2 queues;
> >>>
> >>> On host, to enable VF RSS functionality, rx mq mode should be set as
> >>> ETH_MQ_RX_VMDQ_RSS or ETH_MQ_RX_RSS mode, and SRIOV mode
> >> should be activated(max_vfs >= 1).
> >>> It also needs config VF RSS information like hash function, RSS key,
> >>> RSS key
> >> length.
> >>> The limitation for Niantic VF RSS is:
> >>> the hash and key are shared among PF and all VF, the RETA table with
> >>> 128 entries are also shared among PF and all VF. So it is not good
> >>> idea to query the hash and reta content per VF on guest, instead, it
> >>> makes
> >> sense to query them on host(PF).
> >>> v3 change:
> >>>     - More cleanup;
> >> This series is still missing the appropriate patches in the
> >> rte_eth_dev_info_get() flow to return a reta_size for a VF device;
> >> and to
> >> rte_eth_dev_rss_reta_query() in the context of a VF device (I haven't
> >> noticed the initialization of a
> >> dev->dev_ops->reta_query for the VF device in this series).
> >>
> >> Without these code bits it's impossible to work with the VF devices
> >> in the RSS context the same way we work with the PF devices. It means
> >> that we'll have to do some special branching to handle the VF device
> >> and this voids the whole meaning of the framework which in turn is very
> unfortunate.
> >>
> > Again pls try to query reta content on pf/host, this is due to hw
> > limitation,
> 
> Again, I'm using DPDK from inside a Guest OS on Amazon Cloud. I have no
> and will never have an access to the PF due to obvious reasons thus I can't
> query it.
> Which HW limitations u are referring? It's a clear software issue - the VF-PF
> channel protocol should have a message to negotiate it but it looks like Intel
> hasn't cared to implemented it yet unless I miss something here.
> The problems don't end with the RETA. What about the hash key, which is
> also shared? There isn't an appropriate message to query it either. This is not
> a pure DPDK issue - it's a general issue with Linux 82599 drivers.
> 
On this point I agree with you: it is not a pure DPDK issue.
DPDK uses shared code (partly derived from the Linux 82599 driver, and logically similar to it) from another division; the HW is also implemented by that division.
If we are talking about the HW limitation and about modifying the shared code and the Linux 82599 driver to fully enable all VF RSS
functionality on Niantic, just talking about it on the dpdk.org mailing list may not be enough.
Personally, I think you could find another, more effective and efficient way to raise your further request.
 
> > It don't affect any functionality, just the querying is special.
> 
> How can u call the fact that some of DPDK API functionality is missing as "it
> don't affect any functionality"? Of course it affects it. Just like I said it may
> cause us treat the VF in the special way while there is not any real reason to
> do so.
I mean that each VF can use multiple queues and do RSS for packets;
this functionality works well.
If you can bypass the querying issue or handle it specially, you can still use VF RSS.

> > Before this patch, customer often was notified Niantic can't support
> > vf rss, But with lots of experiments and find that it still has limited vf rss
> functionality.
> > Even on that, linux ixgbe driver has at most 2 queues per vf, But the
> > dpdk could enable 4 queues per vf.
> > In summary, dpdk could support vf rss on Niantic with at most 4 queues
> > per vf, but the querying of reta is very limited due to the HW limitation.
> 
> Limited? I meant missing, right?
We mean the same thing in different ways:
"very limited" means it can still be queried on the PF, but it is missing (i.e. cannot be done) on the VF/guest.
> 
> > Hope you are on the same page now.
> >
> > Thanks
> > Changchun
> >

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic
       [not found]             ` <CAOYyTHbrB-VinN5ZEd1tYTnS7_GhCT1jiHiZzNKkQUEJ1rG79w@mail.gmail.com>
@ 2014-12-26  5:16               ` Vladislav Zolotarov
  2014-12-26  5:25                 ` Ouyang, Changchun
  0 siblings, 1 reply; 144+ messages in thread
From: Vladislav Zolotarov @ 2014-12-26  5:16 UTC (permalink / raw)
  To: Changchun Ouyang; +Cc: dev

On Dec 26, 2014 4:41 AM, "Ouyang, Changchun" <changchun.ouyang@intel.com>
wrote:
>
>
>
> > -----Original Message-----
> > From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> > Sent: Thursday, December 25, 2014 8:46 PM
> > To: Ouyang, Changchun; dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic
> >
> >
> > On 12/25/14 04:26, Ouyang, Changchun wrote:
> > > Hi,
> > >
> > >> -----Original Message-----
> > >> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> > >> Sent: Wednesday, December 24, 2014 6:49 PM
> > >> To: Ouyang, Changchun;dev@dpdk.org
> > >> Subject: Re: [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic
> > >>
> > >>
> > >> On 12/24/14 07:22, Ouyang Changchun wrote:
> > >>> This patch enables VF RSS for Niantic, which allow each VF having at
> > >>> most 4
> > >> queues.
> > >>> The actual queue number per VF depends on the total number of pool,
> > >>> which is determined by the total number of VF at PF initialization
> > >>> stage and the number of queue specified in config:
> > >>> 1) If the number of VF is in the range from 1 to 32 and the number
> > >>> of rxq is 4('--rxq 4' in testpmd), then there is totally 32
> > >>> pools(ETH_32_POOLS), and each VF have 4 queues;
> > >>>
> > >>> 2)If the number of VF is in the range from 33 to 64 and the number
> > >>> of rxq is 2('--rxq 2' in testpmd), then there is totally 64
> > >>> pools(ETH_64_POOLS), and each VF have 2 queues;
> > >>>
> > >>> On host, to enable VF RSS functionality, rx mq mode should be set as
> > >>> ETH_MQ_RX_VMDQ_RSS or ETH_MQ_RX_RSS mode, and SRIOV mode
> > >> should be activated(max_vfs >= 1).
> > >>> It also needs config VF RSS information like hash function, RSS key,
> > >>> RSS key
> > >> length.
> > >>> The limitation for Niantic VF RSS is:
> > >>> the hash and key are shared among PF and all VF, the RETA table with
> > >>> 128 entries are also shared among PF and all VF. So it is not good
> > >>> idea to query the hash and reta content per VF on guest, instead, it
> > >>> makes
> > >> sense to query them on host(PF).
> > >>> v3 change:
> > >>>     - More cleanup;
> > >> This series is still missing the appropriate patches in the
> > >> rte_eth_dev_info_get() flow to return a reta_size for a VF device;
> > >> and to
> > >> rte_eth_dev_rss_reta_query() in the context of a VF device (I haven't
> > >> noticed the initialization of a
> > >> dev->dev_ops->reta_query for the VF device in this series).
> > >>
> > >> Without these code bits it's impossible to work with the VF devices
> > >> in the RSS context the same way we work with the PF devices. It means
> > >> that we'll have to do some special branching to handle the VF device
> > >> and this voids the whole meaning of the framework which in turn is
very
> > unfortunate.
> > >>
> > > Again pls try to query reta content on pf/host, this is due to hw
> > > limitation,
> >
> > Again, I'm using DPDK from inside a Guest OS on Amazon Cloud. I have no
> > and will never have an access to the PF due to obvious reasons thus I
can't
> > query it.
> > Which HW limitations u are referring? It's a clear software issue - the
VF-PF
> > channel protocol should have a message to negotiate it but it looks
like Intel
> > hasn't cared to implemented it yet unless I miss something here.
> > The problems don't end with the RETA. What about the hash key, which is
> > also shared? There isn't an appropriate message to query it either.
This is not
> > a pure DPDK issue - it's a general issue with Linux 82599 drivers.
> >
> On this point, I agree with you, it is not pure dpdk issue,
> Dpdk use share codes(partly come from Linux 82599, and logically similar
with it) from another division, the hw is also implemented by that division.
> If we are talking about hw limitation and modify share code, linux 82599
driver to fully enable vf rss all
> Functionality in Niantic, just talking about it in dpdk.org mailing list
may not be enough.
> Personally I think maybe you could find another effective and efficient
way to raise your further request.

You r right. Apparently at this point you just can't "meet my demands"
because there's no support for these queries in the PF driver. ;) So,
let's move on.

I'll send the appropriate patch on the netdev list and we'll see how it
goes from there.

>
> > > It don't affect any functionality, just the querying is special.
> >
> > How can u call the fact that some of DPDK API functionality is missing
as "it
> > don't affect any functionality"? Of course it affects it. Just like I
said it may
> > cause us treat the VF in the special way while there is not any real
reason to
> > do so.
> I mean each vf could use multiple queues and do rss for packets,
> This functionality works well.
> If you could bypass the querying issue or handling it specially, you
still could use the vf rss.
>
> > > Before this patch, customer often was notified Niantic can't support
> > > vf rss, But with lots of experiments and find that it still has
limited vf rss
> > functionality.
> > > Even on that, linux ixgbe driver has at most 2 queues per vf, But the
> > > dpdk could enable 4 queues per vf.
> > > In summary, dpdk could support vf rss on Niantic with at most 4 queues
> > > per vf, but the querying of reta is very limited due to the HW
limitation.
> >
> > Limited? I meant missing, right?
> We are meaning same thing in different way,
> "very limited" mean it still could query it on pf, but missing(or could
not do it) on vf/guest

Again, agreed. I'm just so pissed that we have to rewrite such a nice design just
because this simple piece of software is missing in the PF.

> >
> > > Hope you are on the same page now.

We are now... 😉

> > >
> > > Thanks
> > > Changchun
> > >
>

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic
  2014-12-26  5:16               ` Vladislav Zolotarov
@ 2014-12-26  5:25                 ` Ouyang, Changchun
  0 siblings, 0 replies; 144+ messages in thread
From: Ouyang, Changchun @ 2014-12-26  5:25 UTC (permalink / raw)
  To: Vladislav Zolotarov; +Cc: dev

Hi Vladislav,

From: Vladislav Zolotarov [mailto:vladz@cloudius-systems.com]
Sent: Friday, December 26, 2014 1:16 PM
To: Ouyang, Changchun
Cc: dev@dpdk.org
Subject: RE: [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic


On Dec 26, 2014 4:41 AM, "Ouyang, Changchun" <changchun.ouyang@intel.com<mailto:changchun.ouyang@intel.com>> wrote:
>
>
>
> > -----Original Message-----
> > From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com<mailto:vladz@cloudius-systems.com>]
> > Sent: Thursday, December 25, 2014 8:46 PM
> > To: Ouyang, Changchun; dev@dpdk.org<mailto:dev@dpdk.org>
> > Subject: Re: [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic
> >
> >
> > On 12/25/14 04:26, Ouyang, Changchun wrote:
> > > Hi,
> > >
> > >> -----Original Message-----
> > >> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com<mailto:vladz@cloudius-systems.com>]
> > >> Sent: Wednesday, December 24, 2014 6:49 PM
> > >> To: Ouyang, Changchun;dev@dpdk.org<mailto:Changchun%3Bdev@dpdk.org>
> > >> Subject: Re: [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic
> > >>
> > >>
> > >> On 12/24/14 07:22, Ouyang Changchun wrote:
> > >>> This patch enables VF RSS for Niantic, which allow each VF having at
> > >>> most 4
> > >> queues.
> > >>> The actual queue number per VF depends on the total number of pool,
> > >>> which is determined by the total number of VF at PF initialization
> > >>> stage and the number of queue specified in config:
> > >>> 1) If the number of VF is in the range from 1 to 32 and the number
> > >>> of rxq is 4('--rxq 4' in testpmd), then there is totally 32
> > >>> pools(ETH_32_POOLS), and each VF have 4 queues;
> > >>>
> > >>> 2)If the number of VF is in the range from 33 to 64 and the number
> > >>> of rxq is 2('--rxq 2' in testpmd), then there is totally 64
> > >>> pools(ETH_64_POOLS), and each VF have 2 queues;
> > >>>
> > >>> On host, to enable VF RSS functionality, rx mq mode should be set as
> > >>> ETH_MQ_RX_VMDQ_RSS or ETH_MQ_RX_RSS mode, and SRIOV mode
> > >> should be activated(max_vfs >= 1).
> > >>> It also needs config VF RSS information like hash function, RSS key,
> > >>> RSS key
> > >> length.
> > >>> The limitation for Niantic VF RSS is:
> > >>> the hash and key are shared among PF and all VF, the RETA table with
> > >>> 128 entries are also shared among PF and all VF. So it is not good
> > >>> idea to query the hash and reta content per VF on guest, instead, it
> > >>> makes
> > >> sense to query them on host(PF).
> > >>> v3 change:
> > >>>     - More cleanup;
> > >> This series is still missing the appropriate patches in the
> > >> rte_eth_dev_info_get() flow to return a reta_size for a VF device;
> > >> and to
> > >> rte_eth_dev_rss_reta_query() in the context of a VF device (I haven't
> > >> noticed the initialization of a
> > >> dev->dev_ops->reta_query for the VF device in this series).
> > >>
> > >> Without these code bits it's impossible to work with the VF devices
> > >> in the RSS context the same way we work with the PF devices. It means
> > >> that we'll have to do some special branching to handle the VF device
> > >> and this voids the whole meaning of the framework which in turn is very
> > unfortunate.
> > >>
> > > Again pls try to query reta content on pf/host, this is due to hw
> > > limitation,
> >
> > Again, I'm using DPDK from inside a Guest OS on Amazon Cloud. I have no
> > and will never have an access to the PF due to obvious reasons thus I can't
> > query it.
> > Which HW limitations u are referring? It's a clear software issue - the VF-PF
> > channel protocol should have a message to negotiate it but it looks like Intel
> > hasn't cared to implemented it yet unless I miss something here.
> > The problems don't end with the RETA. What about the hash key, which is
> > also shared? There isn't an appropriate message to query it either. This is not
> > a pure DPDK issue - it's a general issue with Linux 82599 drivers.
> >
> On this point, I agree with you, it is not pure dpdk issue,
> Dpdk use share codes(partly come from Linux 82599, and logically similar with it) from another division, the hw is also implemented by that division.
> If we are talking about hw limitation and modify share code, linux 82599 driver to fully enable vf rss all
> Functionality in Niantic, just talking about it in dpdk.org<http://dpdk.org> mailing list may not be enough.
> Personally I think maybe you could find another effective and efficient way to raise your further request.

You r right. Apparently at this point you just can't "meet my demands" because there's no support for these queries in the PF driver. ;) So,  let's move on.

I'll send the appropriate patch on the netdev list and we'll see how it goes from there.

Changchun: yes, please go forward to do it.

>
> > > It don't affect any functionality, just the querying is special.
> >
> > How can u call the fact that some of DPDK API functionality is missing as "it
> > don't affect any functionality"? Of course it affects it. Just like I said it may
> > cause us treat the VF in the special way while there is not any real reason to
> > do so.
> I mean each vf could use multiple queues and do rss for packets,
> This functionality works well.
> If you could bypass the querying issue or handling it specially, you still could use the vf rss.
>
> > > Before this patch, customer often was notified Niantic can't support
> > > vf rss, But with lots of experiments and find that it still has limited vf rss
> > functionality.
> > > Even on that, linux ixgbe driver has at most 2 queues per vf, But the
> > > dpdk could enable 4 queues per vf.
> > > In summary, dpdk could support vf rss on Niantic with at most 4 queues
> > > per vf, but the querying of reta is very limited due to the HW limitation.
> >
> > Limited? I meant missing, right?
> We are meaning same thing in different way,
> "very limited" mean it still could query it on pf, but missing(or could not do it) on vf/guest

Again, agreed. I just so pissed that we have to rewrite so nice design just because this simple piece of software is missing in the PF.

> >
> > > Hope you are on the same page now.

We are now... 😉

Changchun: very glad to see this! ☺

Thanks and regards,

Changchun



^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
  2014-12-26  1:52             ` Ouyang, Changchun
@ 2014-12-26  6:49               ` Vladislav Zolotarov
  2014-12-26  7:26                 ` Ouyang, Changchun
  2015-01-05 10:29               ` Bruce Richardson
  1 sibling, 1 reply; 144+ messages in thread
From: Vladislav Zolotarov @ 2014-12-26  6:49 UTC (permalink / raw)
  To: Changchun Ouyang; +Cc: dev

On Dec 26, 2014 3:52 AM, "Ouyang, Changchun" <changchun.ouyang@intel.com>
wrote:
>
>
>
> > -----Original Message-----
> > From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> > Sent: Thursday, December 25, 2014 9:20 PM
> > To: Ouyang, Changchun; dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
> >
> >
> > On 12/25/14 04:43, Ouyang, Changchun wrote:
> > > Hi,
> > > Sorry miss some comments, so continue my response below,
> > >
> > >> -----Original Message-----
> > >> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> > >> Sent: Wednesday, December 24, 2014 6:40 PM
> > >> To: Ouyang, Changchun; dev@dpdk.org
> > >> Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
> > >>
> > >>
> > >> On 12/24/14 07:23, Ouyang Changchun wrote:
> > >>> It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable
> > VF
> > >> RSS.
> > >>> The psrtype will determine how many queues the received packets will
> > >>> distribute to, and the value of psrtype should depends on both
facet:
> > >>> max VF rxq number which has been negotiated with PF, and the number
> > >>> of
> > >> rxq specified in config on guest.
> > >>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> > >>> ---
> > >>>    lib/librte_pmd_ixgbe/ixgbe_pf.c   | 15 +++++++
> > >>>    lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 92
> > >> ++++++++++++++++++++++++++++++++++-----
> > >>>    2 files changed, 97 insertions(+), 10 deletions(-)
> > >>>
> > >>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > >>> b/lib/librte_pmd_ixgbe/ixgbe_pf.c index cbb0145..9c9dad8 100644
> > >>> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > >>> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > >>> @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev
> > >> *eth_dev)
> > >>>           IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw-
> > mac.num_rar_entries), 0);
> > >>>           IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw-
> > mac.num_rar_entries), 0);
> > >>>
> > >>> + /*
> > >>> +  * VF RSS can support at most 4 queues for each VF, even if
> > >>> +  * 8 queues are available for each VF, it need refine to 4
> > >>> +  * queues here due to this limitation, otherwise no queue
> > >>> +  * will receive any packet even RSS is enabled.
> > >> According to Table 7-3 in the 82599 spec RSS is not available when
> > >> port is configured to have 8 queues per pool. This means that if u
> > >> see this configuration u may immediately disable RSS flow in your
code.
> > >>
> > >>> +  */
> > >>> + if (eth_dev->data->dev_conf.rxmode.mq_mode ==
> > >> ETH_MQ_RX_VMDQ_RSS) {
> > >>> +         if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
> > >>> +                 RTE_ETH_DEV_SRIOV(eth_dev).active =
> > >> ETH_32_POOLS;
> > >>> +                 RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
> > >>> +                 RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
> > >>> +                         dev_num_vf(eth_dev) * 4;
> > >> According to 82599 spec u can't do that since RSS is not allowed when
> > >> port is configured to have 8 function per-VF. Have u verified that
> > >> this works? If yes, then spec should be updated.
> > >>
> > >>> +         }
> > >>> + }
> > >>> +
> > >>>           /* set VMDq map to default PF pool */
> > >>>           hw->mac.ops.set_vmdq(hw, 0,
> > >>> RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
> > >>>
> > >>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > >>> b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > >>> index f69abda..a7c17a4 100644
> > >>> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > >>> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > >>> @@ -3327,6 +3327,39 @@ ixgbe_alloc_rx_queue_mbufs(struct
> > >> igb_rx_queue *rxq)
> > >>>    }
> > >>>
> > >>>    static int
> > >>> +ixgbe_config_vf_rss(struct rte_eth_dev *dev) {
> > >>> + struct ixgbe_hw *hw;
> > >>> + uint32_t mrqc;
> > >>> +
> > >>> + ixgbe_rss_configure(dev);
> > >>> +
> > >>> + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> > >>> +
> > >>> + /* MRQC: enable VF RSS */
> > >>> + mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
> > >>> + mrqc &= ~IXGBE_MRQC_MRQE_MASK;
> > >>> + switch (RTE_ETH_DEV_SRIOV(dev).active) {
> > >>> + case ETH_64_POOLS:
> > >>> +         mrqc |= IXGBE_MRQC_VMDQRSS64EN;
> > >>> +         break;
> > >>> +
> > >>> + case ETH_32_POOLS:
> > >>> + case ETH_16_POOLS:
> > >>> +         mrqc |= IXGBE_MRQC_VMDQRSS32EN;
> > >> Again, this contradicts with the spec.
> > > Yes, the spec say the hw can't support vf rss at all, but experiment
find that
> > could be done.
> >
> > The spec explicitly say that VF RSS *is* supported in particular in the
table
> > mentioned above.
> But the spec(January 2014 revision 2.9) on my hand says: "in IOV mode,
VMDq+RSS mode is not available"  in note of section 4.6.10.2.1

And still there is the whole section about configuring packet filtering,
including Rx in the VF mode (including the table I've referred to). It's
quite confusing, I must say...

> > What your code is doing is that in case of 16 VFs u setup a 32 pools
> > configuration and use only 16 out of them.
> But I don't see any big issue here, in this case, each vf COULD have 8
queues, like I said before, but this is estimation value, actually only 4
queues
> Are really available for one vf, you can refer to spec for the
correctness here.

No issues, I just wanted to clarify that it seems like you are doing it
quite according to the spec.

> >
> > > We can focus on discussing the implementation firstly.

Right. So, after we have clarified that there is nothing u can do at the moment
about the RSS query flow, there is one more open issue here.
In general we need a way to know how many queues from those that are
available may be configured as RSS. While the same issue is present with
the PF as well (it's 16 for 82599 but it may be a different number for a
different device), for VF it's more pronounced since it depends on the PF
configuration.

Don't u think it would be logical to add a specific field for it in the
dev_info struct?

> >
> > >
> > >>> +         break;
> > >>> +
> > >>> + default:
> > >>> +         PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode");
> > >>> +         return -EINVAL;
> > >>> + }
> > >>> +
> > >>> + IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
> > >>> +
> > >>> + return 0;
> > >>> +}
> > >>> +
> > >>> +static int
> > >>>    ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
> > >>>    {
> > >>>           struct ixgbe_hw *hw =
> > >>> @@ -3358,24 +3391,38 @@ ixgbe_dev_mq_rx_configure(struct
> > >> rte_eth_dev *dev)
> > >>>                           default: ixgbe_rss_disable(dev);
> > >>>                   }
> > >>>           } else {
> > >>> -         switch (RTE_ETH_DEV_SRIOV(dev).active) {
> > >>>                   /*
> > >>>                    * SRIOV active scheme
> > >>>                    * FIXME if support DCB/RSS together with VMDq &
SRIOV
> > >>>                    */
> > >>> -         case ETH_64_POOLS:
> > >>> -                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > >> IXGBE_MRQC_VMDQEN);
> > >>> +         switch (dev->data->dev_conf.rxmode.mq_mode) {
> > >>> +         case ETH_MQ_RX_RSS:
> > >>> +         case ETH_MQ_RX_VMDQ_RSS:
> > >>> +                 ixgbe_config_vf_rss(dev);
> > >>>                           break;
> > >>>
> > >>> -         case ETH_32_POOLS:
> > >>> -                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > >> IXGBE_MRQC_VMDQRT4TCEN);
> > >>> -                 break;
> > >>> +         default:
> > >>> +                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
> > >> Sorry for nitpicking but have u considered taking this encapsulated
> > >> "switch- case" block into a separate function? This could make the
> > >> code look a lot nicer. ;)
> > >>
> > >>> +                 case ETH_64_POOLS:
> > >>> +                         IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > >>> +                                 IXGBE_MRQC_VMDQEN);
> > >>> +                         break;
> > >>>
> > >>> -         case ETH_16_POOLS:
> > >>> -                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > >> IXGBE_MRQC_VMDQRT8TCEN);
> > >>> +                 case ETH_32_POOLS:
> > >>> +                         IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > >>> +                                 IXGBE_MRQC_VMDQRT4TCEN);
> > >>> +                         break;
> > >>> +
> > >>> +                 case ETH_16_POOLS:
> > >>> +                         IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > >>> +                                 IXGBE_MRQC_VMDQRT8TCEN);
> > >>> +                         break;
> > >>> +                 default:
> > >>> +                         PMD_INIT_LOG(ERR,
> > >>> +                                 "invalid pool number in IOV
mode");
> > >>> +                         break;
> > >>> +                 }
> > >>>                           break;
> > >>> -         default:
> > >>> -                 PMD_INIT_LOG(ERR, "invalid pool number in IOV
> > >> mode");
> > >>>                   }
> > >>>           }
> > >>>
> > >>> @@ -3989,10 +4036,32 @@ ixgbevf_dev_rx_init(struct rte_eth_dev
> > *dev)
> > >>>           uint16_t buf_size;
> > >>>           uint16_t i;
> > >>>           int ret;
> > >>> + uint16_t valid_rxq_num;
> > >>>
> > >>>           PMD_INIT_FUNC_TRACE();
> > >>>           hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> > >>>
> > >>> + valid_rxq_num = RTE_MIN(dev->data->nb_rx_queues,
> > >>> +hw->mac.max_rx_queues);
> > >>> +
> > >>> + /*
> > >>> +  * VMDq RSS can't support 3 queues, so config it into 4 queues,
> > >>> +  * and give user a hint that some packets may loss if it doesn't
> > >>> +  * poll the queue where those packets are distributed to.
> > >>> +  */
> > >>> + if (valid_rxq_num == 3)
> > >>> +         valid_rxq_num = 4;
> > >> Why to configure more queues that requested and not less (2)? Why to
> > >> configure anything at all and not return an error?
> > > Sorry, I don't agree this is "anything" you say, because I don't use
5,6,7,
> > 8, ..., 16, 2014, 2015,... etc.
> > > By considering 2 or 4,
> > > I prefer 4, the reason is if user need more than 3 queues per vf to do
> > > something, And pf has also the capability to setup 4 queues per vf,
> > > confining to 2 queues is also not good thing, So here try to enable 4
queues,
> > and give user hints here.
> > > Btw, change it into 2 is another way, depends on other guys' more
insight
> > here.
> >
> > Like I said before, trying to guess what user wants is a way to making
a code
> > that is very hard to use and to maintain. Pls., just return an error
and let the
> > user code deal with it the way he/she really wants and not the way u
*think*
> > he/she wants.
> >
> I didn't disagree on this, either :-)
> If you have strong reason for this way and more guys agree with it,
> I will modify it probably in v4.
> > >
> > >>> +
> > >>> + if (dev->data->nb_rx_queues > valid_rxq_num) {
> > >>> +         PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
> > >>> +                 "it should be equal to or less than %d",
> > >>> +                 valid_rxq_num);
> > >>> +         return -1;
> > >>> + } else if (dev->data->nb_rx_queues < valid_rxq_num)
> > >>> +         PMD_INIT_LOG(ERR, "The number of Rx queue is less "
> > >>> +                 "than the number of available Rx queues:%d, "
> > >>> +                 "packets in Rx queues(q_id >= %d) may loss.",
> > >>> +                 valid_rxq_num, dev->data->nb_rx_queues);
> > >> Who ever looks in the "INIT_LOG" if everything "work well" and u make
> > >> it look so by allowing this call to succeed. And then some packets
> > >> will just silently not arrive?! And what the used should somehow
guess to
> > do?
> > >> - Look in the "INIT_LOG"?! This is a nightmare!
> > >>
> > >>> +
> > >>>           /*
> > >>>            * When the VF driver issues a IXGBE_VF_RESET request,
the PF
> > >> driver
> > >>>            * disables the VF receipt of packets if the PF MTU is >
1500.
> > >>> @@ -4094,6 +4163,9 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
> > >>>                           IXGBE_PSRTYPE_IPV6HDR;
> > >>>    #endif
> > >>>
> > >>> + /* Set RQPL for VF RSS according to max Rx queue */
> > >>> + psrtype |= (valid_rxq_num >> 1) <<
> > >>> +         IXGBE_PSRTYPE_RQPL_SHIFT;
> > >>>           IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
> > >>>
> > >>>           if (dev->data->dev_conf.rxmode.enable_scatter) {
>

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
  2014-12-26  6:49               ` Vladislav Zolotarov
@ 2014-12-26  7:26                 ` Ouyang, Changchun
  2014-12-26  7:37                   ` Vladislav Zolotarov
  0 siblings, 1 reply; 144+ messages in thread
From: Ouyang, Changchun @ 2014-12-26  7:26 UTC (permalink / raw)
  To: Vladislav Zolotarov; +Cc: dev

Hi Vladislav,

From: Vladislav Zolotarov [mailto:vladz@cloudius-systems.com]
Sent: Friday, December 26, 2014 2:49 PM
To: Ouyang, Changchun
Cc: dev@dpdk.org
Subject: RE: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS


On Dec 26, 2014 3:52 AM, "Ouyang, Changchun" <changchun.ouyang@intel.com<mailto:changchun.ouyang@intel.com>> wrote:
>
>
>
> > -----Original Message-----
> > From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com<mailto:vladz@cloudius-systems.com>]
> > Sent: Thursday, December 25, 2014 9:20 PM
> > To: Ouyang, Changchun; dev@dpdk.org<mailto:dev@dpdk.org>
> > Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
> >
> >
> > On 12/25/14 04:43, Ouyang, Changchun wrote:
> > > Hi,
> > > Sorry miss some comments, so continue my response below,
> > >
> > >> -----Original Message-----
> > >> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com<mailto:vladz@cloudius-systems.com>]
> > >> Sent: Wednesday, December 24, 2014 6:40 PM
> > >> To: Ouyang, Changchun; dev@dpdk.org<mailto:dev@dpdk.org>
> > >> Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
> > >>
> > >>
> > >> On 12/24/14 07:23, Ouyang Changchun wrote:
> > >>> It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable
> > VF
> > >> RSS.
> > >>> The psrtype will determine how many queues the received packets will
> > >>> distribute to, and the value of psrtype should depends on both facet:
> > >>> max VF rxq number which has been negotiated with PF, and the number
> > >>> of
> > >> rxq specified in config on guest.
> > >>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com<mailto:changchun.ouyang@intel.com>>
> > >>> ---
> > >>>    lib/librte_pmd_ixgbe/ixgbe_pf.c   | 15 +++++++
> > >>>    lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 92
> > >> ++++++++++++++++++++++++++++++++++-----
> > >>>    2 files changed, 97 insertions(+), 10 deletions(-)
> > >>>
> > >>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > >>> b/lib/librte_pmd_ixgbe/ixgbe_pf.c index cbb0145..9c9dad8 100644
> > >>> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > >>> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > >>> @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev
> > >> *eth_dev)
> > >>>           IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw-
> > mac.num_rar_entries), 0);
> > >>>           IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw-
> > mac.num_rar_entries), 0);
> > >>>
> > >>> + /*
> > >>> +  * VF RSS can support at most 4 queues for each VF, even if
> > >>> +  * 8 queues are available for each VF, it need refine to 4
> > >>> +  * queues here due to this limitation, otherwise no queue
> > >>> +  * will receive any packet even RSS is enabled.
> > >> According to Table 7-3 in the 82599 spec RSS is not available when
> > >> port is configured to have 8 queues per pool. This means that if u
> > >> see this configuration u may immediately disable RSS flow in your code.
> > >>
> > >>> +  */
> > >>> + if (eth_dev->data->dev_conf.rxmode.mq_mode ==
> > >> ETH_MQ_RX_VMDQ_RSS) {
> > >>> +         if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
> > >>> +                 RTE_ETH_DEV_SRIOV(eth_dev).active =
> > >> ETH_32_POOLS;
> > >>> +                 RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
> > >>> +                 RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
> > >>> +                         dev_num_vf(eth_dev) * 4;
> > >> According to 82599 spec u can't do that since RSS is not allowed when
> > >> port is configured to have 8 function per-VF. Have u verified that
> > >> this works? If yes, then spec should be updated.
> > >>
> > >>> +         }
> > >>> + }
> > >>> +
> > >>>           /* set VMDq map to default PF pool */
> > >>>           hw->mac.ops.set_vmdq(hw, 0,
> > >>> RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
> > >>>
> > >>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > >>> b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > >>> index f69abda..a7c17a4 100644
> > >>> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > >>> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > >>> @@ -3327,6 +3327,39 @@ ixgbe_alloc_rx_queue_mbufs(struct
> > >> igb_rx_queue *rxq)
> > >>>    }
> > >>>
> > >>>    static int
> > >>> +ixgbe_config_vf_rss(struct rte_eth_dev *dev) {
> > >>> + struct ixgbe_hw *hw;
> > >>> + uint32_t mrqc;
> > >>> +
> > >>> + ixgbe_rss_configure(dev);
> > >>> +
> > >>> + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> > >>> +
> > >>> + /* MRQC: enable VF RSS */
> > >>> + mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
> > >>> + mrqc &= ~IXGBE_MRQC_MRQE_MASK;
> > >>> + switch (RTE_ETH_DEV_SRIOV(dev).active) {
> > >>> + case ETH_64_POOLS:
> > >>> +         mrqc |= IXGBE_MRQC_VMDQRSS64EN;
> > >>> +         break;
> > >>> +
> > >>> + case ETH_32_POOLS:
> > >>> + case ETH_16_POOLS:
> > >>> +         mrqc |= IXGBE_MRQC_VMDQRSS32EN;
> > >> Again, this contradicts with the spec.
> > > Yes, the spec say the hw can't support vf rss at all, but experiment find that
> > could be done.
> >
> > The spec explicitly say that VF RSS *is* supported in particular in the table
> > mentioned above.
> But the spec(January 2014 revision 2.9) on my hand says: "in IOV mode, VMDq+RSS mode is not available"  in note of section 4.6.10.2.1

>And still there is the whole section about configuring packet filtering including Rx in the VF mode (including the table i've referred) . It's quite confusing i must say...

Changchun: do you mind telling me which table you are referring to? I will try to have a look and may share my thoughts if I can.

> > What your code is doing is that in case of 16 VFs u setup a 32 pools
> > configuration and use only 16 out of them.
> But I don't see any big issue here, in this case, each vf COULD have 8 queues, like I said before, but this is estimation value, actually only 4 queues
> Are really available for one vf, you can refer to spec for the correctness here.

>No issues, i just wanted to clarify that it seems like you are doing it quite according to the spec.

> >
> > > We can focus on discussing the implementation firstly.

>Right. So, after we clarified that there is nothing u can do at the moment about the rss query flow, there is  one more open issue here.
>In general we need a way to know how many  queues from those that are available may be configured as RSS. While the same issue is present with the PF as well (it's 16 for 82599 but it may be a different number for a different device) for VF it's more pronounced since it depends on the PF configuration.

>Don't u think it would be logical to add a specific filed for it in the dev_info struct?

Changchun: you are right, and we already have max_rx_queues in dev_info.

While negotiating between the PF and the VF, the negotiated max Rx queue number is set into hw->mac.max_rx_queues.

After that, when you call ixgbe_dev_info_get, that value is set into dev_info->max_rx_queues.

Then you can get the number of queues that packets will be distributed to by reading dev_info->max_rx_queues.

Thanks

Changchun

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
  2014-12-26  7:26                 ` Ouyang, Changchun
@ 2014-12-26  7:37                   ` Vladislav Zolotarov
  2014-12-26  8:45                     ` Ouyang, Changchun
  0 siblings, 1 reply; 144+ messages in thread
From: Vladislav Zolotarov @ 2014-12-26  7:37 UTC (permalink / raw)
  To: Changchun Ouyang; +Cc: dev

On Dec 26, 2014 9:28 AM, "Ouyang, Changchun" <changchun.ouyang@intel.com>
wrote:
>
> Hi Vladislav,
>
>
>
> From: Vladislav Zolotarov [mailto:vladz@cloudius-systems.com]
> Sent: Friday, December 26, 2014 2:49 PM
> To: Ouyang, Changchun
> Cc: dev@dpdk.org
> Subject: RE: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
>
>
>
>
> On Dec 26, 2014 3:52 AM, "Ouyang, Changchun" <changchun.ouyang@intel.com>
wrote:
> >
> >
> >
> > > -----Original Message-----
> > > From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> > > Sent: Thursday, December 25, 2014 9:20 PM
> > > To: Ouyang, Changchun; dev@dpdk.org
> > > Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
> > >
> > >
> > > On 12/25/14 04:43, Ouyang, Changchun wrote:
> > > > Hi,
> > > > Sorry miss some comments, so continue my response below,
> > > >
> > > >> -----Original Message-----
> > > >> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> > > >> Sent: Wednesday, December 24, 2014 6:40 PM
> > > >> To: Ouyang, Changchun; dev@dpdk.org
> > > >> Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
> > > >>
> > > >>
> > > >> On 12/24/14 07:23, Ouyang Changchun wrote:
> > > >>> It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable
> > > VF
> > > >> RSS.
> > > >>> The psrtype will determine how many queues the received packets
will
> > > >>> distribute to, and the value of psrtype should depends on both
facet:
> > > >>> max VF rxq number which has been negotiated with PF, and the
number
> > > >>> of
> > > >> rxq specified in config on guest.
> > > >>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> > > >>> ---
> > > >>>    lib/librte_pmd_ixgbe/ixgbe_pf.c   | 15 +++++++
> > > >>>    lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 92
> > > >> ++++++++++++++++++++++++++++++++++-----
> > > >>>    2 files changed, 97 insertions(+), 10 deletions(-)
> > > >>>
> > > >>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > > >>> b/lib/librte_pmd_ixgbe/ixgbe_pf.c index cbb0145..9c9dad8 100644
> > > >>> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > > >>> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > > >>> @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct
rte_eth_dev
> > > >> *eth_dev)
> > > >>>           IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw-
> > > mac.num_rar_entries), 0);
> > > >>>           IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw-
> > > mac.num_rar_entries), 0);
> > > >>>
> > > >>> + /*
> > > >>> +  * VF RSS can support at most 4 queues for each VF, even if
> > > >>> +  * 8 queues are available for each VF, it need refine to 4
> > > >>> +  * queues here due to this limitation, otherwise no queue
> > > >>> +  * will receive any packet even RSS is enabled.
> > > >> According to Table 7-3 in the 82599 spec RSS is not available when
> > > >> port is configured to have 8 queues per pool. This means that if u
> > > >> see this configuration u may immediately disable RSS flow in your
code.
> > > >>
> > > >>> +  */
> > > >>> + if (eth_dev->data->dev_conf.rxmode.mq_mode ==
> > > >> ETH_MQ_RX_VMDQ_RSS) {
> > > >>> +         if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
> > > >>> +                 RTE_ETH_DEV_SRIOV(eth_dev).active =
> > > >> ETH_32_POOLS;
> > > >>> +                 RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
> > > >>> +                 RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
> > > >>> +                         dev_num_vf(eth_dev) * 4;
> > > >> According to 82599 spec u can't do that since RSS is not allowed
when
> > > >> port is configured to have 8 function per-VF. Have u verified that
> > > >> this works? If yes, then spec should be updated.
> > > >>
> > > >>> +         }
> > > >>> + }
> > > >>> +
> > > >>>           /* set VMDq map to default PF pool */
> > > >>>           hw->mac.ops.set_vmdq(hw, 0,
> > > >>> RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
> > > >>>
> > > >>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > > >>> b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > > >>> index f69abda..a7c17a4 100644
> > > >>> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > > >>> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > > >>> @@ -3327,6 +3327,39 @@ ixgbe_alloc_rx_queue_mbufs(struct
> > > >> igb_rx_queue *rxq)
> > > >>>    }
> > > >>>
> > > >>>    static int
> > > >>> +ixgbe_config_vf_rss(struct rte_eth_dev *dev) {
> > > >>> + struct ixgbe_hw *hw;
> > > >>> + uint32_t mrqc;
> > > >>> +
> > > >>> + ixgbe_rss_configure(dev);
> > > >>> +
> > > >>> + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> > > >>> +
> > > >>> + /* MRQC: enable VF RSS */
> > > >>> + mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
> > > >>> + mrqc &= ~IXGBE_MRQC_MRQE_MASK;
> > > >>> + switch (RTE_ETH_DEV_SRIOV(dev).active) {
> > > >>> + case ETH_64_POOLS:
> > > >>> +         mrqc |= IXGBE_MRQC_VMDQRSS64EN;
> > > >>> +         break;
> > > >>> +
> > > >>> + case ETH_32_POOLS:
> > > >>> + case ETH_16_POOLS:
> > > >>> +         mrqc |= IXGBE_MRQC_VMDQRSS32EN;
> > > >> Again, this contradicts with the spec.
> > > > Yes, the spec say the hw can't support vf rss at all, but
experiment find that
> > > could be done.
> > >
> > > The spec explicitly say that VF RSS *is* supported in particular in
the table
> > > mentioned above.
> > But the spec(January 2014 revision 2.9) on my hand says: "in IOV mode,
VMDq+RSS mode is not available"  in note of section 4.6.10.2.1
>
> >And still there is the whole section about configuring packet filtering
including Rx in the VF mode (including the table i've referred) . It's
quite confusing i must say...
>
> Changchun: do you mind tell me which table you are referring to, I will
try to have a look and may share my thought if I can.
>
> > > What your code is doing is that in case of 16 VFs u setup a 32 pools
> > > configuration and use only 16 out of them.
> > But I don't see any big issue here, in this case, each vf COULD have 8
queues, like I said before, but this is estimation value, actually only 4
queues
> > Are really available for one vf, you can refer to spec for the
correctness here.
>
> >No issues, i just wanted to clarify that it seems like you are doing it
quite according to the spec.
>
> > >
> > > > We can focus on discussing the implementation firstly.
>
> >Right. So, after we clarified that there is nothing u can do at the
moment about the rss query flow, there is  one more open issue here.
> >In general we need a way to know how many  queues from those that are
available may be configured as RSS. While the same issue is present with
the PF as well (it's 16 for 82599 but it may be a different number for a
different device) for VF it's more pronounced since it depends on the PF
configuration.
>
> >Don't u think it would be logical to add a specific filed for it in the
dev_info struct?
>
> Changchun: you are right, and we have already the max_rx_queues in
dev_info,
>
> while negotiating between pf and vf, the negotiated max rx queue number
will be set into hw->mac.max_rx_queues,
>
> And after that when you call ixgbe_dev_info_get, that value will be set
into dev_info->max_rx_queues.
>
> Then you could get the number of queue all packets will distribute to by
getting dev_info->max_rx_queues.

I'm afraid u've missed my point here. For instance, for a PF max_rx_queues
will be set to 128 while u may only configure 16 RSS queues. The same thing
will happen for a VF in the 16 VF configuration: max_rx_queues will be set
to 8 while u may configure only 4 RSS queues.

This is why i suggested to add a separate info field... 😉

>
> Thanks
>
> Changchun

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
  2014-12-26  7:37                   ` Vladislav Zolotarov
@ 2014-12-26  8:45                     ` Ouyang, Changchun
  2014-12-28 10:14                       ` Vlad Zolotarov
  0 siblings, 1 reply; 144+ messages in thread
From: Ouyang, Changchun @ 2014-12-26  8:45 UTC (permalink / raw)
  To: Vladislav Zolotarov; +Cc: dev

Hi Vladislav,

From: Vladislav Zolotarov [mailto:vladz@cloudius-systems.com] 
Sent: Friday, December 26, 2014 3:37 PM
To: Ouyang, Changchun
Cc: dev@dpdk.org
Subject: RE: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS


On Dec 26, 2014 9:28 AM, "Ouyang, Changchun" <changchun.ouyang@intel.com> wrote:
>
> Hi Vladislav,
>
>  
>
> From: Vladislav Zolotarov [mailto:vladz@cloudius-systems.com] 
> Sent: Friday, December 26, 2014 2:49 PM
> To: Ouyang, Changchun
> Cc: dev@dpdk.org
> Subject: RE: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
>
>  
>
>
> On Dec 26, 2014 3:52 AM, "Ouyang, Changchun" <changchun.ouyang@intel.com> wrote:
> >
> >
> >
> > > -----Original Message-----
> > > From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> > > Sent: Thursday, December 25, 2014 9:20 PM
> > > To: Ouyang, Changchun; dev@dpdk.org
> > > Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
> > >
> > >
> > > On 12/25/14 04:43, Ouyang, Changchun wrote:
> > > > Hi,
> > > > Sorry miss some comments, so continue my response below,
> > > >
> > > >> -----Original Message-----
> > > >> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> > > >> Sent: Wednesday, December 24, 2014 6:40 PM
> > > >> To: Ouyang, Changchun; dev@dpdk.org
> > > >> Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
> > > >>
> > > >>
> > > >> On 12/24/14 07:23, Ouyang Changchun wrote:
> > > >>> It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable
> > > VF
> > > >> RSS.
> > > >>> The psrtype will determine how many queues the received packets will
> > > >>> distribute to, and the value of psrtype should depends on both facet:
> > > >>> max VF rxq number which has been negotiated with PF, and the number
> > > >>> of
> > > >> rxq specified in config on guest.
> > > >>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> > > >>> ---
> > > >>>    lib/librte_pmd_ixgbe/ixgbe_pf.c   | 15 +++++++
> > > >>>    lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 92
> > > >> ++++++++++++++++++++++++++++++++++-----
> > > >>>    2 files changed, 97 insertions(+), 10 deletions(-)
> > > >>>
> > > >>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > > >>> b/lib/librte_pmd_ixgbe/ixgbe_pf.c index cbb0145..9c9dad8 100644
> > > >>> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > > >>> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > > >>> @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev
> > > >> *eth_dev)
> > > >>>           IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw-
> > > mac.num_rar_entries), 0);
> > > >>>           IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw-
> > > mac.num_rar_entries), 0);
> > > >>>
> > > >>> + /*
> > > >>> +  * VF RSS can support at most 4 queues for each VF, even if
> > > >>> +  * 8 queues are available for each VF, it need refine to 4
> > > >>> +  * queues here due to this limitation, otherwise no queue
> > > >>> +  * will receive any packet even RSS is enabled.
> > > >> According to Table 7-3 in the 82599 spec RSS is not available when
> > > >> port is configured to have 8 queues per pool. This means that if u
> > > >> see this configuration u may immediately disable RSS flow in your code.
> > > >>
> > > >>> +  */
> > > >>> + if (eth_dev->data->dev_conf.rxmode.mq_mode ==
> > > >> ETH_MQ_RX_VMDQ_RSS) {
> > > >>> +         if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
> > > >>> +                 RTE_ETH_DEV_SRIOV(eth_dev).active =
> > > >> ETH_32_POOLS;
> > > >>> +                 RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
> > > >>> +                 RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
> > > >>> +                         dev_num_vf(eth_dev) * 4;
> > > >> According to 82599 spec u can't do that since RSS is not allowed when
> > > >> port is configured to have 8 function per-VF. Have u verified that
> > > >> this works? If yes, then spec should be updated.
> > > >>
> > > >>> +         }
> > > >>> + }
> > > >>> +
> > > >>>           /* set VMDq map to default PF pool */
> > > >>>           hw->mac.ops.set_vmdq(hw, 0,
> > > >>> RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
> > > >>>
> > > >>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > > >>> b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > > >>> index f69abda..a7c17a4 100644
> > > >>> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > > >>> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > > >>> @@ -3327,6 +3327,39 @@ ixgbe_alloc_rx_queue_mbufs(struct
> > > >> igb_rx_queue *rxq)
> > > >>>    }
> > > >>>
> > > >>>    static int
> > > >>> +ixgbe_config_vf_rss(struct rte_eth_dev *dev) {
> > > >>> + struct ixgbe_hw *hw;
> > > >>> + uint32_t mrqc;
> > > >>> +
> > > >>> + ixgbe_rss_configure(dev);
> > > >>> +
> > > >>> + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> > > >>> +
> > > >>> + /* MRQC: enable VF RSS */
> > > >>> + mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
> > > >>> + mrqc &= ~IXGBE_MRQC_MRQE_MASK;
> > > >>> + switch (RTE_ETH_DEV_SRIOV(dev).active) {
> > > >>> + case ETH_64_POOLS:
> > > >>> +         mrqc |= IXGBE_MRQC_VMDQRSS64EN;
> > > >>> +         break;
> > > >>> +
> > > >>> + case ETH_32_POOLS:
> > > >>> + case ETH_16_POOLS:
> > > >>> +         mrqc |= IXGBE_MRQC_VMDQRSS32EN;
> > > >> Again, this contradicts with the spec.
> > > > Yes, the spec say the hw can't support vf rss at all, but experiment find that
> > > could be done.
> > >
> > > The spec explicitly say that VF RSS *is* supported in particular in the table
> > > mentioned above.
> > But the spec(January 2014 revision 2.9) on my hand says: "in IOV mode, VMDq+RSS mode is not available"  in note of section 4.6.10.2.1
>
> >And still there is the whole section about configuring packet filtering including Rx in the VF mode (including the table i've referred) . It's quite confusing i must say...
>
> Changchun: do you mind tell me which table you are referring to, I will try to have a look and may share my thought if I can.
>
> > > What your code is doing is that in case of 16 VFs u setup a 32 pools
> > > configuration and use only 16 out of them.
> > But I don't see any big issue here, in this case, each vf COULD have 8 queues, like I said before, but this is estimation value, actually only 4 queues
> > Are really available for one vf, you can refer to spec for the correctness here.
>
> >No issues, i just wanted to clarify that it seems like you are doing it quite according to the spec.
>
> > >
> > > > We can focus on discussing the implementation firstly.
>
> >Right. So, after we clarified that there is nothing u can do at the moment about the rss query flow, there is  one more open issue here. 
> >In general we need a way to know how many  queues from those that are available may be configured as RSS. While the same issue is present with the PF as well (it's 16 for 82599 but it may be a different number for a different device) for VF it's more pronounced since it depends on the PF configuration.
>
> >Don't u think it would be logical to add a specific filed for it in the dev_info struct?
>
> Changchun: you are right, and we have already the max_rx_queues in dev_info,
>
> while negotiating between pf and vf, the negotiated max rx queue number will be set into hw->mac.max_rx_queues,
>
> And after that when you call ixgbe_dev_info_get, that value will be set into dev_info->max_rx_queues.
>
> Then you could get the number of queue all packets will distribute to by getting dev_info->max_rx_queues.
>I'm afraid u've missed my point here. For instance, for a PF max_rx_queues will be set to 128 while u may only configure 16 RSS queues. The similar will happen for a VF in the 16 VF
>configuration: max_rx_queues will be set to 8 while u may configure only 4 RSS queues. 
>This is why i suggested to add a separate info field... 😉 
Yes, I got your point this time, but the issue is that when I have 16 VFs and try to set max_rx_queues to 8, no queue can receive any packet on the VF.
This is why I had to add logic to reduce the Rx queue number from 8 to 4 queues.
I have tried to do it the way you suggest, but unfortunately the Rx queues don't work.  If you find any other good method, please let me know.
Thanks and regards,
Changchun
  

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
  2014-12-26  8:45                     ` Ouyang, Changchun
@ 2014-12-28 10:14                       ` Vlad Zolotarov
  0 siblings, 0 replies; 144+ messages in thread
From: Vlad Zolotarov @ 2014-12-28 10:14 UTC (permalink / raw)
  To: Ouyang, Changchun; +Cc: dev


On 12/26/14 10:45, Ouyang, Changchun wrote:
> Hi Vladislav,
>
> From: Vladislav Zolotarov [mailto:vladz@cloudius-systems.com]
> Sent: Friday, December 26, 2014 3:37 PM
> To: Ouyang, Changchun
> Cc: dev@dpdk.org
> Subject: RE: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
>
>
> On Dec 26, 2014 9:28 AM, "Ouyang, Changchun" <changchun.ouyang@intel.com> wrote:
>> Hi Vladislav,
>>
>>   
>>
>> From: Vladislav Zolotarov [mailto:vladz@cloudius-systems.com]
>> Sent: Friday, December 26, 2014 2:49 PM
>> To: Ouyang, Changchun
>> Cc: dev@dpdk.org
>> Subject: RE: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
>>
>>   
>>
>>
>> On Dec 26, 2014 3:52 AM, "Ouyang, Changchun" <changchun.ouyang@intel.com> wrote:
>>>
>>>
>>>> -----Original Message-----
>>>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>>>> Sent: Thursday, December 25, 2014 9:20 PM
>>>> To: Ouyang, Changchun; dev@dpdk.org
>>>> Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
>>>>
>>>>
>>>> On 12/25/14 04:43, Ouyang, Changchun wrote:
>>>>> Hi,
>>>>> Sorry miss some comments, so continue my response below,
>>>>>
>>>>>> -----Original Message-----
>>>>>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>>>>>> Sent: Wednesday, December 24, 2014 6:40 PM
>>>>>> To: Ouyang, Changchun; dev@dpdk.org
>>>>>> Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
>>>>>>
>>>>>>
>>>>>> On 12/24/14 07:23, Ouyang Changchun wrote:
>>>>>>> It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable
>>>> VF
>>>>>> RSS.
>>>>>>> The psrtype will determine how many queues the received packets will
>>>>>>> distribute to, and the value of psrtype should depends on both facet:
>>>>>>> max VF rxq number which has been negotiated with PF, and the number
>>>>>>> of
>>>>>> rxq specified in config on guest.
>>>>>>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
>>>>>>> ---
>>>>>>>      lib/librte_pmd_ixgbe/ixgbe_pf.c   | 15 +++++++
>>>>>>>      lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 92
>>>>>> ++++++++++++++++++++++++++++++++++-----
>>>>>>>      2 files changed, 97 insertions(+), 10 deletions(-)
>>>>>>>
>>>>>>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>>>>>> b/lib/librte_pmd_ixgbe/ixgbe_pf.c index cbb0145..9c9dad8 100644
>>>>>>> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>>>>>> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>>>>>> @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev
>>>>>> *eth_dev)
>>>>>>>             IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw-
>>>> mac.num_rar_entries), 0);
>>>>>>>             IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw-
>>>> mac.num_rar_entries), 0);
>>>>>>> + /*
>>>>>>> +  * VF RSS can support at most 4 queues for each VF, even if
>>>>>>> +  * 8 queues are available for each VF, it need refine to 4
>>>>>>> +  * queues here due to this limitation, otherwise no queue
>>>>>>> +  * will receive any packet even RSS is enabled.
>>>>>> According to Table 7-3 in the 82599 spec RSS is not available when
>>>>>> port is configured to have 8 queues per pool. This means that if u
>>>>>> see this configuration u may immediately disable RSS flow in your code.
>>>>>>
>>>>>>> +  */
>>>>>>> + if (eth_dev->data->dev_conf.rxmode.mq_mode ==
>>>>>> ETH_MQ_RX_VMDQ_RSS) {
>>>>>>> +         if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
>>>>>>> +                 RTE_ETH_DEV_SRIOV(eth_dev).active =
>>>>>> ETH_32_POOLS;
>>>>>>> +                 RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
>>>>>>> +                 RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
>>>>>>> +                         dev_num_vf(eth_dev) * 4;
>>>>>> According to 82599 spec u can't do that since RSS is not allowed when
>>>>>> port is configured to have 8 function per-VF. Have u verified that
>>>>>> this works? If yes, then spec should be updated.
>>>>>>
>>>>>>> +         }
>>>>>>> + }
>>>>>>> +
>>>>>>>             /* set VMDq map to default PF pool */
>>>>>>>             hw->mac.ops.set_vmdq(hw, 0,
>>>>>>> RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
>>>>>>>
>>>>>>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>>>>>>> b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>>>>>>> index f69abda..a7c17a4 100644
>>>>>>> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>>>>>>> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>>>>>>> @@ -3327,6 +3327,39 @@ ixgbe_alloc_rx_queue_mbufs(struct
>>>>>> igb_rx_queue *rxq)
>>>>>>>      }
>>>>>>>
>>>>>>>      static int
>>>>>>> +ixgbe_config_vf_rss(struct rte_eth_dev *dev) {
>>>>>>> + struct ixgbe_hw *hw;
>>>>>>> + uint32_t mrqc;
>>>>>>> +
>>>>>>> + ixgbe_rss_configure(dev);
>>>>>>> +
>>>>>>> + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>>>>>>> +
>>>>>>> + /* MRQC: enable VF RSS */
>>>>>>> + mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
>>>>>>> + mrqc &= ~IXGBE_MRQC_MRQE_MASK;
>>>>>>> + switch (RTE_ETH_DEV_SRIOV(dev).active) {
>>>>>>> + case ETH_64_POOLS:
>>>>>>> +         mrqc |= IXGBE_MRQC_VMDQRSS64EN;
>>>>>>> +         break;
>>>>>>> +
>>>>>>> + case ETH_32_POOLS:
>>>>>>> + case ETH_16_POOLS:
>>>>>>> +         mrqc |= IXGBE_MRQC_VMDQRSS32EN;
>>>>>> Again, this contradicts with the spec.
>>>>> Yes, the spec say the hw can't support vf rss at all, but experiment find that
>>>> could be done.
>>>>
>>>> The spec explicitly say that VF RSS *is* supported in particular in the table
>>>> mentioned above.
>>> But the spec(January 2014 revision 2.9) on my hand says: "in IOV mode, VMDq+RSS mode is not available"  in note of section 4.6.10.2.1
>>> And still there is the whole section about configuring packet filtering including Rx in the VF mode (including the table i've referred) . It's quite confusing i must say...
>> Changchun: do you mind tell me which table you are referring to, I will try to have a look and may share my thought if I can.
>>
>>>> What your code is doing is that in case of 16 VFs u setup a 32 pools
>>>> configuration and use only 16 out of them.
>>> But I don't see any big issue here, in this case, each vf COULD have 8 queues, like I said before, but this is estimation value, actually only 4 queues
>>> Are really available for one vf, you can refer to spec for the correctness here.
>>> No issues, i just wanted to clarify that it seems like you are doing it quite according to the spec.
>>>>> We can focus on discussing the implementation firstly.
>>> Right. So, after we clarified that there is nothing u can do at the moment about the rss query flow, there is  one more open issue here.
>>> In general we need a way to know how many  queues from those that are available may be configured as RSS. While the same issue is present with the PF as well (it's 16 for 82599 but it may be a different number for a different device) for VF it's more pronounced since it depends on the PF configuration.
>>> Don't u think it would be logical to add a specific filed for it in the dev_info struct?
>> Changchun: you are right, and we have already the max_rx_queues in dev_info,
>>
>> while negotiating between pf and vf, the negotiated max rx queue number will be set into hw->mac.max_rx_queues,
>>
>> And after that when you call ixgbe_dev_info_get, that value will be set into dev_info->max_rx_queues.
>>
>> Then you could get the number of queue all packets will distribute to by getting dev_info->max_rx_queues.
>> I'm afraid u've missed my point here. For instance, for a PF max_rx_queues will be set to 128 while u may only configure 16 RSS queues. The similar will happen for a VF in the 16 VF
>> configuration: max_rx_queues will be set to 8 while u may configure only 4 RSS queues.
>> This is why i suggested to add a separate info field... 😉
> Yes, I got your point this time, but the issue is that when I have 16 vf, and try to set max_rx_queues as 8, then no queue can rx any packet on vf,
> This is why I have to add a logic to refine the rx queue number from 8 to 4 queues.
> I have tried to do it in the way as you suggest, but unfortunately rx queue can't work.  If you find any other good method, pls let me know.

Pls., note that RSS is not the only multi-queue mode supported by both
HW and DPDK - there is also a DCB mode. This mode is also supported in the VF
mode according to the same Table 7-3. And, according to the same table,
there is an 8 TC per 16 pools mode. Therefore if a user desires to
utilize all 8 available Rx queues of a VF he/she could - in a DCB Rx mode.

Now looking at your code a bit more deeply I see that u cut the number
of Rx queues per pool down to 4 in a PF configuration when VMDQ_RSS mode
is requested, which is ok but it still leaves the general issue open.
Let's describe it in detail for PF and VF separately:

For a PF:

  * When a user queries the PF he only gets the maximum number of Rx
    queues and he has no way to know what is a maximum set of RSS/DCB
    queues he/she may configure. E.g. for 82599 PF the maximum Rx queues
    number is 128 and the maximum RSS set size is 16 (see table 7-1 in
    the spec for all set of supported modes).
  * Therefore the user can't write a generic vendor independent code
    that will be able to configure RSS for a PF based on the current
    rte_eth_dev_info_get() output.

For a VF:

  * Similarly to PF above, if VF supports both RSS and DCB
    configurations, having the max_rx_queues is not enough since the
    maximum RSS set may be smaller than that number. "Luckily", 82599
    supports only either RSS or DCB VF configuration at the same time
    and this is configured globally during PF configuration but who said
    that later Intel's NICs or other provider's NICs supported by DCB
    are going to have the same limitation? So, in a general case, we
    find ourselves in the same uncertainty in a VF case like in a PF
    case above.
  * Currently, a VF has no means to know what the PF multi-queue
    configuration (Table 7-3) is: RSS or DCB. Your patch-set sort of
    assumes that the PF is accessible at the same level where the VF DPDK
    code runs, but this is not the case in the environments that SRIOV was
    originally targeting - virtualization environments, where the
    Guest code has no access whatsoever to the PF and may only query the VF.
    AWS is one real-life example. So, when DPDK code has to initialize
    an SRIOV VF in a Guest OS it lacks information about both the
    available Rx MQ mode and its capabilities (u may say we have
    max_rx_queues for the capabilities, but see the bullet above).


What I suggest to address all issues above is to add the following 
fields to the rte_eth_dev_info:

 1. rte_eth_rx_mq_mode rx_mode - for supported Rx MQ modes.
 2. uint16_t max_rss_queues - for the maximum RSS set size.
 3. uint16_t max_dcb_queues - for the maximum number of TCs.

These 3 new fields will clearly describe the Rx MQ capabilities of the
function. Further correctness checking, like the specific RSS queues
number configuration for VF, should be implemented as an error code
returned from the rte_eth_dev_configure().


Pls., comment.
Vlad


> Thanks and regards,
> Changchun
>    

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
  2014-12-24  5:23     ` [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS Ouyang Changchun
  2014-12-24 10:39       ` Vlad Zolotarov
@ 2015-01-04  2:10       ` Liang, Cunming
  2015-01-04  6:25         ` Ouyang, Changchun
  1 sibling, 1 reply; 144+ messages in thread
From: Liang, Cunming @ 2015-01-04  2:10 UTC (permalink / raw)
  To: Ouyang, Changchun, dev



> -----Original Message-----
> From: Ouyang, Changchun
> Sent: Wednesday, December 24, 2014 1:23 PM
> To: dev@dpdk.org
> Cc: Liang, Cunming; Cao, Waterman; Ouyang, Changchun
> Subject: [PATCH v3 5/6] ixgbe: Config VF RSS
> 
> It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable VF RSS.
> 
> The psrtype will determine how many queues the received packets will distribute
> to,
> and the value of psrtype should depends on both facet: max VF rxq number
> which
> has been negotiated with PF, and the number of rxq specified in config on guest.
> 
> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> ---
>  lib/librte_pmd_ixgbe/ixgbe_pf.c   | 15 +++++++
>  lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 92
> ++++++++++++++++++++++++++++++++++-----
>  2 files changed, 97 insertions(+), 10 deletions(-)
> 
> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> index cbb0145..9c9dad8 100644
> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev
> *eth_dev)
>  	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw->mac.num_rar_entries),
> 0);
>  	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw->mac.num_rar_entries),
> 0);
> 
> +	/*
> +	 * VF RSS can support at most 4 queues for each VF, even if
> +	 * 8 queues are available for each VF, it need refine to 4
> +	 * queues here due to this limitation, otherwise no queue
> +	 * will receive any packet even RSS is enabled.
> +	 */
> +	if (eth_dev->data->dev_conf.rxmode.mq_mode ==
> ETH_MQ_RX_VMDQ_RSS) {
> +		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
> +			RTE_ETH_DEV_SRIOV(eth_dev).active = ETH_32_POOLS;
> +			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
> +			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
> +				dev_num_vf(eth_dev) * 4;
> +		}
> +	}
> +
>  	/* set VMDq map to default PF pool */
>  	hw->mac.ops.set_vmdq(hw, 0,
> RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
> 
> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> index f69abda..a7c17a4 100644
> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> @@ -3327,6 +3327,39 @@ ixgbe_alloc_rx_queue_mbufs(struct igb_rx_queue
> *rxq)
>  }
> 
>  static int
> +ixgbe_config_vf_rss(struct rte_eth_dev *dev)
> +{
> +	struct ixgbe_hw *hw;
> +	uint32_t mrqc;
> +
> +	ixgbe_rss_configure(dev);
> +
> +	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> +
> +	/* MRQC: enable VF RSS */
> +	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
> +	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
> +	switch (RTE_ETH_DEV_SRIOV(dev).active) {
> +	case ETH_64_POOLS:
> +		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
> +		break;
> +
> +	case ETH_32_POOLS:
> +	case ETH_16_POOLS:
> +		mrqc |= IXGBE_MRQC_VMDQRSS32EN;
> +		break;
> +
> +	default:
> +		PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode");
> +		return -EINVAL;
> +	}
> +
> +	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
> +
> +	return 0;
> +}
> +
> +static int
>  ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
>  {
>  	struct ixgbe_hw *hw =
> @@ -3358,24 +3391,38 @@ ixgbe_dev_mq_rx_configure(struct rte_eth_dev
> *dev)
>  			default: ixgbe_rss_disable(dev);
>  		}
>  	} else {
> -		switch (RTE_ETH_DEV_SRIOV(dev).active) {
>  		/*
>  		 * SRIOV active scheme
>  		 * FIXME if support DCB/RSS together with VMDq & SRIOV
>  		 */
> -		case ETH_64_POOLS:
> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> IXGBE_MRQC_VMDQEN);
> +		switch (dev->data->dev_conf.rxmode.mq_mode) {
> +		case ETH_MQ_RX_RSS:
> +		case ETH_MQ_RX_VMDQ_RSS:
> +			ixgbe_config_vf_rss(dev);
>  			break;
> 
> -		case ETH_32_POOLS:
> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> IXGBE_MRQC_VMDQRT4TCEN);
> -			break;
> +		default:
> +			switch (RTE_ETH_DEV_SRIOV(dev).active) {
[Liang, Cunming]  Just a minor comment. To avoid a switch branch inside another switch, we can have an ixgbe_config_vf_default(),
which processes everything when no RSS/DCB is required in the multi-queue setting.
Then we can put the whole 'switch(SRIOV(dev).active){...}' in it.
> +			case ETH_64_POOLS:
> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> +					IXGBE_MRQC_VMDQEN);
> +				break;
> 
> -		case ETH_16_POOLS:
> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> IXGBE_MRQC_VMDQRT8TCEN);
> +			case ETH_32_POOLS:
> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> +					IXGBE_MRQC_VMDQRT4TCEN);
> +				break;
> +
> +			case ETH_16_POOLS:
> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> +					IXGBE_MRQC_VMDQRT8TCEN);
> +				break;
> +			default:
> +				PMD_INIT_LOG(ERR,
> +					"invalid pool number in IOV mode");
> +				break;
> +			}
>  			break;
> -		default:
> -			PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
>  		}
>  	}
> 
> @@ -3989,10 +4036,32 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
>  	uint16_t buf_size;
>  	uint16_t i;
>  	int ret;
> +	uint16_t valid_rxq_num;
> 
>  	PMD_INIT_FUNC_TRACE();
>  	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> 
> +	valid_rxq_num = RTE_MIN(dev->data->nb_rx_queues, hw-
> >mac.max_rx_queues);
> +
> +	/*
> +	 * VMDq RSS can't support 3 queues, so config it into 4 queues,
> +	 * and give user a hint that some packets may loss if it doesn't
> +	 * poll the queue where those packets are distributed to.
> +	 */
> +	if (valid_rxq_num == 3)
[Liang, Cunming] According to the inline comment, it makes more sense to use 'if (valid_rxq_num >= 3)',
in case the value returned by max_rx_queues is greater than 4.
> +		valid_rxq_num = 4;
> +
> +	if (dev->data->nb_rx_queues > valid_rxq_num) {
> +		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
> +			"it should be equal to or less than %d",
> +			valid_rxq_num);
> +		return -1;
> +	} else if (dev->data->nb_rx_queues < valid_rxq_num)
> +		PMD_INIT_LOG(ERR, "The number of Rx queue is less "
> +			"than the number of available Rx queues:%d, "
> +			"packets in Rx queues(q_id >= %d) may loss.",
> +			valid_rxq_num, dev->data->nb_rx_queues);
> +
>  	/*
>  	 * When the VF driver issues a IXGBE_VF_RESET request, the PF driver
>  	 * disables the VF receipt of packets if the PF MTU is > 1500.
> @@ -4094,6 +4163,9 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
>  			IXGBE_PSRTYPE_IPV6HDR;
>  #endif
> 
> +	/* Set RQPL for VF RSS according to max Rx queue */
> +	psrtype |= (valid_rxq_num >> 1) <<
> +		IXGBE_PSRTYPE_RQPL_SHIFT;
>  	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
> 
>  	if (dev->data->dev_conf.rxmode.enable_scatter) {
> --
> 1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
  2015-01-04  2:10       ` Liang, Cunming
@ 2015-01-04  6:25         ` Ouyang, Changchun
  0 siblings, 0 replies; 144+ messages in thread
From: Ouyang, Changchun @ 2015-01-04  6:25 UTC (permalink / raw)
  To: Liang, Cunming, dev

Hi Steve,

> -----Original Message-----
> From: Liang, Cunming
> Sent: Sunday, January 4, 2015 10:11 AM
> To: Ouyang, Changchun; dev@dpdk.org
> Cc: Cao, Waterman
> Subject: RE: [PATCH v3 5/6] ixgbe: Config VF RSS
> 
> 
> 
> > -----Original Message-----
> > From: Ouyang, Changchun
> > Sent: Wednesday, December 24, 2014 1:23 PM
> > To: dev@dpdk.org
> > Cc: Liang, Cunming; Cao, Waterman; Ouyang, Changchun
> > Subject: [PATCH v3 5/6] ixgbe: Config VF RSS
> >
> > It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable VF
> RSS.
> >
> > The psrtype will determine how many queues the received packets will
> > distribute to, and the value of psrtype should depends on both facet:
> > max VF rxq number which has been negotiated with PF, and the number of
> > rxq specified in config on guest.
> >
> > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> > ---
> >  lib/librte_pmd_ixgbe/ixgbe_pf.c   | 15 +++++++
> >  lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 92
> > ++++++++++++++++++++++++++++++++++-----
> >  2 files changed, 97 insertions(+), 10 deletions(-)
> >
> > diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > b/lib/librte_pmd_ixgbe/ixgbe_pf.c index cbb0145..9c9dad8 100644
> > --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev
> > *eth_dev)
> >  	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw-
> >mac.num_rar_entries),
> > 0);
> >  	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw-
> >mac.num_rar_entries),
> > 0);
> >
> > +	/*
> > +	 * VF RSS can support at most 4 queues for each VF, even if
> > +	 * 8 queues are available for each VF, it need refine to 4
> > +	 * queues here due to this limitation, otherwise no queue
> > +	 * will receive any packet even RSS is enabled.
> > +	 */
> > +	if (eth_dev->data->dev_conf.rxmode.mq_mode ==
> > ETH_MQ_RX_VMDQ_RSS) {
> > +		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
> > +			RTE_ETH_DEV_SRIOV(eth_dev).active =
> ETH_32_POOLS;
> > +			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
> > +			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
> > +				dev_num_vf(eth_dev) * 4;
> > +		}
> > +	}
> > +
> >  	/* set VMDq map to default PF pool */
> >  	hw->mac.ops.set_vmdq(hw, 0,
> > RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
> >
> > diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > index f69abda..a7c17a4 100644
> > --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > @@ -3327,6 +3327,39 @@ ixgbe_alloc_rx_queue_mbufs(struct
> igb_rx_queue
> > *rxq)
> >  }
> >
> >  static int
> > +ixgbe_config_vf_rss(struct rte_eth_dev *dev) {
> > +	struct ixgbe_hw *hw;
> > +	uint32_t mrqc;
> > +
> > +	ixgbe_rss_configure(dev);
> > +
> > +	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> > +
> > +	/* MRQC: enable VF RSS */
> > +	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
> > +	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
> > +	switch (RTE_ETH_DEV_SRIOV(dev).active) {
> > +	case ETH_64_POOLS:
> > +		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
> > +		break;
> > +
> > +	case ETH_32_POOLS:
> > +	case ETH_16_POOLS:
> > +		mrqc |= IXGBE_MRQC_VMDQRSS32EN;
> > +		break;
> > +
> > +	default:
> > +		PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode");
> > +		return -EINVAL;
> > +	}
> > +
> > +	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
> > +
> > +	return 0;
> > +}
> > +
> > +static int
> >  ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)  {
> >  	struct ixgbe_hw *hw =
> > @@ -3358,24 +3391,38 @@ ixgbe_dev_mq_rx_configure(struct
> rte_eth_dev
> > *dev)
> >  			default: ixgbe_rss_disable(dev);
> >  		}
> >  	} else {
> > -		switch (RTE_ETH_DEV_SRIOV(dev).active) {
> >  		/*
> >  		 * SRIOV active scheme
> >  		 * FIXME if support DCB/RSS together with VMDq & SRIOV
> >  		 */
> > -		case ETH_64_POOLS:
> > -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > IXGBE_MRQC_VMDQEN);
> > +		switch (dev->data->dev_conf.rxmode.mq_mode) {
> > +		case ETH_MQ_RX_RSS:
> > +		case ETH_MQ_RX_VMDQ_RSS:
> > +			ixgbe_config_vf_rss(dev);
> >  			break;
> >
> > -		case ETH_32_POOLS:
> > -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > IXGBE_MRQC_VMDQRT4TCEN);
> > -			break;
> > +		default:
> > +			switch (RTE_ETH_DEV_SRIOV(dev).active) {
> [Liang, Cunming]  Just a minor comments. To avoid a switch branch inside
> another switch, we can have a  ixgbe_config_vf_default(), which process all
> the things if no RSS/DCB required in multi-queue setting.
> Then we can put all the 'switch(SRIOV(dev).active){...}'  in it.

Yes, will resolve it in v4 patch.

> > +			case ETH_64_POOLS:
> > +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > +					IXGBE_MRQC_VMDQEN);
> > +				break;
> >
> > -		case ETH_16_POOLS:
> > -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > IXGBE_MRQC_VMDQRT8TCEN);
> > +			case ETH_32_POOLS:
> > +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > +					IXGBE_MRQC_VMDQRT4TCEN);
> > +				break;
> > +
> > +			case ETH_16_POOLS:
> > +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > +					IXGBE_MRQC_VMDQRT8TCEN);
> > +				break;
> > +			default:
> > +				PMD_INIT_LOG(ERR,
> > +					"invalid pool number in IOV mode");
> > +				break;
> > +			}
> >  			break;
> > -		default:
> > -			PMD_INIT_LOG(ERR, "invalid pool number in IOV
> mode");
> >  		}
> >  	}
> >
> > @@ -3989,10 +4036,32 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
> >  	uint16_t buf_size;
> >  	uint16_t i;
> >  	int ret;
> > +	uint16_t valid_rxq_num;
> >
> >  	PMD_INIT_FUNC_TRACE();
> >  	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> >
> > +	valid_rxq_num = RTE_MIN(dev->data->nb_rx_queues, hw-
> > >mac.max_rx_queues);
> > +
> > +	/*
> > +	 * VMDq RSS can't support 3 queues, so config it into 4 queues,
> > +	 * and give user a hint that some packets may loss if it doesn't
> > +	 * poll the queue where those packets are distributed to.
> > +	 */
> > +	if (valid_rxq_num == 3)
> [Liang, Cunming] According to the inline comment, it makes more sense to
> use 'if (valid_rxq_num >= 3)'.
> In case, the value returned by max_rx_queues is not less equal than 4.

This will be resolved in v4 patch.
Thanks
Changchun

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH v4 0/6] Enable VF RSS for Niantic
  2014-12-24  5:22   ` [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic Ouyang Changchun
                       ` (7 preceding siblings ...)
  2014-12-24 10:49     ` Vlad Zolotarov
@ 2015-01-04  7:18     ` Ouyang Changchun
  2015-01-04  7:18       ` [dpdk-dev] [PATCH v4 1/6] ixgbe: Code cleanup Ouyang Changchun
                         ` (6 more replies)
  8 siblings, 7 replies; 144+ messages in thread
From: Ouyang Changchun @ 2015-01-04  7:18 UTC (permalink / raw)
  To: dev

This patch enables VF RSS for Niantic, which allows each VF to have at most 4 queues.
The actual queue number per VF depends on the total number of pools, which is
determined by the max number of VFs at PF initialization stage and the number of
queues specified in config:
1) If the max number of VFs is in the range from 1 to 32, and the number of rxq is 4
('--rxq 4' in testpmd), then there are 32 pools in total (ETH_32_POOLS), and each VF
has 4 queues;
 
2) If the max number of VFs is in the range from 33 to 64, and the number of rxq is 2
('--rxq 2' in testpmd), then there are 64 pools in total (ETH_64_POOLS), and each VF
has 2 queues;
 
On host, to enable VF RSS functionality, rx mq mode should be set to ETH_MQ_RX_VMDQ_RSS
or ETH_MQ_RX_RSS mode, and SRIOV mode should be activated (max_vfs >= 1).
The VF RSS information, such as hash function, RSS key and RSS key length, also needs to be configured.
 
The limitation for Niantic VF RSS is:
the hash and key are shared among the PF and all VFs, and the RETA table with 128 entries is
also shared among the PF and all VFs. So it is not possible to provide a method to query the hash
and RETA content per VF on the guest; if needed, please query them on the host (PF) for
the shared RETA information.
 
v4 change:
  - Extract a function to remove embedded switch-case statement;
  - Check whether RX queue number is a valid one, otherwise return error;
  - Update the description a bit;
  
v3 change:
  - More cleanup;
 
v2 change:
  - Update the description;
  - Use receiving queue number('--rxq <q-num>') specified in config to determine the 
    number of pool and the number of queue per VF;
 
v1 change:
  - Config VF RSS;

Changchun Ouyang (6):
  ixgbe: Code cleanup
  ixgbe: Negotiate VF API version
  ixgbe: Get VF queue number
  ether: Check VMDq RSS mode
  ixgbe: Config VF RSS
  testpmd: Set Rx VMDq RSS mode

 app/test-pmd/testpmd.c              |  10 +++
 lib/librte_ether/rte_ethdev.c       |  39 ++++++++--
 lib/librte_pmd_ixgbe/ixgbe_ethdev.h |   1 +
 lib/librte_pmd_ixgbe/ixgbe_pf.c     |  75 +++++++++++++++++++-
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c   | 138 ++++++++++++++++++++++++++++--------
 5 files changed, 228 insertions(+), 35 deletions(-)

-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH v4 1/6] ixgbe: Code cleanup
  2015-01-04  7:18     ` [dpdk-dev] [PATCH v4 " Ouyang Changchun
@ 2015-01-04  7:18       ` Ouyang Changchun
  2015-01-04  8:22         ` Vlad Zolotarov
  2015-01-04  7:18       ` [dpdk-dev] [PATCH v4 2/6] ixgbe: Negotiate VF API version Ouyang Changchun
                         ` (5 subsequent siblings)
  6 siblings, 1 reply; 144+ messages in thread
From: Ouyang Changchun @ 2015-01-04  7:18 UTC (permalink / raw)
  To: dev

Put global register configuring out of the per-queue loop; also fix typo and indent.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 35 ++++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index 5c36bff..f69abda 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -3548,9 +3548,9 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
 				IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
 			}
 			srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
-				   IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
-				  IXGBE_SRRCTL_BSIZEHDR_MASK);
-			srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
+				IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
+				IXGBE_SRRCTL_BSIZEHDR_MASK);
+			srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
 		} else
 #endif
 			srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
@@ -3985,7 +3985,7 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 	struct igb_rx_queue *rxq;
 	struct rte_pktmbuf_pool_private *mbp_priv;
 	uint64_t bus_addr;
-	uint32_t srrctl;
+	uint32_t srrctl, psrtype = 0;
 	uint16_t buf_size;
 	uint16_t i;
 	int ret;
@@ -4039,20 +4039,10 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 		 * Configure Header Split
 		 */
 		if (dev->data->dev_conf.rxmode.header_split) {
-
-			/* Must setup the PSRTYPE register */
-			uint32_t psrtype;
-			psrtype = IXGBE_PSRTYPE_TCPHDR |
-				IXGBE_PSRTYPE_UDPHDR   |
-				IXGBE_PSRTYPE_IPV4HDR  |
-				IXGBE_PSRTYPE_IPV6HDR;
-
-			IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE(i), psrtype);
-
 			srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
-				   IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
-				  IXGBE_SRRCTL_BSIZEHDR_MASK);
-			srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
+				IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
+				IXGBE_SRRCTL_BSIZEHDR_MASK);
+			srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
 		} else
 #endif
 			srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
@@ -4095,6 +4085,17 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 		}
 	}
 
+#ifdef RTE_HEADER_SPLIT_ENABLE
+	if (dev->data->dev_conf.rxmode.header_split)
+		/* Must setup the PSRTYPE register */
+		psrtype = IXGBE_PSRTYPE_TCPHDR |
+			IXGBE_PSRTYPE_UDPHDR   |
+			IXGBE_PSRTYPE_IPV4HDR  |
+			IXGBE_PSRTYPE_IPV6HDR;
+#endif
+
+	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
+
 	if (dev->data->dev_conf.rxmode.enable_scatter) {
 		if (!dev->data->scattered_rx)
 			PMD_INIT_LOG(DEBUG, "forcing scatter mode");
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH v4 2/6] ixgbe: Negotiate VF API version
  2015-01-04  7:18     ` [dpdk-dev] [PATCH v4 " Ouyang Changchun
  2015-01-04  7:18       ` [dpdk-dev] [PATCH v4 1/6] ixgbe: Code cleanup Ouyang Changchun
@ 2015-01-04  7:18       ` Ouyang Changchun
  2015-01-04  8:26         ` Vlad Zolotarov
  2015-01-04  7:18       ` [dpdk-dev] [PATCH v4 3/6] ixgbe: Get VF queue number Ouyang Changchun
                         ` (4 subsequent siblings)
  6 siblings, 1 reply; 144+ messages in thread
From: Ouyang Changchun @ 2015-01-04  7:18 UTC (permalink / raw)
  To: dev

Negotiate API version with VF when receiving the IXGBE_VF_API_NEGOTIATE message.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_pmd_ixgbe/ixgbe_ethdev.h |  1 +
 lib/librte_pmd_ixgbe/ixgbe_pf.c     | 25 +++++++++++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.h b/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
index ca99170..730098d 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
+++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
@@ -159,6 +159,7 @@ struct ixgbe_vf_info {
 	uint16_t tx_rate[IXGBE_MAX_QUEUE_NUM_PER_VF];
 	uint16_t vlan_count;
 	uint8_t spoofchk_enabled;
+	uint8_t api_version;
 };
 
 /*
diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c b/lib/librte_pmd_ixgbe/ixgbe_pf.c
index 51da1fd..495aff5 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
@@ -469,6 +469,28 @@ ixgbe_set_vf_lpe(struct rte_eth_dev *dev, __rte_unused uint32_t vf, uint32_t *ms
 }
 
 static int
+ixgbe_negotiate_vf_api(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf)
+{
+	uint32_t api_version = msgbuf[1];
+	struct ixgbe_vf_info *vfinfo =
+		*IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private);
+
+	switch (api_version) {
+	case ixgbe_mbox_api_10:
+	case ixgbe_mbox_api_11:
+		vfinfo[vf].api_version = (uint8_t)api_version;
+		return 0;
+	default:
+		break;
+	}
+
+	RTE_LOG(ERR, PMD, "Negotiate invalid api version %u from VF %d\n",
+		api_version, vf);
+
+	return -1;
+}
+
+static int
 ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
 {
 	uint16_t mbx_size = IXGBE_VFMAILBOX_SIZE;
@@ -512,6 +534,9 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
 	case IXGBE_VF_SET_VLAN:
 		retval = ixgbe_vf_set_vlan(dev, vf, msgbuf);
 		break;
+	case IXGBE_VF_API_NEGOTIATE:
+		retval = ixgbe_negotiate_vf_api(dev, vf, msgbuf);
+		break;
 	default:
 		PMD_DRV_LOG(DEBUG, "Unhandled Msg %8.8x", (unsigned)msgbuf[0]);
 		retval = IXGBE_ERR_MBX;
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH v4 3/6] ixgbe: Get VF queue number
  2015-01-04  7:18     ` [dpdk-dev] [PATCH v4 " Ouyang Changchun
  2015-01-04  7:18       ` [dpdk-dev] [PATCH v4 1/6] ixgbe: Code cleanup Ouyang Changchun
  2015-01-04  7:18       ` [dpdk-dev] [PATCH v4 2/6] ixgbe: Negotiate VF API version Ouyang Changchun
@ 2015-01-04  7:18       ` Ouyang Changchun
  2015-01-04  8:38         ` Vlad Zolotarov
  2015-01-04  7:18       ` [dpdk-dev] [PATCH v4 4/6] ether: Check VMDq RSS mode Ouyang Changchun
                         ` (3 subsequent siblings)
  6 siblings, 1 reply; 144+ messages in thread
From: Ouyang Changchun @ 2015-01-04  7:18 UTC (permalink / raw)
  To: dev

Get the available Rx and Tx queue number when receiving IXGBE_VF_GET_QUEUES message from VF.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_pmd_ixgbe/ixgbe_pf.c | 35 ++++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c b/lib/librte_pmd_ixgbe/ixgbe_pf.c
index 495aff5..cbb0145 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
@@ -53,6 +53,8 @@
 #include "ixgbe_ethdev.h"
 
 #define IXGBE_MAX_VFTA     (128)
+#define IXGBE_VF_MSG_SIZE_DEFAULT 1
+#define IXGBE_VF_GET_QUEUE_MSG_SIZE 5
 
 static inline uint16_t
 dev_num_vf(struct rte_eth_dev *eth_dev)
@@ -491,9 +493,36 @@ ixgbe_negotiate_vf_api(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf)
 }
 
 static int
+ixgbe_get_vf_queues(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf)
+{
+	struct ixgbe_vf_info *vfinfo =
+		*IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private);
+	uint32_t default_q = vf * RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
+
+	/* Verify if the PF supports the mbox APIs version or not */
+	switch (vfinfo[vf].api_version) {
+	case ixgbe_mbox_api_20:
+	case ixgbe_mbox_api_11:
+		break;
+	default:
+		return -1;
+	}
+
+	/* Notify VF of Rx and Tx queue number */
+	msgbuf[IXGBE_VF_RX_QUEUES] = RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
+	msgbuf[IXGBE_VF_TX_QUEUES] = RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
+
+	/* Notify VF of default queue */
+	msgbuf[IXGBE_VF_DEF_QUEUE] = default_q;
+
+	return 0;
+}
+
+static int
 ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
 {
 	uint16_t mbx_size = IXGBE_VFMAILBOX_SIZE;
+	uint16_t msg_size = IXGBE_VF_MSG_SIZE_DEFAULT;
 	uint32_t msgbuf[IXGBE_VFMAILBOX_SIZE];
 	int32_t retval;
 	struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
@@ -537,6 +566,10 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
 	case IXGBE_VF_API_NEGOTIATE:
 		retval = ixgbe_negotiate_vf_api(dev, vf, msgbuf);
 		break;
+	case IXGBE_VF_GET_QUEUES:
+		retval = ixgbe_get_vf_queues(dev, vf, msgbuf);
+		msg_size = IXGBE_VF_GET_QUEUE_MSG_SIZE;
+		break;
 	default:
 		PMD_DRV_LOG(DEBUG, "Unhandled Msg %8.8x", (unsigned)msgbuf[0]);
 		retval = IXGBE_ERR_MBX;
@@ -551,7 +584,7 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
 
 	msgbuf[0] |= IXGBE_VT_MSGTYPE_CTS;
 
-	ixgbe_write_mbx(hw, msgbuf, 1, vf);
+	ixgbe_write_mbx(hw, msgbuf, msg_size, vf);
 
 	return retval;
 }
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH v4 4/6] ether: Check VMDq RSS mode
  2015-01-04  7:18     ` [dpdk-dev] [PATCH v4 " Ouyang Changchun
                         ` (2 preceding siblings ...)
  2015-01-04  7:18       ` [dpdk-dev] [PATCH v4 3/6] ixgbe: Get VF queue number Ouyang Changchun
@ 2015-01-04  7:18       ` Ouyang Changchun
  2015-01-04  8:45         ` Vlad Zolotarov
  2015-01-04  7:18       ` [dpdk-dev] [PATCH v4 5/6] ixgbe: Config VF RSS Ouyang Changchun
                         ` (2 subsequent siblings)
  6 siblings, 1 reply; 144+ messages in thread
From: Ouyang Changchun @ 2015-01-04  7:18 UTC (permalink / raw)
  To: dev

Check mq mode for VMDq RSS and handle it correctly instead of returning an error.
Also remove the limitation that the per-pool queue number has a max value of 1, because
the per-pool queue number could be 2 or 4 in VMDq RSS mode.

The number of rxq specified in config will determine the mq mode for VMDq RSS.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_ether/rte_ethdev.c | 39 ++++++++++++++++++++++++++++++++++-----
 1 file changed, 34 insertions(+), 5 deletions(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 95f2ceb..59ff325 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -510,8 +510,7 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 
 	if (RTE_ETH_DEV_SRIOV(dev).active != 0) {
 		/* check multi-queue mode */
-		if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_RSS) ||
-		    (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
+		if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
 		    (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB_RSS) ||
 		    (dev_conf->txmode.mq_mode == ETH_MQ_TX_DCB)) {
 			/* SRIOV only works in VMDq enable mode */
@@ -525,7 +524,6 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 		}
 
 		switch (dev_conf->rxmode.mq_mode) {
-		case ETH_MQ_RX_VMDQ_RSS:
 		case ETH_MQ_RX_VMDQ_DCB:
 		case ETH_MQ_RX_VMDQ_DCB_RSS:
 			/* DCB/RSS VMDQ in SRIOV mode, not implement yet */
@@ -534,6 +532,39 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 					"unsupported VMDQ mq_mode rx %u\n",
 					port_id, dev_conf->rxmode.mq_mode);
 			return (-EINVAL);
+		case ETH_MQ_RX_RSS:
+			PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8
+					" SRIOV active, "
+					"Rx mq mode is changed from:"
+					"mq_mode %u into VMDQ mq_mode %u\n",
+					port_id,
+					dev_conf->rxmode.mq_mode,
+					dev->data->dev_conf.rxmode.mq_mode);
+		case ETH_MQ_RX_VMDQ_RSS:
+			dev->data->dev_conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_RSS;
+			if (nb_rx_q < RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool) {
+				switch (nb_rx_q) {
+				case 1:
+				case 2:
+					RTE_ETH_DEV_SRIOV(dev).active =
+						ETH_64_POOLS;
+					break;
+				case 4:
+					RTE_ETH_DEV_SRIOV(dev).active =
+						ETH_32_POOLS;
+					break;
+				default:
+					PMD_DEBUG_TRACE("ethdev port_id=%d"
+						" SRIOV active, "
+						"queue number invalid\n",
+						port_id);
+					return -EINVAL;
+				}
+				RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = nb_rx_q;
+				RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx =
+					dev->pci_dev->max_vfs * nb_rx_q;
+			}
+			break;
 		default: /* ETH_MQ_RX_VMDQ_ONLY or ETH_MQ_RX_NONE */
 			/* if nothing mq mode configure, use default scheme */
 			dev->data->dev_conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_ONLY;
@@ -553,8 +584,6 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 		default: /* ETH_MQ_TX_VMDQ_ONLY or ETH_MQ_TX_NONE */
 			/* if nothing mq mode configure, use default scheme */
 			dev->data->dev_conf.txmode.mq_mode = ETH_MQ_TX_VMDQ_ONLY;
-			if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)
-				RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;
 			break;
 		}
 
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH v4 5/6] ixgbe: Config VF RSS
  2015-01-04  7:18     ` [dpdk-dev] [PATCH v4 " Ouyang Changchun
                         ` (3 preceding siblings ...)
  2015-01-04  7:18       ` [dpdk-dev] [PATCH v4 4/6] ether: Check VMDq RSS mode Ouyang Changchun
@ 2015-01-04  7:18       ` Ouyang Changchun
  2015-01-04  7:18       ` [dpdk-dev] [PATCH v4 6/6] testpmd: Set Rx VMDq RSS mode Ouyang Changchun
  2015-01-07  6:32       ` [dpdk-dev] [PATCH v5 0/6] Enable VF RSS for Niantic Ouyang Changchun
  6 siblings, 0 replies; 144+ messages in thread
From: Ouyang Changchun @ 2015-01-04  7:18 UTC (permalink / raw)
  To: dev

RSS, IXGBE_MRQC and IXGBE_VFPSRTYPE need to be configured to enable VF RSS.

The psrtype determines how many queues the received packets will be distributed to,
and its value should depend on two factors: the max VF rxq number which
has been negotiated with the PF, and the number of rxq specified in config on the guest.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>

Changes in v4:
 - the number of rxq from config should be a power of 2 and should not be bigger than
   the max VF rxq number (negotiated between guest and host).

---
 lib/librte_pmd_ixgbe/ixgbe_pf.c   |  15 ++++++
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 103 +++++++++++++++++++++++++++++++++-----
 2 files changed, 106 insertions(+), 12 deletions(-)

diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c b/lib/librte_pmd_ixgbe/ixgbe_pf.c
index cbb0145..9c9dad8 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
@@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev *eth_dev)
 	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw->mac.num_rar_entries), 0);
 	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw->mac.num_rar_entries), 0);
 
+	/*
+	 * VF RSS can support at most 4 queues for each VF, even if
+	 * 8 queues are available for each VF, it need refine to 4
+	 * queues here due to this limitation, otherwise no queue
+	 * will receive any packet even RSS is enabled.
+	 */
+	if (eth_dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_VMDQ_RSS) {
+		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
+			RTE_ETH_DEV_SRIOV(eth_dev).active = ETH_32_POOLS;
+			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
+			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
+				dev_num_vf(eth_dev) * 4;
+		}
+	}
+
 	/* set VMDq map to default PF pool */
 	hw->mac.ops.set_vmdq(hw, 0, RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
 
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index f69abda..e83a9ab 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -3327,6 +3327,68 @@ ixgbe_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
 }
 
 static int
+ixgbe_config_vf_rss(struct rte_eth_dev *dev)
+{
+	struct ixgbe_hw *hw;
+	uint32_t mrqc;
+
+	ixgbe_rss_configure(dev);
+
+	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+	/* MRQC: enable VF RSS */
+	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
+	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
+	switch (RTE_ETH_DEV_SRIOV(dev).active) {
+	case ETH_64_POOLS:
+		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
+		break;
+
+	case ETH_32_POOLS:
+	case ETH_16_POOLS:
+		mrqc |= IXGBE_MRQC_VMDQRSS32EN;
+		break;
+
+	default:
+		PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode");
+		return -EINVAL;
+	}
+
+	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
+
+	return 0;
+}
+
+static int
+ixgbe_config_vf_default(struct rte_eth_dev *dev)
+{
+	struct ixgbe_hw *hw =
+		IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+	switch (RTE_ETH_DEV_SRIOV(dev).active) {
+	case ETH_64_POOLS:
+		IXGBE_WRITE_REG(hw, IXGBE_MRQC,
+			IXGBE_MRQC_VMDQEN);
+		break;
+
+	case ETH_32_POOLS:
+		IXGBE_WRITE_REG(hw, IXGBE_MRQC,
+			IXGBE_MRQC_VMDQRT4TCEN);
+		break;
+
+	case ETH_16_POOLS:
+		IXGBE_WRITE_REG(hw, IXGBE_MRQC,
+			IXGBE_MRQC_VMDQRT8TCEN);
+		break;
+	default:
+		PMD_INIT_LOG(ERR,
+			"invalid pool number in IOV mode");
+		break;
+	}
+	return 0;
+}
+
+static int
 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
 {
 	struct ixgbe_hw *hw =
@@ -3358,24 +3420,25 @@ ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
 			default: ixgbe_rss_disable(dev);
 		}
 	} else {
-		switch (RTE_ETH_DEV_SRIOV(dev).active) {
 		/*
 		 * SRIOV active scheme
-		 * FIXME if support DCB/RSS together with VMDq & SRIOV
+		 * Support RSS together with VMDq & SRIOV
 		 */
-		case ETH_64_POOLS:
-			IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQEN);
-			break;
-
-		case ETH_32_POOLS:
-			IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQRT4TCEN);
+		switch (dev->data->dev_conf.rxmode.mq_mode) {
+		case ETH_MQ_RX_RSS:
+		case ETH_MQ_RX_VMDQ_RSS:
+			ixgbe_config_vf_rss(dev);
 			break;
 
-		case ETH_16_POOLS:
-			IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQRT8TCEN);
-			break;
+		/* FIXME if support DCB/RSS together with VMDq & SRIOV */
+		case ETH_MQ_RX_VMDQ_DCB:
+		case ETH_MQ_RX_VMDQ_DCB_RSS:
+			PMD_INIT_LOG(ERR,
+				"Could not support DCB with VMDq & SRIOV");
+			return -1;
 		default:
-			PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
+			ixgbe_config_vf_default(dev);
+			break;
 		}
 	}
 
@@ -3993,6 +4056,19 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 	PMD_INIT_FUNC_TRACE();
 	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
+	if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
+		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
+			"it should be power of 2");
+		return -1;
+	}
+
+	if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
+		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
+			"it should be equal to or less than %d",
+			hw->mac.max_rx_queues);
+		return -1;
+	}
+
 	/*
 	 * When the VF driver issues a IXGBE_VF_RESET request, the PF driver
 	 * disables the VF receipt of packets if the PF MTU is > 1500.
@@ -4094,6 +4170,9 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 			IXGBE_PSRTYPE_IPV6HDR;
 #endif
 
+	/* Set RQPL for VF RSS according to max Rx queue */
+	psrtype |= (dev->data->nb_rx_queues >> 1) <<
+		IXGBE_PSRTYPE_RQPL_SHIFT;
 	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
 
 	if (dev->data->dev_conf.rxmode.enable_scatter) {
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH v4 6/6] testpmd: Set Rx VMDq RSS mode
  2015-01-04  7:18     ` [dpdk-dev] [PATCH v4 " Ouyang Changchun
                         ` (4 preceding siblings ...)
  2015-01-04  7:18       ` [dpdk-dev] [PATCH v4 5/6] ixgbe: Config VF RSS Ouyang Changchun
@ 2015-01-04  7:18       ` Ouyang Changchun
  2015-01-04  8:49         ` Vlad Zolotarov
  2015-01-07  6:32       ` [dpdk-dev] [PATCH v5 0/6] Enable VF RSS for Niantic Ouyang Changchun
  6 siblings, 1 reply; 144+ messages in thread
From: Ouyang Changchun @ 2015-01-04  7:18 UTC (permalink / raw)
  To: dev

Set VMDq RSS mode if the port has VFs (the VF number is not 0) and has RSS information.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 app/test-pmd/testpmd.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 8c69756..6230f8b 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -1708,6 +1708,16 @@ init_port_config(void)
 				port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
 		}
 
+		if (port->dev_info.max_vfs != 0) {
+			if (port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0)
+				port->dev_conf.rxmode.mq_mode =
+					ETH_MQ_RX_VMDQ_RSS;
+			else {
+				port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
+				port->dev_conf.txmode.mq_mode = ETH_MQ_TX_NONE;
+			}
+		}
+
 		port->rx_conf.rx_thresh = rx_thresh;
 		port->rx_conf.rx_free_thresh = rx_free_thresh;
 		port->rx_conf.rx_drop_en = rx_drop_en;
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v4 1/6] ixgbe: Code cleanup
  2015-01-04  7:18       ` [dpdk-dev] [PATCH v4 1/6] ixgbe: Code cleanup Ouyang Changchun
@ 2015-01-04  8:22         ` Vlad Zolotarov
  0 siblings, 0 replies; 144+ messages in thread
From: Vlad Zolotarov @ 2015-01-04  8:22 UTC (permalink / raw)
  To: Ouyang Changchun, dev


On 01/04/15 09:18, Ouyang Changchun wrote:
> Put global register configuring out of loop for queue; also fix typo and indent;
> Also fix typo and indent.
>
> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>

Reviewed-by: Vlad Zolotarov <vladz@cloudius-systems.com>

> ---
>   lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 35 ++++++++++++++++++-----------------
>   1 file changed, 18 insertions(+), 17 deletions(-)
>
> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> index 5c36bff..f69abda 100644
> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> @@ -3548,9 +3548,9 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
>   				IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
>   			}
>   			srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
> -				   IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
> -				  IXGBE_SRRCTL_BSIZEHDR_MASK);
> -			srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
> +				IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
> +				IXGBE_SRRCTL_BSIZEHDR_MASK);
> +			srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
>   		} else
>   #endif
>   			srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
> @@ -3985,7 +3985,7 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
>   	struct igb_rx_queue *rxq;
>   	struct rte_pktmbuf_pool_private *mbp_priv;
>   	uint64_t bus_addr;
> -	uint32_t srrctl;
> +	uint32_t srrctl, psrtype = 0;
>   	uint16_t buf_size;
>   	uint16_t i;
>   	int ret;
> @@ -4039,20 +4039,10 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
>   		 * Configure Header Split
>   		 */
>   		if (dev->data->dev_conf.rxmode.header_split) {
> -
> -			/* Must setup the PSRTYPE register */
> -			uint32_t psrtype;
> -			psrtype = IXGBE_PSRTYPE_TCPHDR |
> -				IXGBE_PSRTYPE_UDPHDR   |
> -				IXGBE_PSRTYPE_IPV4HDR  |
> -				IXGBE_PSRTYPE_IPV6HDR;
> -
> -			IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE(i), psrtype);
> -
>   			srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
> -				   IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
> -				  IXGBE_SRRCTL_BSIZEHDR_MASK);
> -			srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
> +				IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
> +				IXGBE_SRRCTL_BSIZEHDR_MASK);
> +			srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
>   		} else
>   #endif
>   			srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
> @@ -4095,6 +4085,17 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
>   		}
>   	}
>   
> +#ifdef RTE_HEADER_SPLIT_ENABLE
> +	if (dev->data->dev_conf.rxmode.header_split)
> +		/* Must setup the PSRTYPE register */
> +		psrtype = IXGBE_PSRTYPE_TCPHDR |
> +			IXGBE_PSRTYPE_UDPHDR   |
> +			IXGBE_PSRTYPE_IPV4HDR  |
> +			IXGBE_PSRTYPE_IPV6HDR;
> +#endif
> +
> +	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
> +
>   	if (dev->data->dev_conf.rxmode.enable_scatter) {
>   		if (!dev->data->scattered_rx)
>   			PMD_INIT_LOG(DEBUG, "forcing scatter mode");

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v4 2/6] ixgbe: Negotiate VF API version
  2015-01-04  7:18       ` [dpdk-dev] [PATCH v4 2/6] ixgbe: Negotiate VF API version Ouyang Changchun
@ 2015-01-04  8:26         ` Vlad Zolotarov
  2015-01-04  8:30           ` Vlad Zolotarov
  0 siblings, 1 reply; 144+ messages in thread
From: Vlad Zolotarov @ 2015-01-04  8:26 UTC (permalink / raw)
  To: Ouyang Changchun, dev


On 01/04/15 09:18, Ouyang Changchun wrote:
> Negotiate API version with VF when receiving the IXGBE_VF_API_NEGOTIATE message.
>
> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>

Reviewed-by: Vlad Zolotarov <vladz@cloudius-systems.com>

> ---
>   lib/librte_pmd_ixgbe/ixgbe_ethdev.h |  1 +
>   lib/librte_pmd_ixgbe/ixgbe_pf.c     | 25 +++++++++++++++++++++++++
>   2 files changed, 26 insertions(+)
>
> diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.h b/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
> index ca99170..730098d 100644
> --- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
> +++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
> @@ -159,6 +159,7 @@ struct ixgbe_vf_info {
>   	uint16_t tx_rate[IXGBE_MAX_QUEUE_NUM_PER_VF];
>   	uint16_t vlan_count;
>   	uint8_t spoofchk_enabled;
> +	uint8_t api_version;
>   };
>   
>   /*
> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> index 51da1fd..495aff5 100644
> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> @@ -469,6 +469,28 @@ ixgbe_set_vf_lpe(struct rte_eth_dev *dev, __rte_unused uint32_t vf, uint32_t *ms
>   }
>   
>   static int
> +ixgbe_negotiate_vf_api(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf)
> +{
> +	uint32_t api_version = msgbuf[1];
> +	struct ixgbe_vf_info *vfinfo =
> +		*IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private);
> +
> +	switch (api_version) {
> +	case ixgbe_mbox_api_10:
> +	case ixgbe_mbox_api_11:
> +		vfinfo[vf].api_version = (uint8_t)api_version;
> +		return 0;
> +	default:
> +		break;
> +	}
> +
> +	RTE_LOG(ERR, PMD, "Negotiate invalid api version %u from VF %d\n",
> +		api_version, vf);
> +
> +	return -1;
> +}
> +
> +static int
>   ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
>   {
>   	uint16_t mbx_size = IXGBE_VFMAILBOX_SIZE;
> @@ -512,6 +534,9 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
>   	case IXGBE_VF_SET_VLAN:
>   		retval = ixgbe_vf_set_vlan(dev, vf, msgbuf);
>   		break;
> +	case IXGBE_VF_API_NEGOTIATE:
> +		retval = ixgbe_negotiate_vf_api(dev, vf, msgbuf);
> +		break;
>   	default:
>   		PMD_DRV_LOG(DEBUG, "Unhandled Msg %8.8x", (unsigned)msgbuf[0]);
>   		retval = IXGBE_ERR_MBX;

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v4 2/6] ixgbe: Negotiate VF API version
  2015-01-04  8:26         ` Vlad Zolotarov
@ 2015-01-04  8:30           ` Vlad Zolotarov
  2015-01-04  8:37             ` Ouyang, Changchun
  0 siblings, 1 reply; 144+ messages in thread
From: Vlad Zolotarov @ 2015-01-04  8:30 UTC (permalink / raw)
  To: Ouyang Changchun, dev


On 01/04/15 10:26, Vlad Zolotarov wrote:
>
> On 01/04/15 09:18, Ouyang Changchun wrote:
>> Negotiate API version with VF when receiving the 
>> IXGBE_VF_API_NEGOTIATE message.
>>
>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
>
> Reviewed-by: Vlad Zolotarov <vladz@cloudius-systems.com>

One small remark below.

>
>> ---
>>   lib/librte_pmd_ixgbe/ixgbe_ethdev.h |  1 +
>>   lib/librte_pmd_ixgbe/ixgbe_pf.c     | 25 +++++++++++++++++++++++++
>>   2 files changed, 26 insertions(+)
>>
>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.h 
>> b/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
>> index ca99170..730098d 100644
>> --- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
>> +++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
>> @@ -159,6 +159,7 @@ struct ixgbe_vf_info {
>>       uint16_t tx_rate[IXGBE_MAX_QUEUE_NUM_PER_VF];
>>       uint16_t vlan_count;
>>       uint8_t spoofchk_enabled;
>> +    uint8_t api_version;
>>   };
>>     /*
>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c 
>> b/lib/librte_pmd_ixgbe/ixgbe_pf.c
>> index 51da1fd..495aff5 100644
>> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
>> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
>> @@ -469,6 +469,28 @@ ixgbe_set_vf_lpe(struct rte_eth_dev *dev, 
>> __rte_unused uint32_t vf, uint32_t *ms
>>   }
>>     static int
>> +ixgbe_negotiate_vf_api(struct rte_eth_dev *dev, uint32_t vf, 
>> uint32_t *msgbuf)
>> +{
>> +    uint32_t api_version = msgbuf[1];
>> +    struct ixgbe_vf_info *vfinfo =
>> + *IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private);
>> +
>> +    switch (api_version) {
>> +    case ixgbe_mbox_api_10:
>> +    case ixgbe_mbox_api_11:

Why is version 2.0 not negotiated?

>> +        vfinfo[vf].api_version = (uint8_t)api_version;
>> +        return 0;
>> +    default:
>> +        break;
>> +    }
>> +
>> +    RTE_LOG(ERR, PMD, "Negotiate invalid api version %u from VF %d\n",
>> +        api_version, vf);
>> +
>> +    return -1;
>> +}
>> +
>> +static int
>>   ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
>>   {
>>       uint16_t mbx_size = IXGBE_VFMAILBOX_SIZE;
>> @@ -512,6 +534,9 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, 
>> uint16_t vf)
>>       case IXGBE_VF_SET_VLAN:
>>           retval = ixgbe_vf_set_vlan(dev, vf, msgbuf);
>>           break;
>> +    case IXGBE_VF_API_NEGOTIATE:
>> +        retval = ixgbe_negotiate_vf_api(dev, vf, msgbuf);
>> +        break;
>>       default:
>>           PMD_DRV_LOG(DEBUG, "Unhandled Msg %8.8x", 
>> (unsigned)msgbuf[0]);
>>           retval = IXGBE_ERR_MBX;
>

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v4 2/6] ixgbe: Negotiate VF API version
  2015-01-04  8:30           ` Vlad Zolotarov
@ 2015-01-04  8:37             ` Ouyang, Changchun
  2015-01-04  8:40               ` Vlad Zolotarov
  0 siblings, 1 reply; 144+ messages in thread
From: Ouyang, Changchun @ 2015-01-04  8:37 UTC (permalink / raw)
  To: Vlad Zolotarov, dev

Hi Vlad,

> -----Original Message-----
> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> Sent: Sunday, January 4, 2015 4:30 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v4 2/6] ixgbe: Negotiate VF API version
> 
> 
> On 01/04/15 10:26, Vlad Zolotarov wrote:
> >
> > On 01/04/15 09:18, Ouyang Changchun wrote:
> >> Negotiate API version with VF when receiving the
> >> IXGBE_VF_API_NEGOTIATE message.
> >>
> >> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> >
> > Reviewed-by: Vlad Zolotarov <vladz@cloudius-systems.com>

Thanks for your reviewing.

> 
> One small remark below.
> 
> >
> >> ---
> >>   lib/librte_pmd_ixgbe/ixgbe_ethdev.h |  1 +
> >>   lib/librte_pmd_ixgbe/ixgbe_pf.c     | 25 +++++++++++++++++++++++++
> >>   2 files changed, 26 insertions(+)
> >>
> >> diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
> >> b/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
> >> index ca99170..730098d 100644
> >> --- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
> >> +++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
> >> @@ -159,6 +159,7 @@ struct ixgbe_vf_info {
> >>       uint16_t tx_rate[IXGBE_MAX_QUEUE_NUM_PER_VF];
> >>       uint16_t vlan_count;
> >>       uint8_t spoofchk_enabled;
> >> +    uint8_t api_version;
> >>   };
> >>     /*
> >> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> >> b/lib/librte_pmd_ixgbe/ixgbe_pf.c index 51da1fd..495aff5 100644
> >> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> >> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> >> @@ -469,6 +469,28 @@ ixgbe_set_vf_lpe(struct rte_eth_dev *dev,
> >> __rte_unused uint32_t vf, uint32_t *ms
> >>   }
> >>     static int
> >> +ixgbe_negotiate_vf_api(struct rte_eth_dev *dev, uint32_t vf,
> >> uint32_t *msgbuf)
> >> +{
> >> +    uint32_t api_version = msgbuf[1];
> >> +    struct ixgbe_vf_info *vfinfo =
> >> + *IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private);
> >> +
> >> +    switch (api_version) {
> >> +    case ixgbe_mbox_api_10:
> >> +    case ixgbe_mbox_api_11:
> 
> Why version 2.0 is not negotiated?
> 
Because it doesn't fully support 2.0 features yet.

Thanks
Changchun

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v4 3/6] ixgbe: Get VF queue number
  2015-01-04  7:18       ` [dpdk-dev] [PATCH v4 3/6] ixgbe: Get VF queue number Ouyang Changchun
@ 2015-01-04  8:38         ` Vlad Zolotarov
  2015-01-05  2:59           ` Ouyang, Changchun
  0 siblings, 1 reply; 144+ messages in thread
From: Vlad Zolotarov @ 2015-01-04  8:38 UTC (permalink / raw)
  To: Ouyang Changchun, dev


On 01/04/15 09:18, Ouyang Changchun wrote:
> Get the available Rx and Tx queue number when receiving IXGBE_VF_GET_QUEUES message from VF.
>
> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> ---
>   lib/librte_pmd_ixgbe/ixgbe_pf.c | 35 ++++++++++++++++++++++++++++++++++-
>   1 file changed, 34 insertions(+), 1 deletion(-)
>
> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> index 495aff5..cbb0145 100644
> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> @@ -53,6 +53,8 @@
>   #include "ixgbe_ethdev.h"
>   
>   #define IXGBE_MAX_VFTA     (128)
> +#define IXGBE_VF_MSG_SIZE_DEFAULT 1
> +#define IXGBE_VF_GET_QUEUE_MSG_SIZE 5
>   
>   static inline uint16_t
>   dev_num_vf(struct rte_eth_dev *eth_dev)
> @@ -491,9 +493,36 @@ ixgbe_negotiate_vf_api(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf)
>   }
>   
>   static int
> +ixgbe_get_vf_queues(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf)
> +{
> +	struct ixgbe_vf_info *vfinfo =
> +		*IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private);
> +	uint32_t default_q = vf * RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
> +
> +	/* Verify if the PF supports the mbox APIs version or not */
> +	switch (vfinfo[vf].api_version) {
> +	case ixgbe_mbox_api_20:
> +	case ixgbe_mbox_api_11:
> +		break;
> +	default:
> +		return -1;
> +	}
> +
> +	/* Notify VF of Rx and Tx queue number */
> +	msgbuf[IXGBE_VF_RX_QUEUES] = RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
> +	msgbuf[IXGBE_VF_TX_QUEUES] = RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
> +
> +	/* Notify VF of default queue */
> +	msgbuf[IXGBE_VF_DEF_QUEUE] = default_q;

What about IXGBE_VF_TRANS_VLAN field?

> +
> +	return 0;
> +}
> +
> +static int
>   ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
>   {
>   	uint16_t mbx_size = IXGBE_VFMAILBOX_SIZE;
> +	uint16_t msg_size = IXGBE_VF_MSG_SIZE_DEFAULT;
>   	uint32_t msgbuf[IXGBE_VFMAILBOX_SIZE];
>   	int32_t retval;
>   	struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> @@ -537,6 +566,10 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
>   	case IXGBE_VF_API_NEGOTIATE:
>   		retval = ixgbe_negotiate_vf_api(dev, vf, msgbuf);
>   		break;
> +	case IXGBE_VF_GET_QUEUES:
> +		retval = ixgbe_get_vf_queues(dev, vf, msgbuf);
> +		msg_size = IXGBE_VF_GET_QUEUE_MSG_SIZE;

Although the msg_size semantics and motivation are clear, if you want to do
this, then do it all the way - add it to all the other cases too, not just to
IXGBE_VF_GET_QUEUES.
For instance, why do you write all 16 DWORDs for API negotiation (only 2
are required) and only here decide to get "greedy"? ;)

My point is: either drop it completely or fix all other places as well.

> +		break;
>   	default:
>   		PMD_DRV_LOG(DEBUG, "Unhandled Msg %8.8x", (unsigned)msgbuf[0]);
>   		retval = IXGBE_ERR_MBX;
> @@ -551,7 +584,7 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
>   
>   	msgbuf[0] |= IXGBE_VT_MSGTYPE_CTS;
>   
> -	ixgbe_write_mbx(hw, msgbuf, 1, vf);
> +	ixgbe_write_mbx(hw, msgbuf, msg_size, vf);
>   
>   	return retval;
>   }

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v4 2/6] ixgbe: Negotiate VF API version
  2015-01-04  8:37             ` Ouyang, Changchun
@ 2015-01-04  8:40               ` Vlad Zolotarov
  2015-01-04  8:51                 ` Ouyang, Changchun
  0 siblings, 1 reply; 144+ messages in thread
From: Vlad Zolotarov @ 2015-01-04  8:40 UTC (permalink / raw)
  To: Ouyang, Changchun, dev


On 01/04/15 10:37, Ouyang, Changchun wrote:
> Hi Vlad,
>
>> -----Original Message-----
>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>> Sent: Sunday, January 4, 2015 4:30 PM
>> To: Ouyang, Changchun; dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH v4 2/6] ixgbe: Negotiate VF API version
>>
>>
>> On 01/04/15 10:26, Vlad Zolotarov wrote:
>>> On 01/04/15 09:18, Ouyang Changchun wrote:
>>>> Negotiate API version with VF when receiving the
>>>> IXGBE_VF_API_NEGOTIATE message.
>>>>
>>>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
>>> Reviewed-by: Vlad Zolotarov <vladz@cloudius-systems.com>
> Thanks for your reviewing.
>
>> One small remark below.
>>
>>>> ---
>>>>    lib/librte_pmd_ixgbe/ixgbe_ethdev.h |  1 +
>>>>    lib/librte_pmd_ixgbe/ixgbe_pf.c     | 25 +++++++++++++++++++++++++
>>>>    2 files changed, 26 insertions(+)
>>>>
>>>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
>>>> b/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
>>>> index ca99170..730098d 100644
>>>> --- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
>>>> +++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
>>>> @@ -159,6 +159,7 @@ struct ixgbe_vf_info {
>>>>        uint16_t tx_rate[IXGBE_MAX_QUEUE_NUM_PER_VF];
>>>>        uint16_t vlan_count;
>>>>        uint8_t spoofchk_enabled;
>>>> +    uint8_t api_version;
>>>>    };
>>>>      /*
>>>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>>> b/lib/librte_pmd_ixgbe/ixgbe_pf.c index 51da1fd..495aff5 100644
>>>> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>>> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>>> @@ -469,6 +469,28 @@ ixgbe_set_vf_lpe(struct rte_eth_dev *dev,
>>>> __rte_unused uint32_t vf, uint32_t *ms
>>>>    }
>>>>      static int
>>>> +ixgbe_negotiate_vf_api(struct rte_eth_dev *dev, uint32_t vf,
>>>> uint32_t *msgbuf)
>>>> +{
>>>> +    uint32_t api_version = msgbuf[1];
>>>> +    struct ixgbe_vf_info *vfinfo =
>>>> + *IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private);
>>>> +
>>>> +    switch (api_version) {
>>>> +    case ixgbe_mbox_api_10:
>>>> +    case ixgbe_mbox_api_11:
>> Why version 2.0 is not negotiated?
>>
> Because it doesn't fully support 2.0 features yet.

Well, in that case you should not support 2.0 in patch 3 either.

>
> Thanks
> Changchun
>

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v4 4/6] ether: Check VMDq RSS mode
  2015-01-04  7:18       ` [dpdk-dev] [PATCH v4 4/6] ether: Check VMDq RSS mode Ouyang Changchun
@ 2015-01-04  8:45         ` Vlad Zolotarov
  2015-01-04  8:58           ` Ouyang, Changchun
  0 siblings, 1 reply; 144+ messages in thread
From: Vlad Zolotarov @ 2015-01-04  8:45 UTC (permalink / raw)
  To: Ouyang Changchun, dev


On 01/04/15 09:18, Ouyang Changchun wrote:
> Check mq mode for VMDq RSS, handle it correctly instead of returning an error;
> Also remove the limitation of per pool queue number has max value of 1, because
> the per pool queue number could be 2 or 4 if it is VMDq RSS mode;
>
> The number of rxq specified in config will determine the mq mode for VMDq RSS.
>
> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> ---
>   lib/librte_ether/rte_ethdev.c | 39 ++++++++++++++++++++++++++++++++++-----
>   1 file changed, 34 insertions(+), 5 deletions(-)
>
> diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
> index 95f2ceb..59ff325 100644
> --- a/lib/librte_ether/rte_ethdev.c
> +++ b/lib/librte_ether/rte_ethdev.c
> @@ -510,8 +510,7 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
>   
>   	if (RTE_ETH_DEV_SRIOV(dev).active != 0) {
>   		/* check multi-queue mode */
> -		if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_RSS) ||
> -		    (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
> +		if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
>   		    (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB_RSS) ||
>   		    (dev_conf->txmode.mq_mode == ETH_MQ_TX_DCB)) {
>   			/* SRIOV only works in VMDq enable mode */
> @@ -525,7 +524,6 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
>   		}
>   
>   		switch (dev_conf->rxmode.mq_mode) {
> -		case ETH_MQ_RX_VMDQ_RSS:
>   		case ETH_MQ_RX_VMDQ_DCB:
>   		case ETH_MQ_RX_VMDQ_DCB_RSS:
>   			/* DCB/RSS VMDQ in SRIOV mode, not implement yet */
> @@ -534,6 +532,39 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
>   					"unsupported VMDQ mq_mode rx %u\n",
>   					port_id, dev_conf->rxmode.mq_mode);
>   			return (-EINVAL);
> +		case ETH_MQ_RX_RSS:
> +			PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8
> +					" SRIOV active, "
> +					"Rx mq mode is changed from:"
> +					"mq_mode %u into VMDQ mq_mode %u\n",
> +					port_id,
> +					dev_conf->rxmode.mq_mode,
> +					dev->data->dev_conf.rxmode.mq_mode);
> +		case ETH_MQ_RX_VMDQ_RSS:
> +			dev->data->dev_conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_RSS;
> +			if (nb_rx_q < RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool) {
> +				switch (nb_rx_q) {
> +				case 1:
> +				case 2:
> +					RTE_ETH_DEV_SRIOV(dev).active =
> +						ETH_64_POOLS;
> +					break;
> +				case 4:
> +					RTE_ETH_DEV_SRIOV(dev).active =
> +						ETH_32_POOLS;
> +					break;
> +				default:
> +					PMD_DEBUG_TRACE("ethdev port_id=%d"
> +						" SRIOV active, "
> +						"queue number invalid\n",
> +						port_id);
> +					return -EINVAL;
> +				}
> +				RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = nb_rx_q;
> +				RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx =
> +					dev->pci_dev->max_vfs * nb_rx_q;
> +			}

Don't u need to return an error in the "else" here?

> +			break;
>   		default: /* ETH_MQ_RX_VMDQ_ONLY or ETH_MQ_RX_NONE */
>   			/* if nothing mq mode configure, use default scheme */
>   			dev->data->dev_conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_ONLY;
> @@ -553,8 +584,6 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
>   		default: /* ETH_MQ_TX_VMDQ_ONLY or ETH_MQ_TX_NONE */
>   			/* if nothing mq mode configure, use default scheme */
>   			dev->data->dev_conf.txmode.mq_mode = ETH_MQ_TX_VMDQ_ONLY;
> -			if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)
> -				RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;
>   			break;
>   		}
>   

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v4 6/6] testpmd: Set Rx VMDq RSS mode
  2015-01-04  7:18       ` [dpdk-dev] [PATCH v4 6/6] testpmd: Set Rx VMDq RSS mode Ouyang Changchun
@ 2015-01-04  8:49         ` Vlad Zolotarov
  2015-01-04  9:01           ` Ouyang, Changchun
  0 siblings, 1 reply; 144+ messages in thread
From: Vlad Zolotarov @ 2015-01-04  8:49 UTC (permalink / raw)
  To: Ouyang Changchun, dev


On 01/04/15 09:18, Ouyang Changchun wrote:
> Set VMDq RSS mode if it has VF(VF number is more than 1) and has RSS information.
>
> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> ---
>   app/test-pmd/testpmd.c | 10 ++++++++++
>   1 file changed, 10 insertions(+)
>
> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
> index 8c69756..6230f8b 100644
> --- a/app/test-pmd/testpmd.c
> +++ b/app/test-pmd/testpmd.c
> @@ -1708,6 +1708,16 @@ init_port_config(void)
>   				port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
>   		}
>   
> +		if (port->dev_info.max_vfs != 0) {
> +			if (port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0)
> +				port->dev_conf.rxmode.mq_mode =
> +					ETH_MQ_RX_VMDQ_RSS;
> +			else {
> +				port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
> +				port->dev_conf.txmode.mq_mode = ETH_MQ_TX_NONE;

And what about the txmode.mq_mode when RSS is available (the "if" clause)?

> +			}
> +		}
> +
>   		port->rx_conf.rx_thresh = rx_thresh;
>   		port->rx_conf.rx_free_thresh = rx_free_thresh;
>   		port->rx_conf.rx_drop_en = rx_drop_en;

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v4 2/6] ixgbe: Negotiate VF API version
  2015-01-04  8:40               ` Vlad Zolotarov
@ 2015-01-04  8:51                 ` Ouyang, Changchun
  2015-01-04  9:37                   ` Vlad Zolotarov
  0 siblings, 1 reply; 144+ messages in thread
From: Ouyang, Changchun @ 2015-01-04  8:51 UTC (permalink / raw)
  To: Vlad Zolotarov, dev

> -----Original Message-----
> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> Sent: Sunday, January 4, 2015 4:40 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v4 2/6] ixgbe: Negotiate VF API version
> 
> 
> On 01/04/15 10:37, Ouyang, Changchun wrote:
> > Hi Vlad,
> >
> >> -----Original Message-----
> >> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> >> Sent: Sunday, January 4, 2015 4:30 PM
> >> To: Ouyang, Changchun; dev@dpdk.org
> >> Subject: Re: [dpdk-dev] [PATCH v4 2/6] ixgbe: Negotiate VF API
> >> version
> >>
> >>
> >> On 01/04/15 10:26, Vlad Zolotarov wrote:
> >>> On 01/04/15 09:18, Ouyang Changchun wrote:
> >>>> Negotiate API version with VF when receiving the
> >>>> IXGBE_VF_API_NEGOTIATE message.
> >>>>
> >>>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> >>> Reviewed-by: Vlad Zolotarov <vladz@cloudius-systems.com>
> > Thanks for your reviewing.
> >
> >> One small remark below.
> >>
> >>>> ---
> >>>>    lib/librte_pmd_ixgbe/ixgbe_ethdev.h |  1 +
> >>>>    lib/librte_pmd_ixgbe/ixgbe_pf.c     | 25
> +++++++++++++++++++++++++
> >>>>    2 files changed, 26 insertions(+)
> >>>>
> >>>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
> >>>> b/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
> >>>> index ca99170..730098d 100644
> >>>> --- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
> >>>> +++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
> >>>> @@ -159,6 +159,7 @@ struct ixgbe_vf_info {
> >>>>        uint16_t tx_rate[IXGBE_MAX_QUEUE_NUM_PER_VF];
> >>>>        uint16_t vlan_count;
> >>>>        uint8_t spoofchk_enabled;
> >>>> +    uint8_t api_version;
> >>>>    };
> >>>>      /*
> >>>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> >>>> b/lib/librte_pmd_ixgbe/ixgbe_pf.c index 51da1fd..495aff5 100644
> >>>> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> >>>> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> >>>> @@ -469,6 +469,28 @@ ixgbe_set_vf_lpe(struct rte_eth_dev *dev,
> >>>> __rte_unused uint32_t vf, uint32_t *ms
> >>>>    }
> >>>>      static int
> >>>> +ixgbe_negotiate_vf_api(struct rte_eth_dev *dev, uint32_t vf,
> >>>> uint32_t *msgbuf)
> >>>> +{
> >>>> +    uint32_t api_version = msgbuf[1];
> >>>> +    struct ixgbe_vf_info *vfinfo =
> >>>> +*IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private);
> >>>> +
> >>>> +    switch (api_version) {
> >>>> +    case ixgbe_mbox_api_10:
> >>>> +    case ixgbe_mbox_api_11:
> >> Why version 2.0 is not negotiated?
> >>
> > Because it doesn't fully support 2.0 features yet.
> 
> Well, it that case u should not support 2.0 in patch 3 as well.
My opinion is that in patch 3, ixgbe_get_vf_queues needs api_11 or api_20 to support it.
That means the feature requires one of those 2 API versions, and it can't work with a lower version like api_10.
Here the code shows that the PF is capable of supporting api_10 and api_11,
so I don't think the two contradict each other.
Thanks
Changchun

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v4 4/6] ether: Check VMDq RSS mode
  2015-01-04  8:45         ` Vlad Zolotarov
@ 2015-01-04  8:58           ` Ouyang, Changchun
  2015-01-04  9:45             ` Vlad Zolotarov
  0 siblings, 1 reply; 144+ messages in thread
From: Ouyang, Changchun @ 2015-01-04  8:58 UTC (permalink / raw)
  To: Vlad Zolotarov, dev


> -----Original Message-----
> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> Sent: Sunday, January 4, 2015 4:45 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v4 4/6] ether: Check VMDq RSS mode
> 
> 
> On 01/04/15 09:18, Ouyang Changchun wrote:
> > Check mq mode for VMDq RSS, handle it correctly instead of returning
> > an error; Also remove the limitation of per pool queue number has max
> > value of 1, because the per pool queue number could be 2 or 4 if it is
> > VMDq RSS mode;
> >
> > The number of rxq specified in config will determine the mq mode for
> VMDq RSS.
> >
> > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> > ---
> >   lib/librte_ether/rte_ethdev.c | 39
> ++++++++++++++++++++++++++++++++++-----
> >   1 file changed, 34 insertions(+), 5 deletions(-)
> >
> > diff --git a/lib/librte_ether/rte_ethdev.c
> > b/lib/librte_ether/rte_ethdev.c index 95f2ceb..59ff325 100644
> > --- a/lib/librte_ether/rte_ethdev.c
> > +++ b/lib/librte_ether/rte_ethdev.c
> > @@ -510,8 +510,7 @@ rte_eth_dev_check_mq_mode(uint8_t port_id,
> > uint16_t nb_rx_q, uint16_t nb_tx_q,
> >
> >   	if (RTE_ETH_DEV_SRIOV(dev).active != 0) {
> >   		/* check multi-queue mode */
> > -		if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_RSS) ||
> > -		    (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
> > +		if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
> >   		    (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB_RSS)
> ||
> >   		    (dev_conf->txmode.mq_mode == ETH_MQ_TX_DCB)) {
> >   			/* SRIOV only works in VMDq enable mode */ @@ -
> 525,7 +524,6 @@
> > rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q,
> uint16_t nb_tx_q,
> >   		}
> >
> >   		switch (dev_conf->rxmode.mq_mode) {
> > -		case ETH_MQ_RX_VMDQ_RSS:
> >   		case ETH_MQ_RX_VMDQ_DCB:
> >   		case ETH_MQ_RX_VMDQ_DCB_RSS:
> >   			/* DCB/RSS VMDQ in SRIOV mode, not implement
> yet */ @@ -534,6
> > +532,39 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t
> nb_rx_q, uint16_t nb_tx_q,
> >   					"unsupported VMDQ mq_mode
> rx %u\n",
> >   					port_id, dev_conf-
> >rxmode.mq_mode);
> >   			return (-EINVAL);
> > +		case ETH_MQ_RX_RSS:
> > +			PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8
> > +					" SRIOV active, "
> > +					"Rx mq mode is changed from:"
> > +					"mq_mode %u into VMDQ
> mq_mode %u\n",
> > +					port_id,
> > +					dev_conf->rxmode.mq_mode,
> > +					dev->data-
> >dev_conf.rxmode.mq_mode);
> > +		case ETH_MQ_RX_VMDQ_RSS:
> > +			dev->data->dev_conf.rxmode.mq_mode =
> ETH_MQ_RX_VMDQ_RSS;
> > +			if (nb_rx_q <
> RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool) {
> > +				switch (nb_rx_q) {
> > +				case 1:
> > +				case 2:
> > +					RTE_ETH_DEV_SRIOV(dev).active =
> > +						ETH_64_POOLS;
> > +					break;
> > +				case 4:
> > +					RTE_ETH_DEV_SRIOV(dev).active =
> > +						ETH_32_POOLS;
> > +					break;
> > +				default:
> > +					PMD_DEBUG_TRACE("ethdev
> port_id=%d"
> > +						" SRIOV active, "
> > +						"queue number invalid\n",
> > +						port_id);
> > +					return -EINVAL;
> > +				}
> > +				RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool =
> nb_rx_q;
> > +				RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx =
> > +					dev->pci_dev->max_vfs * nb_rx_q;
> > +			}
> 
> Don't u need to return an error in the "else" here?

Actually there is such a check right after this code snippet, and it does return an error for the else case.
Because it is the original logic and I didn't change any code around it, it isn't shown in this diff; you can check the code.

Thanks
Changchun
   

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v4 6/6] testpmd: Set Rx VMDq RSS mode
  2015-01-04  8:49         ` Vlad Zolotarov
@ 2015-01-04  9:01           ` Ouyang, Changchun
  2015-01-04  9:46             ` Vlad Zolotarov
  0 siblings, 1 reply; 144+ messages in thread
From: Ouyang, Changchun @ 2015-01-04  9:01 UTC (permalink / raw)
  To: Vlad Zolotarov, dev


> -----Original Message-----
> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> Sent: Sunday, January 4, 2015 4:50 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v4 6/6] testpmd: Set Rx VMDq RSS mode
> 
> 
> On 01/04/15 09:18, Ouyang Changchun wrote:
> > Set VMDq RSS mode if it has VF(VF number is more than 1) and has RSS
> information.
> >
> > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> > ---
> >   app/test-pmd/testpmd.c | 10 ++++++++++
> >   1 file changed, 10 insertions(+)
> >
> > diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index
> > 8c69756..6230f8b 100644
> > --- a/app/test-pmd/testpmd.c
> > +++ b/app/test-pmd/testpmd.c
> > @@ -1708,6 +1708,16 @@ init_port_config(void)
> >   				port->dev_conf.rxmode.mq_mode =
> ETH_MQ_RX_NONE;
> >   		}
> >
> > +		if (port->dev_info.max_vfs != 0) {
> > +			if (port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0)
> > +				port->dev_conf.rxmode.mq_mode =
> > +					ETH_MQ_RX_VMDQ_RSS;
> > +			else {
> > +				port->dev_conf.rxmode.mq_mode =
> ETH_MQ_RX_NONE;
> > +				port->dev_conf.txmode.mq_mode =
> ETH_MQ_TX_NONE;
> 
> And what about the txmode.mq_mode when RSS is available (the :if" clause)?

I think we can keep the original value of txmode.mq_mode, i.e. not change it. What do you think?
Thanks
Changchun

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v4 2/6] ixgbe: Negotiate VF API version
  2015-01-04  8:51                 ` Ouyang, Changchun
@ 2015-01-04  9:37                   ` Vlad Zolotarov
  0 siblings, 0 replies; 144+ messages in thread
From: Vlad Zolotarov @ 2015-01-04  9:37 UTC (permalink / raw)
  To: Ouyang, Changchun, dev


On 01/04/15 10:51, Ouyang, Changchun wrote:
>> -----Original Message-----
>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>> Sent: Sunday, January 4, 2015 4:40 PM
>> To: Ouyang, Changchun; dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH v4 2/6] ixgbe: Negotiate VF API version
>>
>>
>> On 01/04/15 10:37, Ouyang, Changchun wrote:
>>> Hi Vlad,
>>>
>>>> -----Original Message-----
>>>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>>>> Sent: Sunday, January 4, 2015 4:30 PM
>>>> To: Ouyang, Changchun; dev@dpdk.org
>>>> Subject: Re: [dpdk-dev] [PATCH v4 2/6] ixgbe: Negotiate VF API
>>>> version
>>>>
>>>>
>>>> On 01/04/15 10:26, Vlad Zolotarov wrote:
>>>>> On 01/04/15 09:18, Ouyang Changchun wrote:
>>>>>> Negotiate API version with VF when receiving the
>>>>>> IXGBE_VF_API_NEGOTIATE message.
>>>>>>
>>>>>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
>>>>> Reviewed-by: Vlad Zolotarov <vladz@cloudius-systems.com>
>>> Thanks for your reviewing.
>>>
>>>> One small remark below.
>>>>
>>>>>> ---
>>>>>>     lib/librte_pmd_ixgbe/ixgbe_ethdev.h |  1 +
>>>>>>     lib/librte_pmd_ixgbe/ixgbe_pf.c     | 25
>> +++++++++++++++++++++++++
>>>>>>     2 files changed, 26 insertions(+)
>>>>>>
>>>>>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
>>>>>> b/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
>>>>>> index ca99170..730098d 100644
>>>>>> --- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
>>>>>> +++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
>>>>>> @@ -159,6 +159,7 @@ struct ixgbe_vf_info {
>>>>>>         uint16_t tx_rate[IXGBE_MAX_QUEUE_NUM_PER_VF];
>>>>>>         uint16_t vlan_count;
>>>>>>         uint8_t spoofchk_enabled;
>>>>>> +    uint8_t api_version;
>>>>>>     };
>>>>>>       /*
>>>>>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>>>>> b/lib/librte_pmd_ixgbe/ixgbe_pf.c index 51da1fd..495aff5 100644
>>>>>> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>>>>> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>>>>> @@ -469,6 +469,28 @@ ixgbe_set_vf_lpe(struct rte_eth_dev *dev,
>>>>>> __rte_unused uint32_t vf, uint32_t *ms
>>>>>>     }
>>>>>>       static int
>>>>>> +ixgbe_negotiate_vf_api(struct rte_eth_dev *dev, uint32_t vf,
>>>>>> uint32_t *msgbuf)
>>>>>> +{
>>>>>> +    uint32_t api_version = msgbuf[1];
>>>>>> +    struct ixgbe_vf_info *vfinfo =
>>>>>> +*IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private);
>>>>>> +
>>>>>> +    switch (api_version) {
>>>>>> +    case ixgbe_mbox_api_10:
>>>>>> +    case ixgbe_mbox_api_11:
>>>> Why version 2.0 is not negotiated?
>>>>
>>> Because it doesn't fully support 2.0 features yet.
>> Well, it that case u should not support 2.0 in patch 3 as well.
> My opinion is that In patch 3, ixgbe_get_vf_queues need api_11 or api_20 to support it,
> That mean the feature require those 2 api version, and it can't work with lower version like api_10.
> Here the code show the pf has the capability of supporting api_10 and api_11,
> I think it doesn't contradict.

After a second pass on this code and code in PATCH3 I agree that it's ok.

> Thanks
> Changchun
>

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v4 4/6] ether: Check VMDq RSS mode
  2015-01-04  8:58           ` Ouyang, Changchun
@ 2015-01-04  9:45             ` Vlad Zolotarov
  2015-01-05  1:00               ` Ouyang, Changchun
  0 siblings, 1 reply; 144+ messages in thread
From: Vlad Zolotarov @ 2015-01-04  9:45 UTC (permalink / raw)
  To: Ouyang, Changchun, dev


On 01/04/15 10:58, Ouyang, Changchun wrote:
>> -----Original Message-----
>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>> Sent: Sunday, January 4, 2015 4:45 PM
>> To: Ouyang, Changchun; dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH v4 4/6] ether: Check VMDq RSS mode
>>
>>
>> On 01/04/15 09:18, Ouyang Changchun wrote:
>>> Check mq mode for VMDq RSS, handle it correctly instead of returning
>>> an error; Also remove the limitation of per pool queue number has max
>>> value of 1, because the per pool queue number could be 2 or 4 if it is
>>> VMDq RSS mode;
>>>
>>> The number of rxq specified in config will determine the mq mode for
>> VMDq RSS.
>>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
>>> ---
>>>    lib/librte_ether/rte_ethdev.c | 39
>> ++++++++++++++++++++++++++++++++++-----
>>>    1 file changed, 34 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/lib/librte_ether/rte_ethdev.c
>>> b/lib/librte_ether/rte_ethdev.c index 95f2ceb..59ff325 100644
>>> --- a/lib/librte_ether/rte_ethdev.c
>>> +++ b/lib/librte_ether/rte_ethdev.c
>>> @@ -510,8 +510,7 @@ rte_eth_dev_check_mq_mode(uint8_t port_id,
>>> uint16_t nb_rx_q, uint16_t nb_tx_q,
>>>
>>>    	if (RTE_ETH_DEV_SRIOV(dev).active != 0) {
>>>    		/* check multi-queue mode */
>>> -		if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_RSS) ||
>>> -		    (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
>>> +		if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
>>>    		    (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB_RSS)
>> ||
>>>    		    (dev_conf->txmode.mq_mode == ETH_MQ_TX_DCB)) {
>>>    			/* SRIOV only works in VMDq enable mode */ @@ -
>> 525,7 +524,6 @@
>>> rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q,
>> uint16_t nb_tx_q,
>>>    		}
>>>
>>>    		switch (dev_conf->rxmode.mq_mode) {
>>> -		case ETH_MQ_RX_VMDQ_RSS:
>>>    		case ETH_MQ_RX_VMDQ_DCB:
>>>    		case ETH_MQ_RX_VMDQ_DCB_RSS:
>>>    			/* DCB/RSS VMDQ in SRIOV mode, not implement
>> yet */ @@ -534,6
>>> +532,39 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t
>> nb_rx_q, uint16_t nb_tx_q,
>>>    					"unsupported VMDQ mq_mode
>> rx %u\n",
>>>    					port_id, dev_conf-
>>> rxmode.mq_mode);
>>>    			return (-EINVAL);
>>> +		case ETH_MQ_RX_RSS:
>>> +			PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8
>>> +					" SRIOV active, "
>>> +					"Rx mq mode is changed from:"
>>> +					"mq_mode %u into VMDQ
>> mq_mode %u\n",
>>> +					port_id,
>>> +					dev_conf->rxmode.mq_mode,
>>> +					dev->data-
>>> dev_conf.rxmode.mq_mode);
>>> +		case ETH_MQ_RX_VMDQ_RSS:
>>> +			dev->data->dev_conf.rxmode.mq_mode =
>> ETH_MQ_RX_VMDQ_RSS;
>>> +			if (nb_rx_q <
>> RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool) {

Missed that before: shouldn't it be "<=" here?

>>> +				switch (nb_rx_q) {
>>> +				case 1:
>>> +				case 2:
>>> +					RTE_ETH_DEV_SRIOV(dev).active =
>>> +						ETH_64_POOLS;
>>> +					break;
>>> +				case 4:
>>> +					RTE_ETH_DEV_SRIOV(dev).active =
>>> +						ETH_32_POOLS;
>>> +					break;
>>> +				default:
>>> +					PMD_DEBUG_TRACE("ethdev
>> port_id=%d"
>>> +						" SRIOV active, "
>>> +						"queue number invalid\n",
>>> +						port_id);
>>> +					return -EINVAL;
>>> +				}
>>> +				RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool =
>> nb_rx_q;
>>> +				RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx =
>>> +					dev->pci_dev->max_vfs * nb_rx_q;
>>> +			}
>> Don't u need to return an error in the "else" here?
> Actually it has such a check after these code snippet, and it does return error for the else case,
> Because it is original logic, I don't change any code around it, so it doesn't display here, you can check the codes.

I see. The flow is a bit confusing since the switch-case above will end 
up executing a "default" clause which will set 
RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool to 1 and then the error message in 
the check you are referring to will be a bit confusing.

>
> Thanks
> Changchun
>     
>

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v4 6/6] testpmd: Set Rx VMDq RSS mode
  2015-01-04  9:01           ` Ouyang, Changchun
@ 2015-01-04  9:46             ` Vlad Zolotarov
  2015-01-05  2:38               ` Ouyang, Changchun
  0 siblings, 1 reply; 144+ messages in thread
From: Vlad Zolotarov @ 2015-01-04  9:46 UTC (permalink / raw)
  To: Ouyang, Changchun, dev


On 01/04/15 11:01, Ouyang, Changchun wrote:
>> -----Original Message-----
>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>> Sent: Sunday, January 4, 2015 4:50 PM
>> To: Ouyang, Changchun; dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH v4 6/6] testpmd: Set Rx VMDq RSS mode
>>
>>
>> On 01/04/15 09:18, Ouyang Changchun wrote:
>>> Set VMDq RSS mode if it has VF(VF number is more than 1) and has RSS
>> information.
>>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
>>> ---
>>>    app/test-pmd/testpmd.c | 10 ++++++++++
>>>    1 file changed, 10 insertions(+)
>>>
>>> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index
>>> 8c69756..6230f8b 100644
>>> --- a/app/test-pmd/testpmd.c
>>> +++ b/app/test-pmd/testpmd.c
>>> @@ -1708,6 +1708,16 @@ init_port_config(void)
>>>    				port->dev_conf.rxmode.mq_mode =
>> ETH_MQ_RX_NONE;
>>>    		}
>>>
>>> +		if (port->dev_info.max_vfs != 0) {
>>> +			if (port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0)
>>> +				port->dev_conf.rxmode.mq_mode =
>>> +					ETH_MQ_RX_VMDQ_RSS;
>>> +			else {
>>> +				port->dev_conf.rxmode.mq_mode =
>> ETH_MQ_RX_NONE;
>>> +				port->dev_conf.txmode.mq_mode =
>> ETH_MQ_TX_NONE;
>>
>> And what about the txmode.mq_mode when RSS is available (the :if" clause)?
> I think we can keep its original value for txmode.mq_mode, so don't change its value. How do you think of it?

I agree that not changing a Tx mq_mode in both cases would be better.

> Thanks
> Changchun
>
>

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v4 4/6] ether: Check VMDq RSS mode
  2015-01-04  9:45             ` Vlad Zolotarov
@ 2015-01-05  1:00               ` Ouyang, Changchun
  2015-01-05 10:09                 ` Vlad Zolotarov
  0 siblings, 1 reply; 144+ messages in thread
From: Ouyang, Changchun @ 2015-01-05  1:00 UTC (permalink / raw)
  To: Vlad Zolotarov, dev



> -----Original Message-----
> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> Sent: Sunday, January 4, 2015 5:46 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v4 4/6] ether: Check VMDq RSS mode
> 
> 
> On 01/04/15 10:58, Ouyang, Changchun wrote:
> >> -----Original Message-----
> >> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> >> Sent: Sunday, January 4, 2015 4:45 PM
> >> To: Ouyang, Changchun; dev@dpdk.org
> >> Subject: Re: [dpdk-dev] [PATCH v4 4/6] ether: Check VMDq RSS mode
> >>
> >>
> >> On 01/04/15 09:18, Ouyang Changchun wrote:
> >>> Check mq mode for VMDq RSS, handle it correctly instead of returning
> >>> an error; Also remove the limitation of per pool queue number has
> >>> max value of 1, because the per pool queue number could be 2 or 4 if
> >>> it is VMDq RSS mode;
> >>>
> >>> The number of rxq specified in config will determine the mq mode for
> >> VMDq RSS.
> >>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> >>> ---
> >>>    lib/librte_ether/rte_ethdev.c | 39
> >> ++++++++++++++++++++++++++++++++++-----
> >>>    1 file changed, 34 insertions(+), 5 deletions(-)
> >>>
> >>> diff --git a/lib/librte_ether/rte_ethdev.c
> >>> b/lib/librte_ether/rte_ethdev.c index 95f2ceb..59ff325 100644
> >>> --- a/lib/librte_ether/rte_ethdev.c
> >>> +++ b/lib/librte_ether/rte_ethdev.c
> >>> @@ -510,8 +510,7 @@ rte_eth_dev_check_mq_mode(uint8_t port_id,
> >>> uint16_t nb_rx_q, uint16_t nb_tx_q,
> >>>
> >>>    	if (RTE_ETH_DEV_SRIOV(dev).active != 0) {
> >>>    		/* check multi-queue mode */
> >>> -		if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_RSS) ||
> >>> -		    (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
> >>> +		if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
> >>>    		    (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB_RSS)
> >> ||
> >>>    		    (dev_conf->txmode.mq_mode == ETH_MQ_TX_DCB)) {
> >>>    			/* SRIOV only works in VMDq enable mode */ @@ -
> >> 525,7 +524,6 @@
> >>> rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q,
> >> uint16_t nb_tx_q,
> >>>    		}
> >>>
> >>>    		switch (dev_conf->rxmode.mq_mode) {
> >>> -		case ETH_MQ_RX_VMDQ_RSS:
> >>>    		case ETH_MQ_RX_VMDQ_DCB:
> >>>    		case ETH_MQ_RX_VMDQ_DCB_RSS:
> >>>    			/* DCB/RSS VMDQ in SRIOV mode, not implement
> >> yet */ @@ -534,6
> >>> +532,39 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t
> >> nb_rx_q, uint16_t nb_tx_q,
> >>>    					"unsupported VMDQ mq_mode
> >> rx %u\n",
> >>>    					port_id, dev_conf-
> >>> rxmode.mq_mode);
> >>>    			return (-EINVAL);
> >>> +		case ETH_MQ_RX_RSS:
> >>> +			PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8
> >>> +					" SRIOV active, "
> >>> +					"Rx mq mode is changed from:"
> >>> +					"mq_mode %u into VMDQ
> >> mq_mode %u\n",
> >>> +					port_id,
> >>> +					dev_conf->rxmode.mq_mode,
> >>> +					dev->data-
> >>> dev_conf.rxmode.mq_mode);
> >>> +		case ETH_MQ_RX_VMDQ_RSS:
> >>> +			dev->data->dev_conf.rxmode.mq_mode =
> >> ETH_MQ_RX_VMDQ_RSS;
> >>> +			if (nb_rx_q <
> >> RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool) {
> 
> Missed that before: shouldn't it be "<=" here?

Agreed, it needs to be <= here; I will fix it in v5.

> 
> >>> +				switch (nb_rx_q) {
> >>> +				case 1:
> >>> +				case 2:
> >>> +					RTE_ETH_DEV_SRIOV(dev).active =
> >>> +						ETH_64_POOLS;
> >>> +					break;
> >>> +				case 4:
> >>> +					RTE_ETH_DEV_SRIOV(dev).active =
> >>> +						ETH_32_POOLS;
> >>> +					break;
> >>> +				default:
> >>> +					PMD_DEBUG_TRACE("ethdev
> >> port_id=%d"
> >>> +						" SRIOV active, "
> >>> +						"queue number invalid\n",
> >>> +						port_id);
> >>> +					return -EINVAL;
> >>> +				}
> >>> +				RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool =
> >> nb_rx_q;
> >>> +				RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx =
> >>> +					dev->pci_dev->max_vfs * nb_rx_q;
> >>> +			}
> >> Don't u need to return an error in the "else" here?
> > Actually it has such a check after these code snippet, and it does
> > return error for the else case, Because it is original logic, I don't change any
> code around it, so it doesn't display here, you can check the codes.
> 
> I see. The flow is a bit confusing since the switch-case above will end up
> executing a "default" clause which will set
> RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool to 1 and then the error message
> in the check u are referring will be a bit confusing.

Setting RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool to 1 is the original code, which handles the VMDq-only case or the single-queue case.
It is in the default clause, not in the VMDQ_RSS clause.
I think my new code is OK here.

> >
> > Thanks
> > Changchun
> >
> >

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v4 6/6] testpmd: Set Rx VMDq RSS mode
  2015-01-04  9:46             ` Vlad Zolotarov
@ 2015-01-05  2:38               ` Ouyang, Changchun
  2015-01-05 10:12                 ` Vlad Zolotarov
  0 siblings, 1 reply; 144+ messages in thread
From: Ouyang, Changchun @ 2015-01-05  2:38 UTC (permalink / raw)
  To: Vlad Zolotarov, dev



> -----Original Message-----
> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> Sent: Sunday, January 4, 2015 5:47 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v4 6/6] testpmd: Set Rx VMDq RSS mode
> 
> 
> On 01/04/15 11:01, Ouyang, Changchun wrote:
> >> -----Original Message-----
> >> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> >> Sent: Sunday, January 4, 2015 4:50 PM
> >> To: Ouyang, Changchun; dev@dpdk.org
> >> Subject: Re: [dpdk-dev] [PATCH v4 6/6] testpmd: Set Rx VMDq RSS mode
> >>
> >>
> >> On 01/04/15 09:18, Ouyang Changchun wrote:
> >>> Set VMDq RSS mode if it has VF(VF number is more than 1) and has RSS
> >> information.
> >>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> >>> ---
> >>>    app/test-pmd/testpmd.c | 10 ++++++++++
> >>>    1 file changed, 10 insertions(+)
> >>>
> >>> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index
> >>> 8c69756..6230f8b 100644
> >>> --- a/app/test-pmd/testpmd.c
> >>> +++ b/app/test-pmd/testpmd.c
> >>> @@ -1708,6 +1708,16 @@ init_port_config(void)
> >>>    				port->dev_conf.rxmode.mq_mode =
> >> ETH_MQ_RX_NONE;
> >>>    		}
> >>>
> >>> +		if (port->dev_info.max_vfs != 0) {
> >>> +			if (port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0)
> >>> +				port->dev_conf.rxmode.mq_mode =
> >>> +					ETH_MQ_RX_VMDQ_RSS;
> >>> +			else {
> >>> +				port->dev_conf.rxmode.mq_mode =
> >> ETH_MQ_RX_NONE;
> >>> +				port->dev_conf.txmode.mq_mode =
> >> ETH_MQ_TX_NONE;
> >>
> >> And what about the txmode.mq_mode when RSS is available (the :if"
> clause)?
> > I think we can keep its original value for txmode.mq_mode, so don't
> change its value. How do you think of it?
> 
> I agree that not changing a Tx mq_mode in both cases would be better.

In the else clause, txmode.mq_mode is set to ETH_MQ_TX_NONE explicitly to make sure it is not ETH_MQ_TX_DCB,
ETH_MQ_TX_VMDQ_DCB, or ETH_MQ_TX_VMDQ_ONLY.

> > Thanks
> > Changchun
> >
> >

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v4 3/6] ixgbe: Get VF queue number
  2015-01-04  8:38         ` Vlad Zolotarov
@ 2015-01-05  2:59           ` Ouyang, Changchun
  2015-01-05 10:07             ` Vlad Zolotarov
  0 siblings, 1 reply; 144+ messages in thread
From: Ouyang, Changchun @ 2015-01-05  2:59 UTC (permalink / raw)
  To: Vlad Zolotarov, dev



> -----Original Message-----
> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> Sent: Sunday, January 4, 2015 4:39 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v4 3/6] ixgbe: Get VF queue number
> 
> 
> On 01/04/15 09:18, Ouyang Changchun wrote:
> > Get the available Rx and Tx queue number when receiving
> IXGBE_VF_GET_QUEUES message from VF.
> >
> > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> > ---
> >   lib/librte_pmd_ixgbe/ixgbe_pf.c | 35
> ++++++++++++++++++++++++++++++++++-
> >   1 file changed, 34 insertions(+), 1 deletion(-)
> >
> > diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > b/lib/librte_pmd_ixgbe/ixgbe_pf.c index 495aff5..cbb0145 100644
> > --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > @@ -53,6 +53,8 @@
> >   #include "ixgbe_ethdev.h"
> >
> >   #define IXGBE_MAX_VFTA     (128)
> > +#define IXGBE_VF_MSG_SIZE_DEFAULT 1
> > +#define IXGBE_VF_GET_QUEUE_MSG_SIZE 5
> >
> >   static inline uint16_t
> >   dev_num_vf(struct rte_eth_dev *eth_dev) @@ -491,9 +493,36 @@
> > ixgbe_negotiate_vf_api(struct rte_eth_dev *dev, uint32_t vf, uint32_t
> *msgbuf)
> >   }
> >
> >   static int
> > +ixgbe_get_vf_queues(struct rte_eth_dev *dev, uint32_t vf, uint32_t
> > +*msgbuf) {
> > +	struct ixgbe_vf_info *vfinfo =
> > +		*IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data-
> >dev_private);
> > +	uint32_t default_q = vf * RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
> > +
> > +	/* Verify if the PF supports the mbox APIs version or not */
> > +	switch (vfinfo[vf].api_version) {
> > +	case ixgbe_mbox_api_20:
> > +	case ixgbe_mbox_api_11:
> > +		break;
> > +	default:
> > +		return -1;
> > +	}
> > +
> > +	/* Notify VF of Rx and Tx queue number */
> > +	msgbuf[IXGBE_VF_RX_QUEUES] =
> RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
> > +	msgbuf[IXGBE_VF_TX_QUEUES] =
> RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
> > +
> > +	/* Notify VF of default queue */
> > +	msgbuf[IXGBE_VF_DEF_QUEUE] = default_q;
> 
> What about IXGBE_VF_TRANS_VLAN field?

This field is used for the VLAN strip or DCB case; VF RSS doesn't need it.

> > +
> > +	return 0;
> > +}
> > +
> > +static int
> >   ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
> >   {
> >   	uint16_t mbx_size = IXGBE_VFMAILBOX_SIZE;
> > +	uint16_t msg_size = IXGBE_VF_MSG_SIZE_DEFAULT;
> >   	uint32_t msgbuf[IXGBE_VFMAILBOX_SIZE];
> >   	int32_t retval;
> >   	struct ixgbe_hw *hw =
> > IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> > @@ -537,6 +566,10 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev,
> uint16_t vf)
> >   	case IXGBE_VF_API_NEGOTIATE:
> >   		retval = ixgbe_negotiate_vf_api(dev, vf, msgbuf);
> >   		break;
> > +	case IXGBE_VF_GET_QUEUES:
> > +		retval = ixgbe_get_vf_queues(dev, vf, msgbuf);
> > +		msg_size = IXGBE_VF_GET_QUEUE_MSG_SIZE;
> 
> Although the msg_size semantics and motivation is clear, if u want to do then
> do it all the way - add it to all other cases too not just to
> IXGBE_VF_GET_QUEUES.
> For instance, why do u write all 16 DWORDS for API negotiation (only 2 are
> required) and only here u decided to get "greedy"? ;)
> 
> My point is: either drop it completely or fix all other places as well.

This is because the actual message sizes required by the 2 different messages (api-negotiation and vf-get-queue)
are different: the first one requires only 4 bytes, the second one needs 20 bytes.
If both use 4 bytes, then the second one will be an incomplete message.
If both use 20 bytes, then the first one will contain garbage info which is not necessary at all.
That is why the code logic looks as above.

> > +		break;
> >   	default:
> >   		PMD_DRV_LOG(DEBUG, "Unhandled Msg %8.8x",
> (unsigned)msgbuf[0]);
> >   		retval = IXGBE_ERR_MBX;
> > @@ -551,7 +584,7 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev,
> > uint16_t vf)
> >
> >   	msgbuf[0] |= IXGBE_VT_MSGTYPE_CTS;
> >
> > -	ixgbe_write_mbx(hw, msgbuf, 1, vf);
> > +	ixgbe_write_mbx(hw, msgbuf, msg_size, vf);
> >
> >   	return retval;
> >   }

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v4 3/6] ixgbe: Get VF queue number
  2015-01-05  2:59           ` Ouyang, Changchun
@ 2015-01-05 10:07             ` Vlad Zolotarov
  2015-01-06  1:54               ` Ouyang, Changchun
  0 siblings, 1 reply; 144+ messages in thread
From: Vlad Zolotarov @ 2015-01-05 10:07 UTC (permalink / raw)
  To: Ouyang, Changchun, dev


On 01/05/15 04:59, Ouyang, Changchun wrote:
>
>> -----Original Message-----
>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>> Sent: Sunday, January 4, 2015 4:39 PM
>> To: Ouyang, Changchun; dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH v4 3/6] ixgbe: Get VF queue number
>>
>>
>> On 01/04/15 09:18, Ouyang Changchun wrote:
>>> Get the available Rx and Tx queue number when receiving
>> IXGBE_VF_GET_QUEUES message from VF.
>>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
>>> ---
>>>    lib/librte_pmd_ixgbe/ixgbe_pf.c | 35
>> ++++++++++++++++++++++++++++++++++-
>>>    1 file changed, 34 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>> b/lib/librte_pmd_ixgbe/ixgbe_pf.c index 495aff5..cbb0145 100644
>>> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>> @@ -53,6 +53,8 @@
>>>    #include "ixgbe_ethdev.h"
>>>
>>>    #define IXGBE_MAX_VFTA     (128)
>>> +#define IXGBE_VF_MSG_SIZE_DEFAULT 1
>>> +#define IXGBE_VF_GET_QUEUE_MSG_SIZE 5
>>>
>>>    static inline uint16_t
>>>    dev_num_vf(struct rte_eth_dev *eth_dev) @@ -491,9 +493,36 @@
>>> ixgbe_negotiate_vf_api(struct rte_eth_dev *dev, uint32_t vf, uint32_t
>> *msgbuf)
>>>    }
>>>
>>>    static int
>>> +ixgbe_get_vf_queues(struct rte_eth_dev *dev, uint32_t vf, uint32_t
>>> +*msgbuf) {
>>> +	struct ixgbe_vf_info *vfinfo =
>>> +		*IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data-
>>> dev_private);
>>> +	uint32_t default_q = vf * RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
>>> +
>>> +	/* Verify if the PF supports the mbox APIs version or not */
>>> +	switch (vfinfo[vf].api_version) {
>>> +	case ixgbe_mbox_api_20:
>>> +	case ixgbe_mbox_api_11:
>>> +		break;
>>> +	default:
>>> +		return -1;
>>> +	}
>>> +
>>> +	/* Notify VF of Rx and Tx queue number */
>>> +	msgbuf[IXGBE_VF_RX_QUEUES] =
>> RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
>>> +	msgbuf[IXGBE_VF_TX_QUEUES] =
>> RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
>>> +
>>> +	/* Notify VF of default queue */
>>> +	msgbuf[IXGBE_VF_DEF_QUEUE] = default_q;
>> What about IXGBE_VF_TRANS_VLAN field?
> This field is used for vlan strip or dcb case, which the vf rss don't need it.

But VFs do support VLAN stripping, and you aren't adding this message for RSS only. If VFs 
do not support VLAN stripping in DPDK yet, they should, and then we 
will need this field.

>
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static int
>>>    ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
>>>    {
>>>    	uint16_t mbx_size = IXGBE_VFMAILBOX_SIZE;
>>> +	uint16_t msg_size = IXGBE_VF_MSG_SIZE_DEFAULT;
>>>    	uint32_t msgbuf[IXGBE_VFMAILBOX_SIZE];
>>>    	int32_t retval;
>>>    	struct ixgbe_hw *hw =
>>> IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>>> @@ -537,6 +566,10 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev,
>> uint16_t vf)
>>>    	case IXGBE_VF_API_NEGOTIATE:
>>>    		retval = ixgbe_negotiate_vf_api(dev, vf, msgbuf);
>>>    		break;
>>> +	case IXGBE_VF_GET_QUEUES:
>>> +		retval = ixgbe_get_vf_queues(dev, vf, msgbuf);
>>> +		msg_size = IXGBE_VF_GET_QUEUE_MSG_SIZE;
>> Although the msg_size semantics and motivation is clear, if u want to do then
>> do it all the way - add it to all other cases too not just to
>> IXGBE_VF_GET_QUEUES.
>> For instance, why do u write all 16 DWORDS for API negotiation (only 2 are
>> required) and only here u decided to get "greedy"? ;)
>>
>> My point is: either drop it completely or fix all other places as well.
> This is because the actual message size required by 2 different message(api-negotiation and vf-get-queue)
> are different, the first one require only 4 bytes, the second one need 20 bytes.
> If both use 4 bytes, then the second one will have incomplete message.
> If both use 20 bytes, then the first one will contain garbage info which is not necessary at all.
> So the code logic looks as above.

I understood the motivation in the first place, but as I've explained 
above, we already bring garbage for some opcodes like API 
negotiation. So, you should either fix it for all opcodes like you did for 
GET_QUEUES or just drop it in GET_QUEUES and fix it for all opcodes in a 
different patch.

>
>>> +		break;
>>>    	default:
>>>    		PMD_DRV_LOG(DEBUG, "Unhandled Msg %8.8x",
>> (unsigned)msgbuf[0]);
>>>    		retval = IXGBE_ERR_MBX;
>>> @@ -551,7 +584,7 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev,
>>> uint16_t vf)
>>>
>>>    	msgbuf[0] |= IXGBE_VT_MSGTYPE_CTS;
>>>
>>> -	ixgbe_write_mbx(hw, msgbuf, 1, vf);
>>> +	ixgbe_write_mbx(hw, msgbuf, msg_size, vf);
>>>
>>>    	return retval;
>>>    }

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v4 4/6] ether: Check VMDq RSS mode
  2015-01-05  1:00               ` Ouyang, Changchun
@ 2015-01-05 10:09                 ` Vlad Zolotarov
  2015-01-06  1:56                   ` Ouyang, Changchun
  0 siblings, 1 reply; 144+ messages in thread
From: Vlad Zolotarov @ 2015-01-05 10:09 UTC (permalink / raw)
  To: Ouyang, Changchun, dev


On 01/05/15 03:00, Ouyang, Changchun wrote:
>
>> -----Original Message-----
>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>> Sent: Sunday, January 4, 2015 5:46 PM
>> To: Ouyang, Changchun; dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH v4 4/6] ether: Check VMDq RSS mode
>>
>>
>> On 01/04/15 10:58, Ouyang, Changchun wrote:
>>>> -----Original Message-----
>>>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>>>> Sent: Sunday, January 4, 2015 4:45 PM
>>>> To: Ouyang, Changchun; dev@dpdk.org
>>>> Subject: Re: [dpdk-dev] [PATCH v4 4/6] ether: Check VMDq RSS mode
>>>>
>>>>
>>>> On 01/04/15 09:18, Ouyang Changchun wrote:
>>>>> Check mq mode for VMDq RSS, handle it correctly instead of returning
>>>>> an error; Also remove the limitation of per pool queue number has
>>>>> max value of 1, because the per pool queue number could be 2 or 4 if
>>>>> it is VMDq RSS mode;
>>>>>
>>>>> The number of rxq specified in config will determine the mq mode for
>>>> VMDq RSS.
>>>>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
>>>>> ---
>>>>>     lib/librte_ether/rte_ethdev.c | 39
>>>> ++++++++++++++++++++++++++++++++++-----
>>>>>     1 file changed, 34 insertions(+), 5 deletions(-)
>>>>>
>>>>> diff --git a/lib/librte_ether/rte_ethdev.c
>>>>> b/lib/librte_ether/rte_ethdev.c index 95f2ceb..59ff325 100644
>>>>> --- a/lib/librte_ether/rte_ethdev.c
>>>>> +++ b/lib/librte_ether/rte_ethdev.c
>>>>> @@ -510,8 +510,7 @@ rte_eth_dev_check_mq_mode(uint8_t port_id,
>>>>> uint16_t nb_rx_q, uint16_t nb_tx_q,
>>>>>
>>>>>     	if (RTE_ETH_DEV_SRIOV(dev).active != 0) {
>>>>>     		/* check multi-queue mode */
>>>>> -		if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_RSS) ||
>>>>> -		    (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
>>>>> +		if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
>>>>>     		    (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB_RSS)
>>>> ||
>>>>>     		    (dev_conf->txmode.mq_mode == ETH_MQ_TX_DCB)) {
>>>>>     			/* SRIOV only works in VMDq enable mode */ @@ -
>>>> 525,7 +524,6 @@
>>>>> rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q,
>>>> uint16_t nb_tx_q,
>>>>>     		}
>>>>>
>>>>>     		switch (dev_conf->rxmode.mq_mode) {
>>>>> -		case ETH_MQ_RX_VMDQ_RSS:
>>>>>     		case ETH_MQ_RX_VMDQ_DCB:
>>>>>     		case ETH_MQ_RX_VMDQ_DCB_RSS:
>>>>>     			/* DCB/RSS VMDQ in SRIOV mode, not implement
>>>> yet */ @@ -534,6
>>>>> +532,39 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t
>>>> nb_rx_q, uint16_t nb_tx_q,
>>>>>     					"unsupported VMDQ mq_mode
>>>> rx %u\n",
>>>>>     					port_id, dev_conf-
>>>>> rxmode.mq_mode);
>>>>>     			return (-EINVAL);
>>>>> +		case ETH_MQ_RX_RSS:
>>>>> +			PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8
>>>>> +					" SRIOV active, "
>>>>> +					"Rx mq mode is changed from:"
>>>>> +					"mq_mode %u into VMDQ
>>>> mq_mode %u\n",
>>>>> +					port_id,
>>>>> +					dev_conf->rxmode.mq_mode,
>>>>> +					dev->data-
>>>>> dev_conf.rxmode.mq_mode);
>>>>> +		case ETH_MQ_RX_VMDQ_RSS:
>>>>> +			dev->data->dev_conf.rxmode.mq_mode =
>>>> ETH_MQ_RX_VMDQ_RSS;
>>>>> +			if (nb_rx_q <
>>>> RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool) {
>> Missed that before: shouldn't it be "<=" here?
> Agree with you, need <= here, I will fix it in v5
>
>>>>> +				switch (nb_rx_q) {
>>>>> +				case 1:
>>>>> +				case 2:
>>>>> +					RTE_ETH_DEV_SRIOV(dev).active =
>>>>> +						ETH_64_POOLS;
>>>>> +					break;
>>>>> +				case 4:
>>>>> +					RTE_ETH_DEV_SRIOV(dev).active =
>>>>> +						ETH_32_POOLS;
>>>>> +					break;
>>>>> +				default:
>>>>> +					PMD_DEBUG_TRACE("ethdev
>>>> port_id=%d"
>>>>> +						" SRIOV active, "
>>>>> +						"queue number invalid\n",
>>>>> +						port_id);
>>>>> +					return -EINVAL;
>>>>> +				}
>>>>> +				RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool =
>>>> nb_rx_q;
>>>>> +				RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx =
>>>>> +					dev->pci_dev->max_vfs * nb_rx_q;
>>>>> +			}
>>>> Don't u need to return an error in the "else" here?
>>> Actually it has such a check after these code snippet, and it does
>>> return error for the else case, Because it is original logic, I don't change any
>> code around it, so it doesn't display here, you can check the codes.
>>
>> I see. The flow is a bit confusing since the switch-case above will end up
>> executing a "default" clause which will set
>> RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool to 1 and then the error message
>> in the check u are referring will be a bit confusing.
> ' set RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool to 1 ' is original code, which is for vmdq only case, or single queue case.
> It is in default clause, and not in VMDQ_RSS clause.
> I think my new code is ok here.

The original code is ok and your current code will work. The only 
problem with your new code is that in case of an error like the one I've 
described above, the error message will be confusing.

>
>>> Thanks
>>> Changchun
>>>
>>>

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v4 6/6] testpmd: Set Rx VMDq RSS mode
  2015-01-05  2:38               ` Ouyang, Changchun
@ 2015-01-05 10:12                 ` Vlad Zolotarov
  2015-01-06  2:01                   ` Ouyang, Changchun
  0 siblings, 1 reply; 144+ messages in thread
From: Vlad Zolotarov @ 2015-01-05 10:12 UTC (permalink / raw)
  To: Ouyang, Changchun, dev


On 01/05/15 04:38, Ouyang, Changchun wrote:
>
>> -----Original Message-----
>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>> Sent: Sunday, January 4, 2015 5:47 PM
>> To: Ouyang, Changchun; dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH v4 6/6] testpmd: Set Rx VMDq RSS mode
>>
>>
>> On 01/04/15 11:01, Ouyang, Changchun wrote:
>>>> -----Original Message-----
>>>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>>>> Sent: Sunday, January 4, 2015 4:50 PM
>>>> To: Ouyang, Changchun; dev@dpdk.org
>>>> Subject: Re: [dpdk-dev] [PATCH v4 6/6] testpmd: Set Rx VMDq RSS mode
>>>>
>>>>
>>>> On 01/04/15 09:18, Ouyang Changchun wrote:
>>>>> Set VMDq RSS mode if it has VF(VF number is more than 1) and has RSS
>>>> information.
>>>>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
>>>>> ---
>>>>>     app/test-pmd/testpmd.c | 10 ++++++++++
>>>>>     1 file changed, 10 insertions(+)
>>>>>
>>>>> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index
>>>>> 8c69756..6230f8b 100644
>>>>> --- a/app/test-pmd/testpmd.c
>>>>> +++ b/app/test-pmd/testpmd.c
>>>>> @@ -1708,6 +1708,16 @@ init_port_config(void)
>>>>>     				port->dev_conf.rxmode.mq_mode =
>>>> ETH_MQ_RX_NONE;
>>>>>     		}
>>>>>
>>>>> +		if (port->dev_info.max_vfs != 0) {
>>>>> +			if (port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0)
>>>>> +				port->dev_conf.rxmode.mq_mode =
>>>>> +					ETH_MQ_RX_VMDQ_RSS;
>>>>> +			else {
>>>>> +				port->dev_conf.rxmode.mq_mode =
>>>> ETH_MQ_RX_NONE;
>>>>> +				port->dev_conf.txmode.mq_mode =
>>>> ETH_MQ_TX_NONE;
>>>>
>>>> And what about the txmode.mq_mode when RSS is available (the :if"
>> clause)?
>>> I think we can keep its original value for txmode.mq_mode, so don't
>> change its value. How do you think of it?
>>
>> I agree that not changing a Tx mq_mode in both cases would be better.
> In the else clause, set txmode.mq_mode as ETH_MQ_TX_NONE explicitly to make sure it is neither ETH_MQ_TX_DCB,
> ETH_MQ_TX_VMDQ_DCB, nor ETH_MQ_TX_VMDQ_ONLY.

It's not obvious to me why u should do that since AFAIK any of these 
modes requires RX_RSS. Do I miss anything?

>
>>> Thanks
>>> Changchun
>>>
>>>

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
  2014-12-26  1:52             ` Ouyang, Changchun
  2014-12-26  6:49               ` Vladislav Zolotarov
@ 2015-01-05 10:29               ` Bruce Richardson
  2015-01-06  1:00                 ` Ouyang, Changchun
  1 sibling, 1 reply; 144+ messages in thread
From: Bruce Richardson @ 2015-01-05 10:29 UTC (permalink / raw)
  To: Ouyang, Changchun; +Cc: dev

On Fri, Dec 26, 2014 at 01:52:25AM +0000, Ouyang, Changchun wrote:
> 
> 
> > -----Original Message-----
> > From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> > Sent: Thursday, December 25, 2014 9:20 PM
> > To: Ouyang, Changchun; dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
> > 
 > 
> > Like I said before, trying to guess what user wants is a way to making a code
> > that is very hard to use and to maintain. Pls., just return an error and let the
> > user code deal with it the way he/she really wants and not the way u *think*
> > he/she wants.
> > 
> I didn't disagree on this, either :-)
> If you have strong reason for this way and more guys agree with it,
> I will modify it probably in v4. 
> > >
+1 on returning error.

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic
  2014-12-25  1:46       ` Ouyang, Changchun
@ 2015-01-05 10:38         ` Bruce Richardson
  2015-01-05 13:02           ` Vlad Zolotarov
  2015-01-06  1:04           ` Ouyang, Changchun
  0 siblings, 2 replies; 144+ messages in thread
From: Bruce Richardson @ 2015-01-05 10:38 UTC (permalink / raw)
  To: Ouyang, Changchun; +Cc: dev

On Thu, Dec 25, 2014 at 01:46:54AM +0000, Ouyang, Changchun wrote:
> Hi,
> 
> > -----Original Message-----
> > From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> > Sent: Wednesday, December 24, 2014 5:59 PM
> > To: Ouyang, Changchun; dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic
> > 
 > 
> > On the contrary - it's a very good idea! We use DPDK on Amazon's guests
> > with enhanced networking and we have no access to the PF. We still need to
> > know the RSS redirection rules for our VF pool. From the 82599 spec, chapter
> > 4.6.10.1.1: "redirection table is common to all the pools and only indicates the
> > queue inside the pool to use once the pool is chosen". In that case we need
> > to get the whole 128 entries of the RETA. Is there a reason why we can't have
> > it?
> >
> Due to hardware limitation, VF could not query its own reta table, because there is not its own reta,
> The reta table shared by pf and all vfs.
> If you need know it, query them on pf is feasible way to do it.
>
It's not feasible if you only have access to a guest. :-) 
IMHO since the guest is seeing the results of the RSS redirection table, 
it should be able to query the table, if it wants. It should not, however, 
be able to modify the table, as it is owned by the PF.

Regards,
/Bruce

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic
  2015-01-05 10:38         ` Bruce Richardson
@ 2015-01-05 13:02           ` Vlad Zolotarov
  2015-01-06  1:11             ` Ouyang, Changchun
  2015-01-06  1:04           ` Ouyang, Changchun
  1 sibling, 1 reply; 144+ messages in thread
From: Vlad Zolotarov @ 2015-01-05 13:02 UTC (permalink / raw)
  To: Bruce Richardson, Ouyang, Changchun; +Cc: dev


On 01/05/15 12:38, Bruce Richardson wrote:
> On Thu, Dec 25, 2014 at 01:46:54AM +0000, Ouyang, Changchun wrote:
>> Hi,
>>
>>> -----Original Message-----
>>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>>> Sent: Wednesday, December 24, 2014 5:59 PM
>>> To: Ouyang, Changchun; dev@dpdk.org
>>> Subject: Re: [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic
>>>
>   >
>>> On the contrary - it's a very good idea! We use DPDK on Amazon's guests
>>> with enhanced networking and we have no access to the PF. We still need to
>>> know the RSS redirection rules for our VF pool. From the 82599 spec, chapter
>>> 4.6.10.1.1: "redirection table is common to all the pools and only indicates the
>>> queue inside the pool to use once the pool is chosen". In that case we need
>>> to get the whole 128 entries of the RETA. Is there a reason why we can't have
>>> it?
>>>
>> Due to hardware limitation, VF could not query its own reta table, because there is not its own reta,
>> The reta table shared by pf and all vfs.
>> If you need know it, query them on pf is feasible way to do it.
>>
> It's not feasible if you only have access to a guest. :-)
> IMHO since the guest is seeing the results of the RSS redirection table,
> it should be able to query the table, if it wants. It should not, however,
> be able to modify the table, as it is owned by the PF.

This is exactly what I meant! ;)
The problem at the moment is that the upstream PF driver has no VF-PF 
command for that and I'm in the process of pushing the patch for it. 
Once it's accepted (and pushed into the Amazon's HV ;))
DPDK's VF driver may proceed with what u and I are suggesting.

An unrelated question for the Intel guys: I can't find an x550 spec on the net. 
Can anybody tell me where it may be found? ;)

>
> Regards,
> /Bruce
>

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
  2015-01-05 10:29               ` Bruce Richardson
@ 2015-01-06  1:00                 ` Ouyang, Changchun
  0 siblings, 0 replies; 144+ messages in thread
From: Ouyang, Changchun @ 2015-01-06  1:00 UTC (permalink / raw)
  To: Richardson, Bruce; +Cc: dev



> -----Original Message-----
> From: Richardson, Bruce
> Sent: Monday, January 5, 2015 6:29 PM
> To: Ouyang, Changchun
> Cc: Vlad Zolotarov; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
> 
> On Fri, Dec 26, 2014 at 01:52:25AM +0000, Ouyang, Changchun wrote:
> >
> >
> > > -----Original Message-----
> > > From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> > > Sent: Thursday, December 25, 2014 9:20 PM
> > > To: Ouyang, Changchun; dev@dpdk.org
> > > Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
> > >
>  >
> > > Like I said before, trying to guess what user wants is a way to
> > > making a code that is very hard to use and to maintain. Pls., just
> > > return an error and let the user code deal with it the way he/she
> > > really wants and not the way u *think* he/she wants.
> > >
> > I didn't disagree on this, either :-)
> > If you have strong reason for this way and more guys agree with it, I
> > will modify it probably in v4.
> > > >
> +1 on returning error.
It does in v4 patch.

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic
  2015-01-05 10:38         ` Bruce Richardson
  2015-01-05 13:02           ` Vlad Zolotarov
@ 2015-01-06  1:04           ` Ouyang, Changchun
  1 sibling, 0 replies; 144+ messages in thread
From: Ouyang, Changchun @ 2015-01-06  1:04 UTC (permalink / raw)
  To: Richardson, Bruce; +Cc: dev



> -----Original Message-----
> From: Richardson, Bruce
> Sent: Monday, January 5, 2015 6:38 PM
> To: Ouyang, Changchun
> Cc: Vlad Zolotarov; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic
> 
> On Thu, Dec 25, 2014 at 01:46:54AM +0000, Ouyang, Changchun wrote:
> > Hi,
> >
> > > -----Original Message-----
> > > From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> > > Sent: Wednesday, December 24, 2014 5:59 PM
> > > To: Ouyang, Changchun; dev@dpdk.org
> > > Subject: Re: [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic
> > >
>  >
> > > On the contrary - it's a very good idea! We use DPDK on Amazon's
> > > guests with enhanced networking and we have no access to the PF. We
> > > still need to know the RSS redirection rules for our VF pool. From
> > > the 82599 spec, chapter
> > > 4.6.10.1.1: "redirection table is common to all the pools and only
> > > indicates the queue inside the pool to use once the pool is chosen".
> > > In that case we need to get the whole 128 entries of the RETA. Is
> > > there a reason why we can't have it?
> > >
> > Due to hardware limitation, VF could not query its own reta table,
> > because there is not its own reta, The reta table shared by pf and all vfs.
> > If you need know it, query them on pf is feasible way to do it.
> >
> It's not feasible if you only have access to a guest. :-) IMHO since the guest is
> seeing the results of the RSS redirection table, it should be able to query the
> table, if it wants. It should not, however, be able to modify the table, as it is
> owned by the PF.

I understand the situation, but the real issue is that the guest has no way to know the RSS redirection table, so it cannot query it.
 

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic
  2015-01-05 13:02           ` Vlad Zolotarov
@ 2015-01-06  1:11             ` Ouyang, Changchun
  2015-01-06 11:18               ` Vlad Zolotarov
  2015-01-06 11:18               ` Vlad Zolotarov
  0 siblings, 2 replies; 144+ messages in thread
From: Ouyang, Changchun @ 2015-01-06  1:11 UTC (permalink / raw)
  To: Vlad Zolotarov, Richardson, Bruce; +Cc: dev



> -----Original Message-----
> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> Sent: Monday, January 5, 2015 9:02 PM
> To: Richardson, Bruce; Ouyang, Changchun
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic
> 
> 
> On 01/05/15 12:38, Bruce Richardson wrote:
> > On Thu, Dec 25, 2014 at 01:46:54AM +0000, Ouyang, Changchun wrote:
> >> Hi,
> >>
> >>> -----Original Message-----
> >>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> >>> Sent: Wednesday, December 24, 2014 5:59 PM
> >>> To: Ouyang, Changchun; dev@dpdk.org
> >>> Subject: Re: [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic
> >>>
> >   >
> >>> On the contrary - it's a very good idea! We use DPDK on Amazon's
> >>> guests with enhanced networking and we have no access to the PF. We
> >>> still need to know the RSS redirection rules for our VF pool. From
> >>> the 82599 spec, chapter
> >>> 4.6.10.1.1: "redirection table is common to all the pools and only
> >>> indicates the queue inside the pool to use once the pool is chosen".
> >>> In that case we need to get the whole 128 entries of the RETA. Is
> >>> there a reason why we can't have it?
> >>>
> >> Due to hardware limitation, VF could not query its own reta table,
> >> because there is not its own reta, The reta table shared by pf and all vfs.
> >> If you need know it, query them on pf is feasible way to do it.
> >>
> > It's not feasible if you only have access to a guest. :-) IMHO since
> > the guest is seeing the results of the RSS redirection table, it
> > should be able to query the table, if it wants. It should not,
> > however, be able to modify the table, as it is owned by the PF.
> 
> This is exactly what I meant! ;)
> The problem at the moment is that upstream PF driver has no VF-PF
> command for that and I'm in the process of pushing the patch for it.
> Then it's accepted (and pushed into the Amazon's HV ;)) then DPDK's VF
> driver may proceed with what u and me are suggesting.

Besides the lack of a command between the PF and VF, there is another issue: the PF also needs to know which of the 128 entries in the RETA table are assigned
to a specified VF.

> 
> Not related question to Intel guys: I can't find a x550 spec in the net.
> Can anybody tell me where it may be found? ;)

AFAIK, not yet

> >
> > Regards,
> > /Bruce
> >

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v4 3/6] ixgbe: Get VF queue number
  2015-01-05 10:07             ` Vlad Zolotarov
@ 2015-01-06  1:54               ` Ouyang, Changchun
  2015-01-06 11:26                 ` Vlad Zolotarov
  0 siblings, 1 reply; 144+ messages in thread
From: Ouyang, Changchun @ 2015-01-06  1:54 UTC (permalink / raw)
  To: Vlad Zolotarov, dev



> -----Original Message-----
> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> Sent: Monday, January 5, 2015 6:07 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v4 3/6] ixgbe: Get VF queue number
> 
> 
> On 01/05/15 04:59, Ouyang, Changchun wrote:
> >
> >> -----Original Message-----
> >> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> >> Sent: Sunday, January 4, 2015 4:39 PM
> >> To: Ouyang, Changchun; dev@dpdk.org
> >> Subject: Re: [dpdk-dev] [PATCH v4 3/6] ixgbe: Get VF queue number
> >>
> >>
> >> On 01/04/15 09:18, Ouyang Changchun wrote:
> >>> Get the available Rx and Tx queue number when receiving
> >> IXGBE_VF_GET_QUEUES message from VF.
> >>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> >>> ---
> >>>    lib/librte_pmd_ixgbe/ixgbe_pf.c | 35
> >> ++++++++++++++++++++++++++++++++++-
> >>>    1 file changed, 34 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> >>> b/lib/librte_pmd_ixgbe/ixgbe_pf.c index 495aff5..cbb0145 100644
> >>> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> >>> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> >>> @@ -53,6 +53,8 @@
> >>>    #include "ixgbe_ethdev.h"
> >>>
> >>>    #define IXGBE_MAX_VFTA     (128)
> >>> +#define IXGBE_VF_MSG_SIZE_DEFAULT 1 #define
> >>> +IXGBE_VF_GET_QUEUE_MSG_SIZE 5
> >>>
> >>>    static inline uint16_t
> >>>    dev_num_vf(struct rte_eth_dev *eth_dev) @@ -491,9 +493,36 @@
> >>> ixgbe_negotiate_vf_api(struct rte_eth_dev *dev, uint32_t vf,
> >>> uint32_t
> >> *msgbuf)
> >>>    }
> >>>
> >>>    static int
> >>> +ixgbe_get_vf_queues(struct rte_eth_dev *dev, uint32_t vf, uint32_t
> >>> +*msgbuf) {
> >>> +	struct ixgbe_vf_info *vfinfo =
> >>> +		*IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data-
> >>> dev_private);
> >>> +	uint32_t default_q = vf * RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
> >>> +
> >>> +	/* Verify if the PF supports the mbox APIs version or not */
> >>> +	switch (vfinfo[vf].api_version) {
> >>> +	case ixgbe_mbox_api_20:
> >>> +	case ixgbe_mbox_api_11:
> >>> +		break;
> >>> +	default:
> >>> +		return -1;
> >>> +	}
> >>> +
> >>> +	/* Notify VF of Rx and Tx queue number */
> >>> +	msgbuf[IXGBE_VF_RX_QUEUES] =
> >> RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
> >>> +	msgbuf[IXGBE_VF_TX_QUEUES] =
> >> RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
> >>> +
> >>> +	/* Notify VF of default queue */
> >>> +	msgbuf[IXGBE_VF_DEF_QUEUE] = default_q;
> >> What about IXGBE_VF_TRANS_VLAN field?
> > This field is used for vlan strip or dcb case, which the vf rss don't need it.
> 
> But VFs do support VLAN stripping and u don't add it to just RSS. If VFs do not
> support VLAN stripping in the DPDK yet they should and then we will need
> this field.

If I am not missing your point, you also agree it is not related to VF RSS itself, right?
As for VLAN stripping, it needs another patch to support it.
  
> >
> >>> +
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +static int
> >>>    ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
> >>>    {
> >>>    	uint16_t mbx_size = IXGBE_VFMAILBOX_SIZE;
> >>> +	uint16_t msg_size = IXGBE_VF_MSG_SIZE_DEFAULT;
> >>>    	uint32_t msgbuf[IXGBE_VFMAILBOX_SIZE];
> >>>    	int32_t retval;
> >>>    	struct ixgbe_hw *hw =
> >>> IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> >>> @@ -537,6 +566,10 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev
> *dev,
> >> uint16_t vf)
> >>>    	case IXGBE_VF_API_NEGOTIATE:
> >>>    		retval = ixgbe_negotiate_vf_api(dev, vf, msgbuf);
> >>>    		break;
> >>> +	case IXGBE_VF_GET_QUEUES:
> >>> +		retval = ixgbe_get_vf_queues(dev, vf, msgbuf);
> >>> +		msg_size = IXGBE_VF_GET_QUEUE_MSG_SIZE;
> >> Although the msg_size semantics and motivation is clear, if u want to do
> then
> >> do it all the way - add it to all other cases too not just to
> >> IXGBE_VF_GET_QUEUES.
> >> For instance, why do u write all 16 DWORDS for API negotiation (only 2 are
> >> required) and only here u decided to get "greedy"? ;)
> >>
> >> My point is: either drop it completely or fix all other places as well.
> > This is because the actual message size required by 2 different
> message(api-negotiation and vf-get-queue)
> > are different, the first one require only 4 bytes, the second one need 20
> bytes.
> > If both use 4 bytes, then the second one will have incomplete message.
> > If both use 20 bytes, then the first one will contain garbage info which is not
> necessary at all.
> > So the code logic looks as above.
> 
> I understood the motivation at the first place but as I've explained
> above we already bring the garbage for some opcodes like API
> negotiation. So, u should either fix it for all opcodes like u did for
> GET_QUEUES or just drop it in GET_QUEUES and fix it for all opcodes in a
> different patch.

Maybe I am missing your point here; my understanding is that 4 bytes are enough for all other opcodes except the get_queues opcode;
get_queues is the only one that needs 20 bytes currently.
So I don't quite understand why I need to fix any code that we both think is right.

> >
> >>> +		break;
> >>>    	default:
> >>>    		PMD_DRV_LOG(DEBUG, "Unhandled Msg %8.8x",
> >> (unsigned)msgbuf[0]);
> >>>    		retval = IXGBE_ERR_MBX;
> >>> @@ -551,7 +584,7 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev
> *dev,
> >>> uint16_t vf)
> >>>
> >>>    	msgbuf[0] |= IXGBE_VT_MSGTYPE_CTS;
> >>>
> >>> -	ixgbe_write_mbx(hw, msgbuf, 1, vf);
> >>> +	ixgbe_write_mbx(hw, msgbuf, msg_size, vf);
> >>>
> >>>    	return retval;
> >>>    }

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v4 4/6] ether: Check VMDq RSS mode
  2015-01-05 10:09                 ` Vlad Zolotarov
@ 2015-01-06  1:56                   ` Ouyang, Changchun
  2015-01-06 19:56                     ` Vlad Zolotarov
  0 siblings, 1 reply; 144+ messages in thread
From: Ouyang, Changchun @ 2015-01-06  1:56 UTC (permalink / raw)
  To: Vlad Zolotarov, dev



> -----Original Message-----
> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> Sent: Monday, January 5, 2015 6:10 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v4 4/6] ether: Check VMDq RSS mode
> 
> 
> On 01/05/15 03:00, Ouyang, Changchun wrote:
> >
> >> -----Original Message-----
> >> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> >> Sent: Sunday, January 4, 2015 5:46 PM
> >> To: Ouyang, Changchun; dev@dpdk.org
> >> Subject: Re: [dpdk-dev] [PATCH v4 4/6] ether: Check VMDq RSS mode
> >>
> >>
> >> On 01/04/15 10:58, Ouyang, Changchun wrote:
> >>>> -----Original Message-----
> >>>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> >>>> Sent: Sunday, January 4, 2015 4:45 PM
> >>>> To: Ouyang, Changchun; dev@dpdk.org
> >>>> Subject: Re: [dpdk-dev] [PATCH v4 4/6] ether: Check VMDq RSS mode
> >>>>
> >>>>
> >>>> On 01/04/15 09:18, Ouyang Changchun wrote:
> >>>>> Check mq mode for VMDq RSS, handle it correctly instead of
> >>>>> returning an error; Also remove the limitation of per pool queue
> >>>>> number has max value of 1, because the per pool queue number
> could
> >>>>> be 2 or 4 if it is VMDq RSS mode;
> >>>>>
> >>>>> The number of rxq specified in config will determine the mq mode
> >>>>> for
> >>>> VMDq RSS.
> >>>>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> >>>>> ---
> >>>>>     lib/librte_ether/rte_ethdev.c | 39
> >>>> ++++++++++++++++++++++++++++++++++-----
> >>>>>     1 file changed, 34 insertions(+), 5 deletions(-)
> >>>>>
> >>>>> diff --git a/lib/librte_ether/rte_ethdev.c
> >>>>> b/lib/librte_ether/rte_ethdev.c index 95f2ceb..59ff325 100644
> >>>>> --- a/lib/librte_ether/rte_ethdev.c
> >>>>> +++ b/lib/librte_ether/rte_ethdev.c
> >>>>> @@ -510,8 +510,7 @@ rte_eth_dev_check_mq_mode(uint8_t
> port_id,
> >>>>> uint16_t nb_rx_q, uint16_t nb_tx_q,
> >>>>>
> >>>>>     	if (RTE_ETH_DEV_SRIOV(dev).active != 0) {
> >>>>>     		/* check multi-queue mode */
> >>>>> -		if ((dev_conf->rxmode.mq_mode ==
> ETH_MQ_RX_RSS) ||
> >>>>> -		    (dev_conf->rxmode.mq_mode ==
> ETH_MQ_RX_DCB) ||
> >>>>> +		if ((dev_conf->rxmode.mq_mode ==
> ETH_MQ_RX_DCB) ||
> >>>>>     		    (dev_conf->rxmode.mq_mode ==
> ETH_MQ_RX_DCB_RSS)
> >>>> ||
> >>>>>     		    (dev_conf->txmode.mq_mode ==
> ETH_MQ_TX_DCB)) {
> >>>>>     			/* SRIOV only works in VMDq enable mode
> */ @@ -
> >>>> 525,7 +524,6 @@
> >>>>> rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q,
> >>>> uint16_t nb_tx_q,
> >>>>>     		}
> >>>>>
> >>>>>     		switch (dev_conf->rxmode.mq_mode) {
> >>>>> -		case ETH_MQ_RX_VMDQ_RSS:
> >>>>>     		case ETH_MQ_RX_VMDQ_DCB:
> >>>>>     		case ETH_MQ_RX_VMDQ_DCB_RSS:
> >>>>>     			/* DCB/RSS VMDQ in SRIOV mode, not
> implement
> >>>> yet */ @@ -534,6
> >>>>> +532,39 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t
> >>>> nb_rx_q, uint16_t nb_tx_q,
> >>>>>     					"unsupported VMDQ
> mq_mode
> >>>> rx %u\n",
> >>>>>     					port_id, dev_conf-
> >>>>> rxmode.mq_mode);
> >>>>>     			return (-EINVAL);
> >>>>> +		case ETH_MQ_RX_RSS:
> >>>>> +			PMD_DEBUG_TRACE("ethdev port_id=%"
> PRIu8
> >>>>> +					" SRIOV active, "
> >>>>> +					"Rx mq mode is changed
> from:"
> >>>>> +					"mq_mode %u into VMDQ
> >>>> mq_mode %u\n",
> >>>>> +					port_id,
> >>>>> +					dev_conf-
> >rxmode.mq_mode,
> >>>>> +					dev->data-
> >>>>> dev_conf.rxmode.mq_mode);
> >>>>> +		case ETH_MQ_RX_VMDQ_RSS:
> >>>>> +			dev->data->dev_conf.rxmode.mq_mode =
> >>>> ETH_MQ_RX_VMDQ_RSS;
> >>>>> +			if (nb_rx_q <
> >>>> RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool) {
> >> Missed that before: shouldn't it be "<=" here?
> > Agree with you, need <= here, I will fix it in v5
> >
> >>>>> +				switch (nb_rx_q) {
> >>>>> +				case 1:
> >>>>> +				case 2:
> >>>>> +
> 	RTE_ETH_DEV_SRIOV(dev).active =
> >>>>> +						ETH_64_POOLS;
> >>>>> +					break;
> >>>>> +				case 4:
> >>>>> +
> 	RTE_ETH_DEV_SRIOV(dev).active =
> >>>>> +						ETH_32_POOLS;
> >>>>> +					break;
> >>>>> +				default:
> >>>>> +
> 	PMD_DEBUG_TRACE("ethdev
> >>>> port_id=%d"
> >>>>> +						" SRIOV active, "
> >>>>> +						"queue number
> invalid\n",
> >>>>> +						port_id);
> >>>>> +					return -EINVAL;
> >>>>> +				}
> >>>>> +
> 	RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool =
> >>>> nb_rx_q;
> >>>>> +
> 	RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx =
> >>>>> +					dev->pci_dev->max_vfs *
> nb_rx_q;
> >>>>> +			}
> >>>> Don't u need to return an error in the "else" here?
> >>> Actually it has such a check after these code snippet, and it does
> >>> return error for the else case, Because it is original logic, I
> >>> don't change any
> >> code around it, so it doesn't display here, you can check the codes.
> >>
> >> I see. The flow is a bit confusing since the switch-case above will
> >> end up executing a "default" clause which will set
> >> RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool to 1 and then the error
> message
> >> in the check u are referring will be a bit confusing.
> > ' set RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool to 1 ' is original code,
> which is for vmdq only case, or single queue case.
> > It is in default clause, and not in VMDQ_RSS clause.
> > I think my new code is ok here.
> 
> The original code is ok and your current code will work. The only problem
> with your new code is that in case on an error like I've described above the
> error message will be confusing.

Then what's your suggestion for a better log message?  I can consider refining it if you have a better one.

> >
> >>> Thanks
> >>> Changchun
> >>>
> >>>


^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v4 6/6] testpmd: Set Rx VMDq RSS mode
  2015-01-05 10:12                 ` Vlad Zolotarov
@ 2015-01-06  2:01                   ` Ouyang, Changchun
  2015-01-06 12:53                     ` Vlad Zolotarov
  0 siblings, 1 reply; 144+ messages in thread
From: Ouyang, Changchun @ 2015-01-06  2:01 UTC (permalink / raw)
  To: Vlad Zolotarov, dev



> -----Original Message-----
> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> Sent: Monday, January 5, 2015 6:12 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v4 6/6] testpmd: Set Rx VMDq RSS mode
> 
> 
> On 01/05/15 04:38, Ouyang, Changchun wrote:
> >
> >> -----Original Message-----
> >> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> >> Sent: Sunday, January 4, 2015 5:47 PM
> >> To: Ouyang, Changchun; dev@dpdk.org
> >> Subject: Re: [dpdk-dev] [PATCH v4 6/6] testpmd: Set Rx VMDq RSS mode
> >>
> >>
> >> On 01/04/15 11:01, Ouyang, Changchun wrote:
> >>>> -----Original Message-----
> >>>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> >>>> Sent: Sunday, January 4, 2015 4:50 PM
> >>>> To: Ouyang, Changchun; dev@dpdk.org
> >>>> Subject: Re: [dpdk-dev] [PATCH v4 6/6] testpmd: Set Rx VMDq RSS
> >>>> mode
> >>>>
> >>>>
> >>>> On 01/04/15 09:18, Ouyang Changchun wrote:
> >>>>> Set VMDq RSS mode if it has VF(VF number is more than 1) and has
> >>>>> RSS
> >>>> information.
> >>>>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> >>>>> ---
> >>>>>     app/test-pmd/testpmd.c | 10 ++++++++++
> >>>>>     1 file changed, 10 insertions(+)
> >>>>>
> >>>>> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index
> >>>>> 8c69756..6230f8b 100644
> >>>>> --- a/app/test-pmd/testpmd.c
> >>>>> +++ b/app/test-pmd/testpmd.c
> >>>>> @@ -1708,6 +1708,16 @@ init_port_config(void)
> >>>>>     				port->dev_conf.rxmode.mq_mode =
> >>>> ETH_MQ_RX_NONE;
> >>>>>     		}
> >>>>>
> >>>>> +		if (port->dev_info.max_vfs != 0) {
> >>>>> +			if (port-
> >dev_conf.rx_adv_conf.rss_conf.rss_hf != 0)
> >>>>> +				port->dev_conf.rxmode.mq_mode =
> >>>>> +					ETH_MQ_RX_VMDQ_RSS;
> >>>>> +			else {
> >>>>> +				port->dev_conf.rxmode.mq_mode =
> >>>> ETH_MQ_RX_NONE;
> >>>>> +				port->dev_conf.txmode.mq_mode =
> >>>> ETH_MQ_TX_NONE;
> >>>>
> >>>> And what about the txmode.mq_mode when RSS is available (the :if"
> >> clause)?
> >>> I think we can keep its original value for txmode.mq_mode, so don't
> >> change its value. How do you think of it?
> >>
> >> I agree that not changing a Tx mq_mode in both cases would be better.
> > In the else clause, set txmode.mq_mode as ETH_MQ_TX_NONE explicitly
> to
> > make sure it is neither ETH_MQ_TX_DCB, ETH_MQ_TX_VMDQ_DCB, nor
> ETH_MQ_TX_VMDQ_ONLY.
> 
> It's not obvious to me why u should do that since AFAIK any of these modes
> requires RX_RSS. Do I miss anything?

No, I don't think so. In the else clause RX RSS is not needed, and there is no way to enable it,
because in that case there is no RSS configuration information (note: in the else clause, dev_conf.rx_adv_conf.rss_conf.rss_hf == 0).

So ETH_MQ_RX_NONE for rx_mode, and ETH_MQ_TX_NONE for tx_mode.

> >
> >>> Thanks
> >>> Changchun
> >>>
> >>>

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic
  2015-01-06  1:11             ` Ouyang, Changchun
@ 2015-01-06 11:18               ` Vlad Zolotarov
  2015-01-06 11:18               ` Vlad Zolotarov
  1 sibling, 0 replies; 144+ messages in thread
From: Vlad Zolotarov @ 2015-01-06 11:18 UTC (permalink / raw)
  To: Ouyang, Changchun, Richardson, Bruce; +Cc: dev


On 01/06/15 03:11, Ouyang, Changchun wrote:
>
>> -----Original Message-----
>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>> Sent: Monday, January 5, 2015 9:02 PM
>> To: Richardson, Bruce; Ouyang, Changchun
>> Cc: dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic
>>
>>
>> On 01/05/15 12:38, Bruce Richardson wrote:
>>> On Thu, Dec 25, 2014 at 01:46:54AM +0000, Ouyang, Changchun wrote:
>>>> Hi,
>>>>
>>>>> -----Original Message-----
>>>>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>>>>> Sent: Wednesday, December 24, 2014 5:59 PM
>>>>> To: Ouyang, Changchun; dev@dpdk.org
>>>>> Subject: Re: [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic
>>>>>
>>>    >
>>>>> On the contrary - it's a very good idea! We use DPDK on Amazon's
>>>>> guests with enhanced networking and we have no access to the PF. We
>>>>> still need to know the RSS redirection rules for our VF pool. From
>>>>> the 82599 spec, chapter
>>>>> 4.6.10.1.1: "redirection table is common to all the pools and only
>>>>> indicates the queue inside the pool to use once the pool is chosen".
>>>>> In that case we need to get the whole 128 entries of the RETA. Is
>>>>> there a reason why we can't have it?
>>>>>
>>>> Due to hardware limitation, VF could not query its own reta table,
>>>> because there is not its own reta, The reta table shared by pf and all vfs.
>>>> If you need know it, query them on pf is feasible way to do it.
>>>>
>>> It's not feasible if you only have access to a guest. :-) IMHO since
>>> the guest is seeing the results of the RSS redirection table, it
>>> should be able to query the table, if it wants. It should not,
>>> however, be able to modify the table, as it is owned by the PF.
>> This is exactly what I meant! ;)
>> The problem at the moment is that upstream PF driver has no VF-PF
>> command for that and I'm in the process of pushing the patch for it.
>> Then it's accepted (and pushed into the Amazon's HV ;)) then DPDK's VF
>> driver may proceed with what u and me are suggesting.
> Besides lack of command between pf and vf, another issue, pf also need know which entries from the whole 128 entries in reta table are assigned
> To a specified vf.

First of all, the PF knows, since it configures it for a VF on an x550; for 
older devices the (whole) RETA is shared between the PF and VF. There is 
a per-pool RTYPE[n].RQPL
register that defines the number of LSBs from the redirection table to 
consider (see my patch series "ixgbevf: Allow querying VFs RSS 
indirection table and key" in the netdev list).

>
>> Not related question to Intel guys: I can't find a x550 spec in the net.
>> Can anybody tell me where it may be found? ;)
> AFAIK, not yet

Well, too bad... ;) I'll have to hope I guessed right during the driver 
reverse engineering... ;)

>
>>> Regards,
>>> /Bruce
>>>

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic
  2015-01-06  1:11             ` Ouyang, Changchun
  2015-01-06 11:18               ` Vlad Zolotarov
@ 2015-01-06 11:18               ` Vlad Zolotarov
  1 sibling, 0 replies; 144+ messages in thread
From: Vlad Zolotarov @ 2015-01-06 11:18 UTC (permalink / raw)
  To: Ouyang, Changchun, Richardson, Bruce; +Cc: dev


On 01/06/15 03:11, Ouyang, Changchun wrote:
>
>> -----Original Message-----
>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>> Sent: Monday, January 5, 2015 9:02 PM
>> To: Richardson, Bruce; Ouyang, Changchun
>> Cc: dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic
>>
>>
>> On 01/05/15 12:38, Bruce Richardson wrote:
>>> On Thu, Dec 25, 2014 at 01:46:54AM +0000, Ouyang, Changchun wrote:
>>>> Hi,
>>>>
>>>>> -----Original Message-----
>>>>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>>>>> Sent: Wednesday, December 24, 2014 5:59 PM
>>>>> To: Ouyang, Changchun; dev@dpdk.org
>>>>> Subject: Re: [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic
>>>>>
>>>    >
>>>>> On the contrary - it's a very good idea! We use DPDK on Amazon's
>>>>> guests with enhanced networking and we have no access to the PF. We
>>>>> still need to know the RSS redirection rules for our VF pool. From
>>>>> the 82599 spec, chapter
>>>>> 4.6.10.1.1: "redirection table is common to all the pools and only
>>>>> indicates the queue inside the pool to use once the pool is chosen".
>>>>> In that case we need to get the whole 128 entries of the RETA. Is
>>>>> there a reason why we can't have it?
>>>>>
>>>> Due to hardware limitation, VF could not query its own reta table,
>>>> because there is not its own reta, The reta table shared by pf and all vfs.
>>>> If you need know it, query them on pf is feasible way to do it.
>>>>
>>> It's not feasible if you only have access to a guest. :-) IMHO since
>>> the guest is seeing the results of the RSS redirection table, it
>>> should be able to query the table, if it wants. It should not,
>>> however, be able to modify the table, as it is owned by the PF.
>> This is exactly what I meant! ;)
>> The problem at the moment is that upstream PF driver has no VF-PF
>> command for that and I'm in the process of pushing the patch for it.
>> Then it's accepted (and pushed into the Amazon's HV ;)) then DPDK's VF
>> driver may proceed with what u and me are suggesting.
> Besides lack of command between pf and vf, another issue, pf also need know which entries from the whole 128 entries in reta table are assigned
> To a specified vf.

First of all, the PF knows, since it configures it for a VF on an x550; for 
older devices the (whole) RETA is shared between the PF and VF. There is 
a per-pool RTYPE[n].RQPL
register that defines the number of LSBs from the redirection table to 
consider (see my patch series "ixgbevf: Allow querying VFs RSS 
indirection table and key" in the netdev list).

>
>> Not related question to Intel guys: I can't find a x550 spec in the net.
>> Can anybody tell me where it may be found? ;)
> AFAIK, not yet

Well, too bad... ;) I'll have to hope I guessed right during the driver 
reverse engineering... ;)

>
>>> Regards,
>>> /Bruce
>>>

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v4 3/6] ixgbe: Get VF queue number
  2015-01-06  1:54               ` Ouyang, Changchun
@ 2015-01-06 11:26                 ` Vlad Zolotarov
  2015-01-07  1:18                   ` Ouyang, Changchun
  0 siblings, 1 reply; 144+ messages in thread
From: Vlad Zolotarov @ 2015-01-06 11:26 UTC (permalink / raw)
  To: Ouyang, Changchun, dev


On 01/06/15 03:54, Ouyang, Changchun wrote:
>
>> -----Original Message-----
>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>> Sent: Monday, January 5, 2015 6:07 PM
>> To: Ouyang, Changchun; dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH v4 3/6] ixgbe: Get VF queue number
>>
>>
>> On 01/05/15 04:59, Ouyang, Changchun wrote:
>>>> -----Original Message-----
>>>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>>>> Sent: Sunday, January 4, 2015 4:39 PM
>>>> To: Ouyang, Changchun; dev@dpdk.org
>>>> Subject: Re: [dpdk-dev] [PATCH v4 3/6] ixgbe: Get VF queue number
>>>>
>>>>
>>>> On 01/04/15 09:18, Ouyang Changchun wrote:
>>>>> Get the available Rx and Tx queue number when receiving
>>>> IXGBE_VF_GET_QUEUES message from VF.
>>>>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
>>>>> ---
>>>>>     lib/librte_pmd_ixgbe/ixgbe_pf.c | 35
>>>> ++++++++++++++++++++++++++++++++++-
>>>>>     1 file changed, 34 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>>>> b/lib/librte_pmd_ixgbe/ixgbe_pf.c index 495aff5..cbb0145 100644
>>>>> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>>>> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>>>> @@ -53,6 +53,8 @@
>>>>>     #include "ixgbe_ethdev.h"
>>>>>
>>>>>     #define IXGBE_MAX_VFTA     (128)
>>>>> +#define IXGBE_VF_MSG_SIZE_DEFAULT 1 #define
>>>>> +IXGBE_VF_GET_QUEUE_MSG_SIZE 5
>>>>>
>>>>>     static inline uint16_t
>>>>>     dev_num_vf(struct rte_eth_dev *eth_dev) @@ -491,9 +493,36 @@
>>>>> ixgbe_negotiate_vf_api(struct rte_eth_dev *dev, uint32_t vf,
>>>>> uint32_t
>>>> *msgbuf)
>>>>>     }
>>>>>
>>>>>     static int
>>>>> +ixgbe_get_vf_queues(struct rte_eth_dev *dev, uint32_t vf, uint32_t
>>>>> +*msgbuf) {
>>>>> +	struct ixgbe_vf_info *vfinfo =
>>>>> +		*IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data-
>>>>> dev_private);
>>>>> +	uint32_t default_q = vf * RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
>>>>> +
>>>>> +	/* Verify if the PF supports the mbox APIs version or not */
>>>>> +	switch (vfinfo[vf].api_version) {
>>>>> +	case ixgbe_mbox_api_20:
>>>>> +	case ixgbe_mbox_api_11:
>>>>> +		break;
>>>>> +	default:
>>>>> +		return -1;
>>>>> +	}
>>>>> +
>>>>> +	/* Notify VF of Rx and Tx queue number */
>>>>> +	msgbuf[IXGBE_VF_RX_QUEUES] =
>>>> RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
>>>>> +	msgbuf[IXGBE_VF_TX_QUEUES] =
>>>> RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
>>>>> +
>>>>> +	/* Notify VF of default queue */
>>>>> +	msgbuf[IXGBE_VF_DEF_QUEUE] = default_q;
>>>> What about IXGBE_VF_TRANS_VLAN field?
>>> This field is used for vlan strip or dcb case, which the vf rss don't need it.
>> But VFs do support VLAN stripping and u don't add it to just RSS. If VFs do not
>> support VLAN stripping in the DPDK yet they should and then we will need
>> this field.
> If I don't miss your point, you also agree it is not related to vf rss itself, right?

Right.

> As for Vlan stripping, it need another patch to support it.

Well, at least put some fat comment in bold there saying that some of the fields 
in the command are not filled, and why. ;)

>    
>>>>> +
>>>>> +	return 0;
>>>>> +}
>>>>> +
>>>>> +static int
>>>>>     ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
>>>>>     {
>>>>>     	uint16_t mbx_size = IXGBE_VFMAILBOX_SIZE;
>>>>> +	uint16_t msg_size = IXGBE_VF_MSG_SIZE_DEFAULT;
>>>>>     	uint32_t msgbuf[IXGBE_VFMAILBOX_SIZE];
>>>>>     	int32_t retval;
>>>>>     	struct ixgbe_hw *hw =
>>>>> IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>>>>> @@ -537,6 +566,10 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev
>> *dev,
>>>> uint16_t vf)
>>>>>     	case IXGBE_VF_API_NEGOTIATE:
>>>>>     		retval = ixgbe_negotiate_vf_api(dev, vf, msgbuf);
>>>>>     		break;
>>>>> +	case IXGBE_VF_GET_QUEUES:
>>>>> +		retval = ixgbe_get_vf_queues(dev, vf, msgbuf);
>>>>> +		msg_size = IXGBE_VF_GET_QUEUE_MSG_SIZE;
>>>> Although the msg_size semantics and motivation is clear, if u want to do
>> then
>>>> do it all the way - add it to all other cases too not just to
>>>> IXGBE_VF_GET_QUEUES.
>>>> For instance, why do u write all 16 DWORDS for API negotiation (only 2 are
>>>> required) and only here u decided to get "greedy"? ;)
>>>>
>>>> My point is: either drop it completely or fix all other places as well.
>>> This is because the actual message size required by 2 different
>> message(api-negotiation and vf-get-queue)
>>> are different, the first one require only 4 bytes, the second one need 20
>> bytes.
>>> If both use 4 bytes, then the second one will have incomplete message.
>>> If both use 20 bytes, then the first one will contain garbage info which is not
>> necessary at all.
>>> So the code logic looks as above.
>> I understood the motivation at the first place but as I've explained
>> above we already bring the garbage for some opcodes like API
>> negotiation. So, u should either fix it for all opcodes like u did for
>> GET_QUEUES or just drop it in GET_QUEUES and fix it for all opcodes in a
>> different patch.
> Here maybe I miss your point, my understanding is that  4 bytes are enough for all other opcode except for get_queue opcode,
>   get_queues is the only one that need 20  bytes currently.
> So I don't quite understand why I need fix any codes which we both think they are right.

Ooops. I missed that the default value of msg_size is 1 - I confused its 
initialization with the mbx_size initialization. So, u are right - your code 
is perfectly fine. My apologies! ;)

>
>>>>> +		break;
>>>>>     	default:
>>>>>     		PMD_DRV_LOG(DEBUG, "Unhandled Msg %8.8x",
>>>> (unsigned)msgbuf[0]);
>>>>>     		retval = IXGBE_ERR_MBX;
>>>>> @@ -551,7 +584,7 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev
>> *dev,
>>>>> uint16_t vf)
>>>>>
>>>>>     	msgbuf[0] |= IXGBE_VT_MSGTYPE_CTS;
>>>>>
>>>>> -	ixgbe_write_mbx(hw, msgbuf, 1, vf);
>>>>> +	ixgbe_write_mbx(hw, msgbuf, msg_size, vf);
>>>>>
>>>>>     	return retval;
>>>>>     }

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v4 6/6] testpmd: Set Rx VMDq RSS mode
  2015-01-06  2:01                   ` Ouyang, Changchun
@ 2015-01-06 12:53                     ` Vlad Zolotarov
  2015-01-07  1:50                       ` Ouyang, Changchun
  0 siblings, 1 reply; 144+ messages in thread
From: Vlad Zolotarov @ 2015-01-06 12:53 UTC (permalink / raw)
  To: Ouyang, Changchun, dev


On 01/06/15 04:01, Ouyang, Changchun wrote:
>
>> -----Original Message-----
>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>> Sent: Monday, January 5, 2015 6:12 PM
>> To: Ouyang, Changchun; dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH v4 6/6] testpmd: Set Rx VMDq RSS mode
>>
>>
>> On 01/05/15 04:38, Ouyang, Changchun wrote:
>>>> -----Original Message-----
>>>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>>>> Sent: Sunday, January 4, 2015 5:47 PM
>>>> To: Ouyang, Changchun; dev@dpdk.org
>>>> Subject: Re: [dpdk-dev] [PATCH v4 6/6] testpmd: Set Rx VMDq RSS mode
>>>>
>>>>
>>>> On 01/04/15 11:01, Ouyang, Changchun wrote:
>>>>>> -----Original Message-----
>>>>>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>>>>>> Sent: Sunday, January 4, 2015 4:50 PM
>>>>>> To: Ouyang, Changchun; dev@dpdk.org
>>>>>> Subject: Re: [dpdk-dev] [PATCH v4 6/6] testpmd: Set Rx VMDq RSS
>>>>>> mode
>>>>>>
>>>>>>
>>>>>> On 01/04/15 09:18, Ouyang Changchun wrote:
>>>>>>> Set VMDq RSS mode if it has VF(VF number is more than 1) and has
>>>>>>> RSS
>>>>>> information.
>>>>>>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
>>>>>>> ---
>>>>>>>      app/test-pmd/testpmd.c | 10 ++++++++++
>>>>>>>      1 file changed, 10 insertions(+)
>>>>>>>
>>>>>>> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index
>>>>>>> 8c69756..6230f8b 100644
>>>>>>> --- a/app/test-pmd/testpmd.c
>>>>>>> +++ b/app/test-pmd/testpmd.c
>>>>>>> @@ -1708,6 +1708,16 @@ init_port_config(void)
>>>>>>>      				port->dev_conf.rxmode.mq_mode =
>>>>>> ETH_MQ_RX_NONE;
>>>>>>>      		}
>>>>>>>
>>>>>>> +		if (port->dev_info.max_vfs != 0) {
>>>>>>> +			if (port-
>>> dev_conf.rx_adv_conf.rss_conf.rss_hf != 0)
>>>>>>> +				port->dev_conf.rxmode.mq_mode =
>>>>>>> +					ETH_MQ_RX_VMDQ_RSS;
>>>>>>> +			else {
>>>>>>> +				port->dev_conf.rxmode.mq_mode =
>>>>>> ETH_MQ_RX_NONE;
>>>>>>> +				port->dev_conf.txmode.mq_mode =
>>>>>> ETH_MQ_TX_NONE;
>>>>>>
>>>>>> And what about the txmode.mq_mode when RSS is available (the :if"
>>>> clause)?
>>>>> I think we can keep its original value for txmode.mq_mode, so don't
>>>> change its value. How do you think of it?
>>>>
>>>> I agree that not changing a Tx mq_mode in both cases would be better.
>>> In the else clause, set txmode.mq_mode as ETH_MQ_TX_NONE explicitly
>> to
>>> make sure it is neither ETH_MQ_TX_DCB, ETH_MQ_TX_VMDQ_DCB, nor
>> ETH_MQ_TX_VMDQ_ONLY.
>>
>> It's not obvious to me why u should do that since AFAIK any of these modes
>> requires RX_RSS. Do I miss anything?
> No, I don't think so, in the else clause, it doesn't need rx_rss, and no way to do it,
> because the case is there is no rss configuration information(note: in the else clause, dev_conf.rx_adv_conf.rss_conf.rss_hf == 0).
>
> So ETH_MQ_RX_NONE for rx_mode, and ETH_MQ_TX_NONE for tx_mode.

Of course. However, in general, one may ask why u configure the TX MQ mode 
in the "else" clause and don't do it in the "if" one. Possibly the "if" case 
in the TX MQ context has been handled elsewhere, but this is what makes this 
code confusing: to make it the most readable u'd rather configure the 
same feature set in both "if" and "else".
For instance:

if (bla-bla) {
   tx_mode = X1;
   rx_mode = X2;
} else {
  tx_mode = Y1;
  rx_mode = Y2;
}

Look at the non-SR-IOV clause right above the "if-else" block u've 
added. Why don't they configure tx_mode there? Is it a bug in their code?
By the way, u forgot to fix the remark below

/* In SR-IOV mode, RSS mode is not available */

which is located a few lines above the code u've added. ;)

>
>>>>> Thanks
>>>>> Changchun
>>>>>
>>>>>

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v4 4/6] ether: Check VMDq RSS mode
  2015-01-06  1:56                   ` Ouyang, Changchun
@ 2015-01-06 19:56                     ` Vlad Zolotarov
  2015-01-07  2:28                       ` Ouyang, Changchun
  0 siblings, 1 reply; 144+ messages in thread
From: Vlad Zolotarov @ 2015-01-06 19:56 UTC (permalink / raw)
  To: Ouyang, Changchun, dev


On 01/06/15 03:56, Ouyang, Changchun wrote:
>> -----Original Message-----
>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>> Sent: Monday, January 5, 2015 6:10 PM
>> To: Ouyang, Changchun;dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH v4 4/6] ether: Check VMDq RSS mode
>>
>>
>> On 01/05/15 03:00, Ouyang, Changchun wrote:
>>>> -----Original Message-----
>>>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>>>> Sent: Sunday, January 4, 2015 5:46 PM
>>>> To: Ouyang, Changchun;dev@dpdk.org
>>>> Subject: Re: [dpdk-dev] [PATCH v4 4/6] ether: Check VMDq RSS mode
>>>>
>>>>
>>>> On 01/04/15 10:58, Ouyang, Changchun wrote:
>>>>>> -----Original Message-----
>>>>>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>>>>>> Sent: Sunday, January 4, 2015 4:45 PM
>>>>>> To: Ouyang, Changchun;dev@dpdk.org
>>>>>> Subject: Re: [dpdk-dev] [PATCH v4 4/6] ether: Check VMDq RSS mode
>>>>>>
>>>>>>
>>>>>> On 01/04/15 09:18, Ouyang Changchun wrote:
>>>>>>> Check mq mode for VMDq RSS, handle it correctly instead of
>>>>>>> returning an error; Also remove the limitation of per pool queue
>>>>>>> number has max value of 1, because the per pool queue number
>> could
>>>>>>> be 2 or 4 if it is VMDq RSS mode;
>>>>>>>
>>>>>>> The number of rxq specified in config will determine the mq mode
>>>>>>> for
>>>>>> VMDq RSS.
>>>>>>> Signed-off-by: Changchun Ouyang<changchun.ouyang@intel.com>
>>>>>>> ---
>>>>>>>      lib/librte_ether/rte_ethdev.c | 39
>>>>>> ++++++++++++++++++++++++++++++++++-----
>>>>>>>      1 file changed, 34 insertions(+), 5 deletions(-)
>>>>>>>
>>>>>>> diff --git a/lib/librte_ether/rte_ethdev.c
>>>>>>> b/lib/librte_ether/rte_ethdev.c index 95f2ceb..59ff325 100644
>>>>>>> --- a/lib/librte_ether/rte_ethdev.c
>>>>>>> +++ b/lib/librte_ether/rte_ethdev.c
>>>>>>> @@ -510,8 +510,7 @@ rte_eth_dev_check_mq_mode(uint8_t
>> port_id,
>>>>>>> uint16_t nb_rx_q, uint16_t nb_tx_q,
>>>>>>>
>>>>>>>      	if (RTE_ETH_DEV_SRIOV(dev).active != 0) {
>>>>>>>      		/* check multi-queue mode */
>>>>>>> -		if ((dev_conf->rxmode.mq_mode ==
>> ETH_MQ_RX_RSS) ||
>>>>>>> -		    (dev_conf->rxmode.mq_mode ==
>> ETH_MQ_RX_DCB) ||
>>>>>>> +		if ((dev_conf->rxmode.mq_mode ==
>> ETH_MQ_RX_DCB) ||
>>>>>>>      		    (dev_conf->rxmode.mq_mode ==
>> ETH_MQ_RX_DCB_RSS)
>>>>>> ||
>>>>>>>      		    (dev_conf->txmode.mq_mode ==
>> ETH_MQ_TX_DCB)) {
>>>>>>>      			/* SRIOV only works in VMDq enable mode
>> */ @@ -
>>>>>> 525,7 +524,6 @@
>>>>>>> rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q,
>>>>>> uint16_t nb_tx_q,
>>>>>>>      		}
>>>>>>>
>>>>>>>      		switch (dev_conf->rxmode.mq_mode) {
>>>>>>> -		case ETH_MQ_RX_VMDQ_RSS:
>>>>>>>      		case ETH_MQ_RX_VMDQ_DCB:
>>>>>>>      		case ETH_MQ_RX_VMDQ_DCB_RSS:
>>>>>>>      			/* DCB/RSS VMDQ in SRIOV mode, not
>> implement
>>>>>> yet */ @@ -534,6
>>>>>>> +532,39 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t
>>>>>> nb_rx_q, uint16_t nb_tx_q,
>>>>>>>      					"unsupported VMDQ
>> mq_mode
>>>>>> rx %u\n",
>>>>>>>      					port_id, dev_conf-
>>>>>>> rxmode.mq_mode);
>>>>>>>      			return (-EINVAL);
>>>>>>> +		case ETH_MQ_RX_RSS:
>>>>>>> +			PMD_DEBUG_TRACE("ethdev port_id=%"
>> PRIu8
>>>>>>> +					" SRIOV active, "
>>>>>>> +					"Rx mq mode is changed
>> from:"
>>>>>>> +					"mq_mode %u into VMDQ
>>>>>> mq_mode %u\n",
>>>>>>> +					port_id,
>>>>>>> +					dev_conf-
>>> rxmode.mq_mode,
>>>>>>> +					dev->data-
>>>>>>> dev_conf.rxmode.mq_mode);
>>>>>>> +		case ETH_MQ_RX_VMDQ_RSS:
>>>>>>> +			dev->data->dev_conf.rxmode.mq_mode =
>>>>>> ETH_MQ_RX_VMDQ_RSS;
>>>>>>> +			if (nb_rx_q <
>>>>>> RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool) {
>>>> Missed that before: shouldn't it be "<=" here?
>>> Agree with you, need <= here, I will fix it in v5
>>>
>>>>>>> +				switch (nb_rx_q) {
>>>>>>> +				case 1:
>>>>>>> +				case 2:
>>>>>>> +
>> 	RTE_ETH_DEV_SRIOV(dev).active =
>>>>>>> +						ETH_64_POOLS;
>>>>>>> +					break;
>>>>>>> +				case 4:
>>>>>>> +
>> 	RTE_ETH_DEV_SRIOV(dev).active =
>>>>>>> +						ETH_32_POOLS;
>>>>>>> +					break;
>>>>>>> +				default:
>>>>>>> +
>> 	PMD_DEBUG_TRACE("ethdev
>>>>>> port_id=%d"
>>>>>>> +						" SRIOV active, "
>>>>>>> +						"queue number
>> invalid\n",
>>>>>>> +						port_id);
>>>>>>> +					return -EINVAL;
>>>>>>> +				}
>>>>>>> +
>> 	RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool =
>>>>>> nb_rx_q;
>>>>>>> +
>> 	RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx =
>>>>>>> +					dev->pci_dev->max_vfs *
>> nb_rx_q;
>>>>>>> +			}
>>>>>> Don't u need to return an error in the "else" here?
>>>>> Actually it has such a check after these code snippet, and it does
>>>>> return error for the else case, Because it is original logic, I
>>>>> don't change any
>>>> code around it, so it doesn't display here, you can check the codes.
>>>>
>>>> I see. The flow is a bit confusing since the switch-case above will
>>>> end up executing a "default" clause which will set
>>>> RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool to 1 and then the error
>> message
>>>> in the check u are referring will be a bit confusing.
>>> ' set RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool to 1 ' is original code,
>> which is for vmdq only case, or single queue case.
>>> It is in default clause, and not in VMDQ_RSS clause.
>>> I think my new code is ok here.
>> The original code is ok and your current code will work. The only problem
>> with your new code is that in case on an error like I've described above the
>> error message will be confusing.
> Then what's your suggestion for the better log message?  I can consider refine it if you have better one.

Just like I've suggested before - u may break with an appropriate error 
message right when u see the problem (in an "else" clause). This way the 
code will be both more readable and more robust and won't break if 
anybody decides to change the non-RSS-specific logic u r relying on.

>>>>> Thanks
>>>>> Changchun
>>>>>
>>>>>

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v4 3/6] ixgbe: Get VF queue number
  2015-01-06 11:26                 ` Vlad Zolotarov
@ 2015-01-07  1:18                   ` Ouyang, Changchun
  0 siblings, 0 replies; 144+ messages in thread
From: Ouyang, Changchun @ 2015-01-07  1:18 UTC (permalink / raw)
  To: Vlad Zolotarov, dev



> -----Original Message-----
> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> Sent: Tuesday, January 6, 2015 7:27 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v4 3/6] ixgbe: Get VF queue number
> 
> 
> On 01/06/15 03:54, Ouyang, Changchun wrote:
> >
> >> -----Original Message-----
> >> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> >> Sent: Monday, January 5, 2015 6:07 PM
> >> To: Ouyang, Changchun; dev@dpdk.org
> >> Subject: Re: [dpdk-dev] [PATCH v4 3/6] ixgbe: Get VF queue number
> >>
> >>
> >> On 01/05/15 04:59, Ouyang, Changchun wrote:
> >>>> -----Original Message-----
> >>>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> >>>> Sent: Sunday, January 4, 2015 4:39 PM
> >>>> To: Ouyang, Changchun; dev@dpdk.org
> >>>> Subject: Re: [dpdk-dev] [PATCH v4 3/6] ixgbe: Get VF queue number
> >>>>
> >>>>
> >>>> On 01/04/15 09:18, Ouyang Changchun wrote:
> >>>>> Get the available Rx and Tx queue number when receiving
> >>>> IXGBE_VF_GET_QUEUES message from VF.
> >>>>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> >>>>> ---
> >>>>>     lib/librte_pmd_ixgbe/ixgbe_pf.c | 35
> >>>> ++++++++++++++++++++++++++++++++++-
> >>>>>     1 file changed, 34 insertions(+), 1 deletion(-)
> >>>>>
> >>>>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> >>>>> b/lib/librte_pmd_ixgbe/ixgbe_pf.c index 495aff5..cbb0145 100644
> >>>>> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> >>>>> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> >>>>> @@ -53,6 +53,8 @@
> >>>>>     #include "ixgbe_ethdev.h"
> >>>>>
> >>>>>     #define IXGBE_MAX_VFTA     (128)
> >>>>> +#define IXGBE_VF_MSG_SIZE_DEFAULT 1 #define
> >>>>> +IXGBE_VF_GET_QUEUE_MSG_SIZE 5
> >>>>>
> >>>>>     static inline uint16_t
> >>>>>     dev_num_vf(struct rte_eth_dev *eth_dev) @@ -491,9 +493,36
> @@
> >>>>> ixgbe_negotiate_vf_api(struct rte_eth_dev *dev, uint32_t vf,
> >>>>> uint32_t
> >>>> *msgbuf)
> >>>>>     }
> >>>>>
> >>>>>     static int
> >>>>> +ixgbe_get_vf_queues(struct rte_eth_dev *dev, uint32_t vf,
> >>>>> +uint32_t
> >>>>> +*msgbuf) {
> >>>>> +	struct ixgbe_vf_info *vfinfo =
> >>>>> +		*IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data-
> >>>>> dev_private);
> >>>>> +	uint32_t default_q = vf *
> RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
> >>>>> +
> >>>>> +	/* Verify if the PF supports the mbox APIs version or not */
> >>>>> +	switch (vfinfo[vf].api_version) {
> >>>>> +	case ixgbe_mbox_api_20:
> >>>>> +	case ixgbe_mbox_api_11:
> >>>>> +		break;
> >>>>> +	default:
> >>>>> +		return -1;
> >>>>> +	}
> >>>>> +
> >>>>> +	/* Notify VF of Rx and Tx queue number */
> >>>>> +	msgbuf[IXGBE_VF_RX_QUEUES] =
> >>>> RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
> >>>>> +	msgbuf[IXGBE_VF_TX_QUEUES] =
> >>>> RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
> >>>>> +
> >>>>> +	/* Notify VF of default queue */
> >>>>> +	msgbuf[IXGBE_VF_DEF_QUEUE] = default_q;
> >>>> What about IXGBE_VF_TRANS_VLAN field?
> >>> This field is used for vlan strip or dcb case, which the vf rss don't need it.
> >> But VFs do support VLAN stripping and u don't add it to just RSS. If
> >> VFs do not support VLAN stripping in the DPDK yet they should and
> >> then we will need this field.
> > If I don't miss your point, you also agree it is not related to vf rss itself, right?
> 
> Right.
> 
> > As for Vlan stripping, it need another patch to support it.
> 
> Well, at least put some fat comment in bold there that some the fields in the
> command is not filled and why. ;)

OK, I will put more comments to explain it in v5.

> >
> >>>>> +
> >>>>> +	return 0;
> >>>>> +}
> >>>>> +
> >>>>> +static int
> >>>>>     ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
> >>>>>     {
> >>>>>     	uint16_t mbx_size = IXGBE_VFMAILBOX_SIZE;
> >>>>> +	uint16_t msg_size = IXGBE_VF_MSG_SIZE_DEFAULT;
> >>>>>     	uint32_t msgbuf[IXGBE_VFMAILBOX_SIZE];
> >>>>>     	int32_t retval;
> >>>>>     	struct ixgbe_hw *hw =
> >>>>> IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> >>>>> @@ -537,6 +566,10 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev
> >> *dev,
> >>>> uint16_t vf)
> >>>>>     	case IXGBE_VF_API_NEGOTIATE:
> >>>>>     		retval = ixgbe_negotiate_vf_api(dev, vf, msgbuf);
> >>>>>     		break;
> >>>>> +	case IXGBE_VF_GET_QUEUES:
> >>>>> +		retval = ixgbe_get_vf_queues(dev, vf, msgbuf);
> >>>>> +		msg_size = IXGBE_VF_GET_QUEUE_MSG_SIZE;
> >>>> Although the msg_size semantics and motivation is clear, if u want
> >>>> to do
> >> then
> >>>> do it all the way - add it to all other cases too not just to
> >>>> IXGBE_VF_GET_QUEUES.
> >>>> For instance, why do u write all 16 DWORDS for API negotiation
> >>>> (only 2 are
> >>>> required) and only here u decided to get "greedy"? ;)
> >>>>
> >>>> My point is: either drop it completely or fix all other places as well.
> >>> This is because the actual message size required by 2 different
> >> message(api-negotiation and vf-get-queue)
> >>> are different, the first one require only 4 bytes, the second one
> >>> need 20
> >> bytes.
> >>> If both use 4 bytes, then the second one will have incomplete message.
> >>> If both use 20 bytes, then the first one will contain garbage info
> >>> which is not
> >> necessary at all.
> >>> So the code logic looks as above.
> >> I understood the motivation at the first place but as I've explained
> >> above we already bring the garbage for some opcodes like API
> >> negotiation. So, u should either fix it for all opcodes like u did
> >> for GET_QUEUES or just drop it in GET_QUEUES and fix it for all
> >> opcodes in a different patch.
> > Here maybe I miss your point, my understanding is that  4 bytes are enough
> for all other opcode except for get_queue opcode,
> >   get_queues is the only one that need 20  bytes currently.
> > So I don't quite understand why I need fix any codes which we both think
> they are right.
> 
> Ooops. I missed the default value msg_size is 1 - I've confused its initialization
> with mbx_size initialization. So, u are right - your code is perfectly fine. My
> apologies! ;)

Never mind :-)
 
> >
> >>>>> +		break;
> >>>>>     	default:
> >>>>>     		PMD_DRV_LOG(DEBUG, "Unhandled Msg %8.8x",
> >>>> (unsigned)msgbuf[0]);
> >>>>>     		retval = IXGBE_ERR_MBX;
> >>>>> @@ -551,7 +584,7 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev
> >> *dev,
> >>>>> uint16_t vf)
> >>>>>
> >>>>>     	msgbuf[0] |= IXGBE_VT_MSGTYPE_CTS;
> >>>>>
> >>>>> -	ixgbe_write_mbx(hw, msgbuf, 1, vf);
> >>>>> +	ixgbe_write_mbx(hw, msgbuf, msg_size, vf);
> >>>>>
> >>>>>     	return retval;
> >>>>>     }

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v4 6/6] testpmd: Set Rx VMDq RSS mode
  2015-01-06 12:53                     ` Vlad Zolotarov
@ 2015-01-07  1:50                       ` Ouyang, Changchun
  0 siblings, 0 replies; 144+ messages in thread
From: Ouyang, Changchun @ 2015-01-07  1:50 UTC (permalink / raw)
  To: Vlad Zolotarov, dev



> -----Original Message-----
> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> Sent: Tuesday, January 6, 2015 8:53 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v4 6/6] testpmd: Set Rx VMDq RSS mode
> 
> 
> On 01/06/15 04:01, Ouyang, Changchun wrote:
> >
> >> -----Original Message-----
> >> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> >> Sent: Monday, January 5, 2015 6:12 PM
> >> To: Ouyang, Changchun; dev@dpdk.org
> >> Subject: Re: [dpdk-dev] [PATCH v4 6/6] testpmd: Set Rx VMDq RSS mode
> >>
> >>
> >> On 01/05/15 04:38, Ouyang, Changchun wrote:
> >>>> -----Original Message-----
> >>>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> >>>> Sent: Sunday, January 4, 2015 5:47 PM
> >>>> To: Ouyang, Changchun; dev@dpdk.org
> >>>> Subject: Re: [dpdk-dev] [PATCH v4 6/6] testpmd: Set Rx VMDq RSS
> >>>> mode
> >>>>
> >>>>
> >>>> On 01/04/15 11:01, Ouyang, Changchun wrote:
> >>>>>> -----Original Message-----
> >>>>>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> >>>>>> Sent: Sunday, January 4, 2015 4:50 PM
> >>>>>> To: Ouyang, Changchun; dev@dpdk.org
> >>>>>> Subject: Re: [dpdk-dev] [PATCH v4 6/6] testpmd: Set Rx VMDq RSS
> >>>>>> mode
> >>>>>>
> >>>>>>
> >>>>>> On 01/04/15 09:18, Ouyang Changchun wrote:
> >>>>>>> Set VMDq RSS mode if it has VF(VF number is more than 1) and has
> >>>>>>> RSS
> >>>>>> information.
> >>>>>>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> >>>>>>> ---
> >>>>>>>      app/test-pmd/testpmd.c | 10 ++++++++++
> >>>>>>>      1 file changed, 10 insertions(+)
> >>>>>>>
> >>>>>>> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
> >>>>>>> index 8c69756..6230f8b 100644
> >>>>>>> --- a/app/test-pmd/testpmd.c
> >>>>>>> +++ b/app/test-pmd/testpmd.c
> >>>>>>> @@ -1708,6 +1708,16 @@ init_port_config(void)
> >>>>>>>      				port->dev_conf.rxmode.mq_mode =
> >>>>>> ETH_MQ_RX_NONE;
> >>>>>>>      		}
> >>>>>>>
> >>>>>>> +		if (port->dev_info.max_vfs != 0) {
> >>>>>>> +			if (port-
> >>> dev_conf.rx_adv_conf.rss_conf.rss_hf != 0)
> >>>>>>> +				port->dev_conf.rxmode.mq_mode =
> >>>>>>> +					ETH_MQ_RX_VMDQ_RSS;
> >>>>>>> +			else {
> >>>>>>> +				port->dev_conf.rxmode.mq_mode =
> >>>>>> ETH_MQ_RX_NONE;
> >>>>>>> +				port->dev_conf.txmode.mq_mode =
> >>>>>> ETH_MQ_TX_NONE;
> >>>>>>
> >>>>>> And what about the txmode.mq_mode when RSS is available
> (the :if"
> >>>> clause)?
> >>>>> I think we can keep its original value for txmode.mq_mode, so
> >>>>> don't
> >>>> change its value. How do you think of it?
> >>>>
> >>>> I agree that not changing a Tx mq_mode in both cases would be better.
> >>> In the else clause, set txmode.mq_mode as ETH_MQ_TX_NONE
> explicitly
> >> to
> >>> make sure it is neither ETH_MQ_TX_DCB, ETH_MQ_TX_VMDQ_DCB, nor
> >> ETH_MQ_TX_VMDQ_ONLY.
> >>
> >> It's not obvious to me why u should do that since AFAIK any of these
> >> modes requires RX_RSS. Do I miss anything?
> > No, I don't think so, in the else clause, it doesn't need rx_rss, and
> > no way to do it, because the case is there is no rss configuration
> information(note: in the else clause, dev_conf.rx_adv_conf.rss_conf.rss_hf
> == 0).
> >
> > So ETH_MQ_RX_NONE for rx_mode, and ETH_MQ_TX_NONE for tx_mode.
> 
> Of course, however, in general, one may ask, why u configure TX MQ mode
> in "else" clause an don't do it in the "if" one. Possibly the "if" case in TX MQ
> context has been handled elsewhere but this is what makes this code
> confusing: to make it the most readable u'd rather configure the same
> feature set in both "if" and "else".
> For instance:
> 
> if (bla-bla) {
>    tx_mode = X1;
>    rx_mode = X2;
> } else {
>   tx_mode = Y1;
>   rx_mode = Y2;
> }
> 
> Look at the non-SR-IOV clause right above the "if-else" block u've added.
> Why don't they configure tx_mode there? Is it a bug in their code?

That also makes sense. I will add tx_mode = ETH_MQ_TX_NONE, since there is no RSS for the Tx mode;
RSS applies only to the Rx mode.

> By the way, u forgot to fix the remark below
> 
> /* In SR-IOV mode, RSS mode is not available */
> 
> which is located a few lines above the code u've added. ;)

Sorry, I missed these few lines before, I will remove them in v5. 

Thanks
Changchun

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v4 4/6] ether: Check VMDq RSS mode
  2015-01-06 19:56                     ` Vlad Zolotarov
@ 2015-01-07  2:28                       ` Ouyang, Changchun
  0 siblings, 0 replies; 144+ messages in thread
From: Ouyang, Changchun @ 2015-01-07  2:28 UTC (permalink / raw)
  To: Vlad Zolotarov, dev



> -----Original Message-----
> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> Sent: Wednesday, January 7, 2015 3:56 AM
> To: Ouyang, Changchun; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v4 4/6] ether: Check VMDq RSS mode
> 
> 
> On 01/06/15 03:56, Ouyang, Changchun wrote:
> >> -----Original Message-----
> >> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> >> Sent: Monday, January 5, 2015 6:10 PM
> >> To: Ouyang, Changchun;dev@dpdk.org
> >> Subject: Re: [dpdk-dev] [PATCH v4 4/6] ether: Check VMDq RSS mode
> >>
> >>
> >> On 01/05/15 03:00, Ouyang, Changchun wrote:
> >>>> -----Original Message-----
> >>>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> >>>> Sent: Sunday, January 4, 2015 5:46 PM
> >>>> To: Ouyang, Changchun;dev@dpdk.org
> >>>> Subject: Re: [dpdk-dev] [PATCH v4 4/6] ether: Check VMDq RSS mode
> >>>>
> >>>>
> >>>> On 01/04/15 10:58, Ouyang, Changchun wrote:
> >>>>>> -----Original Message-----
> >>>>>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> >>>>>> Sent: Sunday, January 4, 2015 4:45 PM
> >>>>>> To: Ouyang, Changchun;dev@dpdk.org
> >>>>>> Subject: Re: [dpdk-dev] [PATCH v4 4/6] ether: Check VMDq RSS
> mode
> >>>>>>
> >>>>>>
> >>>>>> On 01/04/15 09:18, Ouyang Changchun wrote:
> >>>>>>> Check mq mode for VMDq RSS, handle it correctly instead of
> >>>>>>> returning an error; Also remove the limitation of per pool queue
> >>>>>>> number has max value of 1, because the per pool queue number
> >> could
> >>>>>>> be 2 or 4 if it is VMDq RSS mode;
> >>>>>>>
> >>>>>>> The number of rxq specified in config will determine the mq mode
> >>>>>>> for
> >>>>>> VMDq RSS.
> >>>>>>> Signed-off-by: Changchun Ouyang<changchun.ouyang@intel.com>
> >>>>>>> ---
> >>>>>>>      lib/librte_ether/rte_ethdev.c | 39
> >>>>>> ++++++++++++++++++++++++++++++++++-----
> >>>>>>>      1 file changed, 34 insertions(+), 5 deletions(-)
> >>>>>>>
> >>>>>>> diff --git a/lib/librte_ether/rte_ethdev.c
> >>>>>>> b/lib/librte_ether/rte_ethdev.c index 95f2ceb..59ff325 100644
> >>>>>>> --- a/lib/librte_ether/rte_ethdev.c
> >>>>>>> +++ b/lib/librte_ether/rte_ethdev.c
> >>>>>>> @@ -510,8 +510,7 @@ rte_eth_dev_check_mq_mode(uint8_t
> >> port_id,
> >>>>>>> uint16_t nb_rx_q, uint16_t nb_tx_q,
> >>>>>>>
> >>>>>>>      	if (RTE_ETH_DEV_SRIOV(dev).active != 0) {
> >>>>>>>      		/* check multi-queue mode */
> >>>>>>> -		if ((dev_conf->rxmode.mq_mode ==
> >> ETH_MQ_RX_RSS) ||
> >>>>>>> -		    (dev_conf->rxmode.mq_mode ==
> >> ETH_MQ_RX_DCB) ||
> >>>>>>> +		if ((dev_conf->rxmode.mq_mode ==
> >> ETH_MQ_RX_DCB) ||
> >>>>>>>      		    (dev_conf->rxmode.mq_mode ==
> >> ETH_MQ_RX_DCB_RSS)
> >>>>>> ||
> >>>>>>>      		    (dev_conf->txmode.mq_mode ==
> >> ETH_MQ_TX_DCB)) {
> >>>>>>>      			/* SRIOV only works in VMDq enable mode
> >> */ @@ -
> >>>>>> 525,7 +524,6 @@
> >>>>>>> rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q,
> >>>>>> uint16_t nb_tx_q,
> >>>>>>>      		}
> >>>>>>>
> >>>>>>>      		switch (dev_conf->rxmode.mq_mode) {
> >>>>>>> -		case ETH_MQ_RX_VMDQ_RSS:
> >>>>>>>      		case ETH_MQ_RX_VMDQ_DCB:
> >>>>>>>      		case ETH_MQ_RX_VMDQ_DCB_RSS:
> >>>>>>>      			/* DCB/RSS VMDQ in SRIOV mode, not
> >> implement
> >>>>>> yet */ @@ -534,6
> >>>>>>> +532,39 @@ rte_eth_dev_check_mq_mode(uint8_t port_id,
> uint16_t
> >>>>>> nb_rx_q, uint16_t nb_tx_q,
> >>>>>>>      					"unsupported VMDQ
> >> mq_mode
> >>>>>> rx %u\n",
> >>>>>>>      					port_id, dev_conf-
> >>>>>>> rxmode.mq_mode);
> >>>>>>>      			return (-EINVAL);
> >>>>>>> +		case ETH_MQ_RX_RSS:
> >>>>>>> +			PMD_DEBUG_TRACE("ethdev port_id=%"
> >> PRIu8
> >>>>>>> +					" SRIOV active, "
> >>>>>>> +					"Rx mq mode is changed
> >> from:"
> >>>>>>> +					"mq_mode %u into VMDQ
> >>>>>> mq_mode %u\n",
> >>>>>>> +					port_id,
> >>>>>>> +					dev_conf-
> >>> rxmode.mq_mode,
> >>>>>>> +					dev->data-
> >>>>>>> dev_conf.rxmode.mq_mode);
> >>>>>>> +		case ETH_MQ_RX_VMDQ_RSS:
> >>>>>>> +			dev->data->dev_conf.rxmode.mq_mode =
> >>>>>> ETH_MQ_RX_VMDQ_RSS;
> >>>>>>> +			if (nb_rx_q <
> >>>>>> RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool) {
> >>>> Missed that before: shouldn't it be "<=" here?
> >>> Agree with you, need <= here, I will fix it in v5
> >>>
> >>>>>>> +				switch (nb_rx_q) {
> >>>>>>> +				case 1:
> >>>>>>> +				case 2:
> >>>>>>> +
> >> 	RTE_ETH_DEV_SRIOV(dev).active =
> >>>>>>> +						ETH_64_POOLS;
> >>>>>>> +					break;
> >>>>>>> +				case 4:
> >>>>>>> +
> >> 	RTE_ETH_DEV_SRIOV(dev).active =
> >>>>>>> +						ETH_32_POOLS;
> >>>>>>> +					break;
> >>>>>>> +				default:
> >>>>>>> +
> >> 	PMD_DEBUG_TRACE("ethdev
> >>>>>> port_id=%d"
> >>>>>>> +						" SRIOV active, "
> >>>>>>> +						"queue number
> >> invalid\n",
> >>>>>>> +						port_id);
> >>>>>>> +					return -EINVAL;
> >>>>>>> +				}
> >>>>>>> +
> >> 	RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool =
> >>>>>> nb_rx_q;
> >>>>>>> +
> >> 	RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx =
> >>>>>>> +					dev->pci_dev->max_vfs *
> >> nb_rx_q;
> >>>>>>> +			}
> >>>>>> Don't u need to return an error in the "else" here?
> >>>>> Actually it has such a check after these code snippet, and it does
> >>>>> return error for the else case, Because it is original logic, I
> >>>>> don't change any
> >>>> code around it, so it doesn't display here, you can check the codes.
> >>>>
> >>>> I see. The flow is a bit confusing since the switch-case above will
> >>>> end up executing a "default" clause which will set
> >>>> RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool to 1 and then the error
> >> message
> >>>> in the check u are referring will be a bit confusing.
> >>> ' set RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool to 1 ' is original code,
> >> which is for vmdq only case, or single queue case.
> >>> It is in default clause, and not in VMDQ_RSS clause.
> >>> I think my new code is ok here.
> >> The original code is ok and your current code will work. The only
> >> problem with your new code is that in case on an error like I've
> >> described above the error message will be confusing.
> > Then what's your suggestion for the better log message?  I can consider
> refine it if you have better one.
> 
> Just like I've suggested before - u may break with appropriate error message
> right when u see the problem (in a "else" clause). This way the code will be
> both more readable and more robust and won't break if anybody decides to
> change the not-RSS-specific logic u r relying on.

Well, it couldn't be done so easily, I think,  the test condition is:
if (nb_rx_q <= RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool),  
so the else clause is the case of nb_rx_q > RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool,
its functionality is comparing nb_rx_q and RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool,

but the switch case will further confine nb_rx_q to 1 or 2 or 4 on the condition of it passes the above test,

and also there are codes refine the RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool etc.

Just changing the return into a break will break the logic.
For example:
when nb_rx_q is 8 and RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool is 8,
the test passes and execution goes into the default branch,
which just prints a message and breaks;
the refinement then continues (but nothing is changed this time),
and the valid-queue-number check a few lines below does not trigger, because
nb_rx_q is equal to, rather than greater than, RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool, so it neither prints the error message nor returns -EINVAL.

Then the behavior is not expected.

On the other hand,
the reason why there is no else branch for the test nb_rx_q <= RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool
is that the same check is done just below it, and I don't want to duplicate the check for the same thing.

> >>>>> Thanks
> >>>>> Changchun
> >>>>>
> >>>>>


^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH v5 0/6] Enable VF RSS for Niantic
  2015-01-04  7:18     ` [dpdk-dev] [PATCH v4 " Ouyang Changchun
                         ` (5 preceding siblings ...)
  2015-01-04  7:18       ` [dpdk-dev] [PATCH v4 6/6] testpmd: Set Rx VMDq RSS mode Ouyang Changchun
@ 2015-01-07  6:32       ` Ouyang Changchun
  2015-01-07  6:32         ` [dpdk-dev] [PATCH v5 1/6] ixgbe: Code cleanup Ouyang Changchun
                           ` (7 more replies)
  6 siblings, 8 replies; 144+ messages in thread
From: Ouyang Changchun @ 2015-01-07  6:32 UTC (permalink / raw)
  To: dev

This patch enables VF RSS for Niantic, which allows each VF to have at most 4 queues.
The actual number of queues per VF depends on the total number of pools, which is
determined by the max number of VFs at the PF initialization stage and the number of
queues specified in the config:
1) If the max number of VFs is in the range from 1 to 32, and the number of rxq is 4
('--rxq 4' in testpmd), then there are 32 pools in total (ETH_32_POOLS), and each VF
has 4 queues;
 
2) If the max number of VFs is in the range from 33 to 64, and the number of rxq is 2
('--rxq 2' in testpmd), then there are 64 pools in total (ETH_64_POOLS), and each VF
has 2 queues;
 
On the host, to enable the VF RSS functionality, the rx mq mode should be set to ETH_MQ_RX_VMDQ_RSS
or ETH_MQ_RX_RSS, and SRIOV mode should be activated (max_vfs >= 1).
VF RSS information such as the hash function, RSS key, and RSS key length also needs to be configured.
 
The limitation for Niantic VF RSS is:
the hash and key are shared among the PF and all VFs, and the RETA table with 128 entries is
also shared among the PF and all VFs. So it is not possible to provide a method to query the hash
and RETA content per VF on the guest; if needed, please query them on the host (PF) for
the shared RETA information.

changes in v5:
  - Fix minor issue and some comments;

changes in v4:
  - Extract a function to remove embedded switch-case statement;
  - Check whether RX queue number is a valid one, otherwise return error;
  - Update the description a bit;
 
changes in v3:
  - More cleanup;
 
changes in v2:
  - Update the description;
  - Use receiving queue number('--rxq <q-num>') specified in config to determine the
    number of pool and the number of queue per VF;
 
changes in v1:
  - Config VF RSS;

Changchun Ouyang (6):
  ixgbe: Code cleanup
  ixgbe: Negotiate VF API version
  ixgbe: Get VF queue number
  ether: Check VMDq RSS mode
  ixgbe: Config VF RSS
  testpmd: Set Rx VMDq RSS mode

 app/test-pmd/testpmd.c              |  15 +++-
 lib/librte_ether/rte_ethdev.c       |  50 +++++++++++--
 lib/librte_pmd_ixgbe/ixgbe_ethdev.h |   1 +
 lib/librte_pmd_ixgbe/ixgbe_pf.c     |  80 ++++++++++++++++++++-
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c   | 138 ++++++++++++++++++++++++++++--------
 5 files changed, 248 insertions(+), 36 deletions(-)

-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH v5 1/6] ixgbe: Code cleanup
  2015-01-07  6:32       ` [dpdk-dev] [PATCH v5 0/6] Enable VF RSS for Niantic Ouyang Changchun
@ 2015-01-07  6:32         ` Ouyang Changchun
  2015-01-07  6:32         ` [dpdk-dev] [PATCH v5 2/6] ixgbe: Negotiate VF API version Ouyang Changchun
                           ` (6 subsequent siblings)
  7 siblings, 0 replies; 144+ messages in thread
From: Ouyang Changchun @ 2015-01-07  6:32 UTC (permalink / raw)
  To: dev

Put global register configuring out of loop for queue; also fix typo and indent;

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 35 ++++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index 5c36bff..f69abda 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -3548,9 +3548,9 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
 				IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
 			}
 			srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
-				   IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
-				  IXGBE_SRRCTL_BSIZEHDR_MASK);
-			srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
+				IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
+				IXGBE_SRRCTL_BSIZEHDR_MASK);
+			srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
 		} else
 #endif
 			srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
@@ -3985,7 +3985,7 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 	struct igb_rx_queue *rxq;
 	struct rte_pktmbuf_pool_private *mbp_priv;
 	uint64_t bus_addr;
-	uint32_t srrctl;
+	uint32_t srrctl, psrtype = 0;
 	uint16_t buf_size;
 	uint16_t i;
 	int ret;
@@ -4039,20 +4039,10 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 		 * Configure Header Split
 		 */
 		if (dev->data->dev_conf.rxmode.header_split) {
-
-			/* Must setup the PSRTYPE register */
-			uint32_t psrtype;
-			psrtype = IXGBE_PSRTYPE_TCPHDR |
-				IXGBE_PSRTYPE_UDPHDR   |
-				IXGBE_PSRTYPE_IPV4HDR  |
-				IXGBE_PSRTYPE_IPV6HDR;
-
-			IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE(i), psrtype);
-
 			srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
-				   IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
-				  IXGBE_SRRCTL_BSIZEHDR_MASK);
-			srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
+				IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
+				IXGBE_SRRCTL_BSIZEHDR_MASK);
+			srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
 		} else
 #endif
 			srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
@@ -4095,6 +4085,17 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 		}
 	}
 
+#ifdef RTE_HEADER_SPLIT_ENABLE
+	if (dev->data->dev_conf.rxmode.header_split)
+		/* Must setup the PSRTYPE register */
+		psrtype = IXGBE_PSRTYPE_TCPHDR |
+			IXGBE_PSRTYPE_UDPHDR   |
+			IXGBE_PSRTYPE_IPV4HDR  |
+			IXGBE_PSRTYPE_IPV6HDR;
+#endif
+
+	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
+
 	if (dev->data->dev_conf.rxmode.enable_scatter) {
 		if (!dev->data->scattered_rx)
 			PMD_INIT_LOG(DEBUG, "forcing scatter mode");
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH v5 2/6] ixgbe: Negotiate VF API version
  2015-01-07  6:32       ` [dpdk-dev] [PATCH v5 0/6] Enable VF RSS for Niantic Ouyang Changchun
  2015-01-07  6:32         ` [dpdk-dev] [PATCH v5 1/6] ixgbe: Code cleanup Ouyang Changchun
@ 2015-01-07  6:32         ` Ouyang Changchun
  2015-01-07  6:32         ` [dpdk-dev] [PATCH v5 3/6] ixgbe: Get VF queue number Ouyang Changchun
                           ` (5 subsequent siblings)
  7 siblings, 0 replies; 144+ messages in thread
From: Ouyang Changchun @ 2015-01-07  6:32 UTC (permalink / raw)
  To: dev

Negotiate API version with VF when receiving the IXGBE_VF_API_NEGOTIATE message.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_pmd_ixgbe/ixgbe_ethdev.h |  1 +
 lib/librte_pmd_ixgbe/ixgbe_pf.c     | 25 +++++++++++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.h b/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
index ca99170..730098d 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
+++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
@@ -159,6 +159,7 @@ struct ixgbe_vf_info {
 	uint16_t tx_rate[IXGBE_MAX_QUEUE_NUM_PER_VF];
 	uint16_t vlan_count;
 	uint8_t spoofchk_enabled;
+	uint8_t api_version;
 };
 
 /*
diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c b/lib/librte_pmd_ixgbe/ixgbe_pf.c
index 51da1fd..495aff5 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
@@ -469,6 +469,28 @@ ixgbe_set_vf_lpe(struct rte_eth_dev *dev, __rte_unused uint32_t vf, uint32_t *ms
 }
 
 static int
+ixgbe_negotiate_vf_api(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf)
+{
+	uint32_t api_version = msgbuf[1];
+	struct ixgbe_vf_info *vfinfo =
+		*IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private);
+
+	switch (api_version) {
+	case ixgbe_mbox_api_10:
+	case ixgbe_mbox_api_11:
+		vfinfo[vf].api_version = (uint8_t)api_version;
+		return 0;
+	default:
+		break;
+	}
+
+	RTE_LOG(ERR, PMD, "Negotiate invalid api version %u from VF %d\n",
+		api_version, vf);
+
+	return -1;
+}
+
+static int
 ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
 {
 	uint16_t mbx_size = IXGBE_VFMAILBOX_SIZE;
@@ -512,6 +534,9 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
 	case IXGBE_VF_SET_VLAN:
 		retval = ixgbe_vf_set_vlan(dev, vf, msgbuf);
 		break;
+	case IXGBE_VF_API_NEGOTIATE:
+		retval = ixgbe_negotiate_vf_api(dev, vf, msgbuf);
+		break;
 	default:
 		PMD_DRV_LOG(DEBUG, "Unhandled Msg %8.8x", (unsigned)msgbuf[0]);
 		retval = IXGBE_ERR_MBX;
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH v5 3/6] ixgbe: Get VF queue number
  2015-01-07  6:32       ` [dpdk-dev] [PATCH v5 0/6] Enable VF RSS for Niantic Ouyang Changchun
  2015-01-07  6:32         ` [dpdk-dev] [PATCH v5 1/6] ixgbe: Code cleanup Ouyang Changchun
  2015-01-07  6:32         ` [dpdk-dev] [PATCH v5 2/6] ixgbe: Negotiate VF API version Ouyang Changchun
@ 2015-01-07  6:32         ` Ouyang Changchun
  2015-01-08  9:01           ` Vlad Zolotarov
  2015-01-07  6:32         ` [dpdk-dev] [PATCH v5 4/6] ether: Check VMDq RSS mode Ouyang Changchun
                           ` (4 subsequent siblings)
  7 siblings, 1 reply; 144+ messages in thread
From: Ouyang Changchun @ 2015-01-07  6:32 UTC (permalink / raw)
  To: dev

Get the available Rx and Tx queue number when receiving IXGBE_VF_GET_QUEUES message from VF.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>

changes in v5
  - Add some 'FIX ME' comments for IXGBE_VF_TRANS_VLAN.

---
 lib/librte_pmd_ixgbe/ixgbe_pf.c | 40 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c b/lib/librte_pmd_ixgbe/ixgbe_pf.c
index 495aff5..dbda9b5 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
@@ -53,6 +53,8 @@
 #include "ixgbe_ethdev.h"
 
 #define IXGBE_MAX_VFTA     (128)
+#define IXGBE_VF_MSG_SIZE_DEFAULT 1
+#define IXGBE_VF_GET_QUEUE_MSG_SIZE 5
 
 static inline uint16_t
 dev_num_vf(struct rte_eth_dev *eth_dev)
@@ -491,9 +493,41 @@ ixgbe_negotiate_vf_api(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf)
 }
 
 static int
+ixgbe_get_vf_queues(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf)
+{
+	struct ixgbe_vf_info *vfinfo =
+		*IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private);
+	uint32_t default_q = vf * RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
+
+	/* Verify if the PF supports the mbox APIs version or not */
+	switch (vfinfo[vf].api_version) {
+	case ixgbe_mbox_api_20:
+	case ixgbe_mbox_api_11:
+		break;
+	default:
+		return -1;
+	}
+
+	/* Notify VF of Rx and Tx queue number */
+	msgbuf[IXGBE_VF_RX_QUEUES] = RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
+	msgbuf[IXGBE_VF_TX_QUEUES] = RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
+
+	/* Notify VF of default queue */
+	msgbuf[IXGBE_VF_DEF_QUEUE] = default_q;
+
+	/*
+	 * FIX ME if it needs fill msgbuf[IXGBE_VF_TRANS_VLAN]
+	 * for VLAN strip or VMDQ_DCB or VMDQ_DCB_RSS
+	 */
+
+	return 0;
+}
+
+static int
 ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
 {
 	uint16_t mbx_size = IXGBE_VFMAILBOX_SIZE;
+	uint16_t msg_size = IXGBE_VF_MSG_SIZE_DEFAULT;
 	uint32_t msgbuf[IXGBE_VFMAILBOX_SIZE];
 	int32_t retval;
 	struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
@@ -537,6 +571,10 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
 	case IXGBE_VF_API_NEGOTIATE:
 		retval = ixgbe_negotiate_vf_api(dev, vf, msgbuf);
 		break;
+	case IXGBE_VF_GET_QUEUES:
+		retval = ixgbe_get_vf_queues(dev, vf, msgbuf);
+		msg_size = IXGBE_VF_GET_QUEUE_MSG_SIZE;
+		break;
 	default:
 		PMD_DRV_LOG(DEBUG, "Unhandled Msg %8.8x", (unsigned)msgbuf[0]);
 		retval = IXGBE_ERR_MBX;
@@ -551,7 +589,7 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
 
 	msgbuf[0] |= IXGBE_VT_MSGTYPE_CTS;
 
-	ixgbe_write_mbx(hw, msgbuf, 1, vf);
+	ixgbe_write_mbx(hw, msgbuf, msg_size, vf);
 
 	return retval;
 }
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH v5 4/6] ether: Check VMDq RSS mode
  2015-01-07  6:32       ` [dpdk-dev] [PATCH v5 0/6] Enable VF RSS for Niantic Ouyang Changchun
                           ` (2 preceding siblings ...)
  2015-01-07  6:32         ` [dpdk-dev] [PATCH v5 3/6] ixgbe: Get VF queue number Ouyang Changchun
@ 2015-01-07  6:32         ` Ouyang Changchun
  2015-01-08  9:19           ` Vlad Zolotarov
  2015-01-07  6:32         ` [dpdk-dev] [PATCH v5 5/6] ixgbe: Config VF RSS Ouyang Changchun
                           ` (3 subsequent siblings)
  7 siblings, 1 reply; 144+ messages in thread
From: Ouyang Changchun @ 2015-01-07  6:32 UTC (permalink / raw)
  To: dev

Check the mq mode for VMDq RSS and handle it correctly instead of returning an error;
also remove the limitation that the per-pool queue number has a max value of 1, because
the per-pool queue number can be 2 or 4 in VMDq RSS mode.

The number of rxq specified in config will determine the mq mode for VMDq RSS.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>

changes in v5:
  - Fix '<' issue, it should be '<=' to test rxq number;
  - Extract a function to remove the embedded switch-case statement.

---
 lib/librte_ether/rte_ethdev.c | 50 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 45 insertions(+), 5 deletions(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 95f2ceb..8363e26 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -503,6 +503,31 @@ rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
 }
 
 static int
+rte_eth_dev_check_vf_rss_rxq_num(uint8_t port_id, uint16_t nb_rx_q)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+	switch (nb_rx_q) {
+	case 1:
+	case 2:
+		RTE_ETH_DEV_SRIOV(dev).active =
+			ETH_64_POOLS;
+		break;
+	case 4:
+		RTE_ETH_DEV_SRIOV(dev).active =
+			ETH_32_POOLS;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = nb_rx_q;
+	RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx =
+		dev->pci_dev->max_vfs * nb_rx_q;
+
+	return 0;
+}
+
+static int
 rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 		      const struct rte_eth_conf *dev_conf)
 {
@@ -510,8 +535,7 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 
 	if (RTE_ETH_DEV_SRIOV(dev).active != 0) {
 		/* check multi-queue mode */
-		if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_RSS) ||
-		    (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
+		if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
 		    (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB_RSS) ||
 		    (dev_conf->txmode.mq_mode == ETH_MQ_TX_DCB)) {
 			/* SRIOV only works in VMDq enable mode */
@@ -525,7 +549,6 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 		}
 
 		switch (dev_conf->rxmode.mq_mode) {
-		case ETH_MQ_RX_VMDQ_RSS:
 		case ETH_MQ_RX_VMDQ_DCB:
 		case ETH_MQ_RX_VMDQ_DCB_RSS:
 			/* DCB/RSS VMDQ in SRIOV mode, not implement yet */
@@ -534,6 +557,25 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 					"unsupported VMDQ mq_mode rx %u\n",
 					port_id, dev_conf->rxmode.mq_mode);
 			return (-EINVAL);
+		case ETH_MQ_RX_RSS:
+			PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8
+					" SRIOV active, "
+					"Rx mq mode is changed from:"
+					"mq_mode %u into VMDQ mq_mode %u\n",
+					port_id,
+					dev_conf->rxmode.mq_mode,
+					dev->data->dev_conf.rxmode.mq_mode);
+		case ETH_MQ_RX_VMDQ_RSS:
+			dev->data->dev_conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_RSS;
+			if (nb_rx_q <= RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool)
+				if (rte_eth_dev_check_vf_rss_rxq_num(port_id, nb_rx_q) != 0) {
+					PMD_DEBUG_TRACE("ethdev port_id=%d"
+						" SRIOV active, invalid queue"
+						" number for VMDQ RSS\n",
+						port_id);
+					return -EINVAL;
+				}
+			break;
 		default: /* ETH_MQ_RX_VMDQ_ONLY or ETH_MQ_RX_NONE */
 			/* if nothing mq mode configure, use default scheme */
 			dev->data->dev_conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_ONLY;
@@ -553,8 +595,6 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 		default: /* ETH_MQ_TX_VMDQ_ONLY or ETH_MQ_TX_NONE */
 			/* if nothing mq mode configure, use default scheme */
 			dev->data->dev_conf.txmode.mq_mode = ETH_MQ_TX_VMDQ_ONLY;
-			if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)
-				RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;
 			break;
 		}
 
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH v5 5/6] ixgbe: Config VF RSS
  2015-01-07  6:32       ` [dpdk-dev] [PATCH v5 0/6] Enable VF RSS for Niantic Ouyang Changchun
                           ` (3 preceding siblings ...)
  2015-01-07  6:32         ` [dpdk-dev] [PATCH v5 4/6] ether: Check VMDq RSS mode Ouyang Changchun
@ 2015-01-07  6:32         ` Ouyang Changchun
  2015-01-08  9:43           ` Vlad Zolotarov
  2015-01-07  6:32         ` [dpdk-dev] [PATCH v5 6/6] testpmd: Set Rx VMDq RSS mode Ouyang Changchun
                           ` (2 subsequent siblings)
  7 siblings, 1 reply; 144+ messages in thread
From: Ouyang Changchun @ 2015-01-07  6:32 UTC (permalink / raw)
  To: dev

RSS, IXGBE_MRQC and IXGBE_VFPSRTYPE need to be configured to enable VF RSS.

The psrtype determines how many queues the received packets will be distributed to,
and the value of psrtype should depend on two factors: the max VF rxq number, which
has been negotiated with the PF, and the number of rxq specified in the config on the guest.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>

Changes in v4:
 - the number of rxq from config should be a power of 2 and should not be bigger than
    the max VF rxq number (negotiated between guest and host).

---
 lib/librte_pmd_ixgbe/ixgbe_pf.c   |  15 ++++++
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 103 +++++++++++++++++++++++++++++++++-----
 2 files changed, 106 insertions(+), 12 deletions(-)

diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c b/lib/librte_pmd_ixgbe/ixgbe_pf.c
index dbda9b5..93f6e43 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
@@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev *eth_dev)
 	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw->mac.num_rar_entries), 0);
 	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw->mac.num_rar_entries), 0);
 
+	/*
+	 * VF RSS can support at most 4 queues for each VF, even if
+	 * 8 queues are available for each VF, it need refine to 4
+	 * queues here due to this limitation, otherwise no queue
+	 * will receive any packet even RSS is enabled.
+	 */
+	if (eth_dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_VMDQ_RSS) {
+		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
+			RTE_ETH_DEV_SRIOV(eth_dev).active = ETH_32_POOLS;
+			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
+			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
+				dev_num_vf(eth_dev) * 4;
+		}
+	}
+
 	/* set VMDq map to default PF pool */
 	hw->mac.ops.set_vmdq(hw, 0, RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
 
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index f69abda..e83a9ab 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -3327,6 +3327,68 @@ ixgbe_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
 }
 
 static int
+ixgbe_config_vf_rss(struct rte_eth_dev *dev)
+{
+	struct ixgbe_hw *hw;
+	uint32_t mrqc;
+
+	ixgbe_rss_configure(dev);
+
+	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+	/* MRQC: enable VF RSS */
+	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
+	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
+	switch (RTE_ETH_DEV_SRIOV(dev).active) {
+	case ETH_64_POOLS:
+		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
+		break;
+
+	case ETH_32_POOLS:
+	case ETH_16_POOLS:
+		mrqc |= IXGBE_MRQC_VMDQRSS32EN;
+		break;
+
+	default:
+		PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode");
+		return -EINVAL;
+	}
+
+	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
+
+	return 0;
+}
+
+static int
+ixgbe_config_vf_default(struct rte_eth_dev *dev)
+{
+	struct ixgbe_hw *hw =
+		IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+	switch (RTE_ETH_DEV_SRIOV(dev).active) {
+	case ETH_64_POOLS:
+		IXGBE_WRITE_REG(hw, IXGBE_MRQC,
+			IXGBE_MRQC_VMDQEN);
+		break;
+
+	case ETH_32_POOLS:
+		IXGBE_WRITE_REG(hw, IXGBE_MRQC,
+			IXGBE_MRQC_VMDQRT4TCEN);
+		break;
+
+	case ETH_16_POOLS:
+		IXGBE_WRITE_REG(hw, IXGBE_MRQC,
+			IXGBE_MRQC_VMDQRT8TCEN);
+		break;
+	default:
+		PMD_INIT_LOG(ERR,
+			"invalid pool number in IOV mode");
+		break;
+	}
+	return 0;
+}
+
+static int
 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
 {
 	struct ixgbe_hw *hw =
@@ -3358,24 +3420,25 @@ ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
 			default: ixgbe_rss_disable(dev);
 		}
 	} else {
-		switch (RTE_ETH_DEV_SRIOV(dev).active) {
 		/*
 		 * SRIOV active scheme
-		 * FIXME if support DCB/RSS together with VMDq & SRIOV
+		 * Support RSS together with VMDq & SRIOV
 		 */
-		case ETH_64_POOLS:
-			IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQEN);
-			break;
-
-		case ETH_32_POOLS:
-			IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQRT4TCEN);
+		switch (dev->data->dev_conf.rxmode.mq_mode) {
+		case ETH_MQ_RX_RSS:
+		case ETH_MQ_RX_VMDQ_RSS:
+			ixgbe_config_vf_rss(dev);
 			break;
 
-		case ETH_16_POOLS:
-			IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQRT8TCEN);
-			break;
+		/* FIXME if support DCB/RSS together with VMDq & SRIOV */
+		case ETH_MQ_RX_VMDQ_DCB:
+		case ETH_MQ_RX_VMDQ_DCB_RSS:
+			PMD_INIT_LOG(ERR,
+				"Could not support DCB with VMDq & SRIOV");
+			return -1;
 		default:
-			PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
+			ixgbe_config_vf_default(dev);
+			break;
 		}
 	}
 
@@ -3993,6 +4056,19 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 	PMD_INIT_FUNC_TRACE();
 	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
+	if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
+		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
+			"it should be power of 2");
+		return -1;
+	}
+
+	if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
+		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
+			"it should be equal to or less than %d",
+			hw->mac.max_rx_queues);
+		return -1;
+	}
+
 	/*
 	 * When the VF driver issues a IXGBE_VF_RESET request, the PF driver
 	 * disables the VF receipt of packets if the PF MTU is > 1500.
@@ -4094,6 +4170,9 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 			IXGBE_PSRTYPE_IPV6HDR;
 #endif
 
+	/* Set RQPL for VF RSS according to max Rx queue */
+	psrtype |= (dev->data->nb_rx_queues >> 1) <<
+		IXGBE_PSRTYPE_RQPL_SHIFT;
 	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
 
 	if (dev->data->dev_conf.rxmode.enable_scatter) {
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH v5 6/6] testpmd: Set Rx VMDq RSS mode
  2015-01-07  6:32       ` [dpdk-dev] [PATCH v5 0/6] Enable VF RSS for Niantic Ouyang Changchun
                           ` (4 preceding siblings ...)
  2015-01-07  6:32         ` [dpdk-dev] [PATCH v5 5/6] ixgbe: Config VF RSS Ouyang Changchun
@ 2015-01-07  6:32         ` Ouyang Changchun
  2015-01-08  9:46           ` Vlad Zolotarov
  2015-01-08  9:56         ` [dpdk-dev] [PATCH v5 0/6] Enable VF RSS for Niantic Vlad Zolotarov
  2015-01-12  5:59         ` [dpdk-dev] [PATCH v6 " Ouyang Changchun
  7 siblings, 1 reply; 144+ messages in thread
From: Ouyang Changchun @ 2015-01-07  6:32 UTC (permalink / raw)
  To: dev

Set VMDq RSS mode if the port has VFs (the max VF number is at least 1) and has RSS information.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>

changes in v5
  - Assign txmode.mq_mode with ETH_MQ_TX_NONE explicitly;
  - Remove a wrong one-line comment.

---
 app/test-pmd/testpmd.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 8c69756..64fd4ee 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -1700,7 +1700,6 @@ init_port_config(void)
 			port->dev_conf.rx_adv_conf.rss_conf.rss_hf = 0;
 		}
 
-		/* In SR-IOV mode, RSS mode is not available */
 		if (port->dcb_flag == 0 && port->dev_info.max_vfs == 0) {
 			if( port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0)
 				port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
@@ -1708,6 +1707,20 @@ init_port_config(void)
 				port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
 		}
 
+		if (port->dev_info.max_vfs != 0) {
+			if (port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0) {
+				port->dev_conf.rxmode.mq_mode =
+					ETH_MQ_RX_VMDQ_RSS;
+				port->dev_conf.txmode.mq_mode =
+					ETH_MQ_TX_NONE;
+			} else {
+				port->dev_conf.rxmode.mq_mode =
+					ETH_MQ_RX_NONE;
+				port->dev_conf.txmode.mq_mode =
+					ETH_MQ_TX_NONE;
+			}
+		}
+
 		port->rx_conf.rx_thresh = rx_thresh;
 		port->rx_conf.rx_free_thresh = rx_free_thresh;
 		port->rx_conf.rx_drop_en = rx_drop_en;
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v5 3/6] ixgbe: Get VF queue number
  2015-01-07  6:32         ` [dpdk-dev] [PATCH v5 3/6] ixgbe: Get VF queue number Ouyang Changchun
@ 2015-01-08  9:01           ` Vlad Zolotarov
  0 siblings, 0 replies; 144+ messages in thread
From: Vlad Zolotarov @ 2015-01-08  9:01 UTC (permalink / raw)
  To: Ouyang Changchun, dev


On 01/07/15 08:32, Ouyang Changchun wrote:
> Get the available Rx and Tx queue number when receiving IXGBE_VF_GET_QUEUES message from VF.
>
> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>

Reviewed-by: Vlad Zolotarov <vladz@cloudius-systems.com>

>
> changes in v5
>    - Add some 'FIX ME' comments for IXGBE_VF_TRANS_VLAN.
>
> ---
>   lib/librte_pmd_ixgbe/ixgbe_pf.c | 40 +++++++++++++++++++++++++++++++++++++++-
>   1 file changed, 39 insertions(+), 1 deletion(-)
>
> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> index 495aff5..dbda9b5 100644
> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> @@ -53,6 +53,8 @@
>   #include "ixgbe_ethdev.h"
>   
>   #define IXGBE_MAX_VFTA     (128)
> +#define IXGBE_VF_MSG_SIZE_DEFAULT 1
> +#define IXGBE_VF_GET_QUEUE_MSG_SIZE 5
>   
>   static inline uint16_t
>   dev_num_vf(struct rte_eth_dev *eth_dev)
> @@ -491,9 +493,41 @@ ixgbe_negotiate_vf_api(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf)
>   }
>   
>   static int
> +ixgbe_get_vf_queues(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf)
> +{
> +	struct ixgbe_vf_info *vfinfo =
> +		*IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private);
> +	uint32_t default_q = vf * RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
> +
> +	/* Verify if the PF supports the mbox APIs version or not */
> +	switch (vfinfo[vf].api_version) {
> +	case ixgbe_mbox_api_20:
> +	case ixgbe_mbox_api_11:
> +		break;
> +	default:
> +		return -1;
> +	}
> +
> +	/* Notify VF of Rx and Tx queue number */
> +	msgbuf[IXGBE_VF_RX_QUEUES] = RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
> +	msgbuf[IXGBE_VF_TX_QUEUES] = RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
> +
> +	/* Notify VF of default queue */
> +	msgbuf[IXGBE_VF_DEF_QUEUE] = default_q;
> +
> +	/*
> +	 * FIX ME if it needs fill msgbuf[IXGBE_VF_TRANS_VLAN]
> +	 * for VLAN strip or VMDQ_DCB or VMDQ_DCB_RSS
> +	 */
> +
> +	return 0;
> +}
> +
> +static int
>   ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
>   {
>   	uint16_t mbx_size = IXGBE_VFMAILBOX_SIZE;
> +	uint16_t msg_size = IXGBE_VF_MSG_SIZE_DEFAULT;
>   	uint32_t msgbuf[IXGBE_VFMAILBOX_SIZE];
>   	int32_t retval;
>   	struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> @@ -537,6 +571,10 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
>   	case IXGBE_VF_API_NEGOTIATE:
>   		retval = ixgbe_negotiate_vf_api(dev, vf, msgbuf);
>   		break;
> +	case IXGBE_VF_GET_QUEUES:
> +		retval = ixgbe_get_vf_queues(dev, vf, msgbuf);
> +		msg_size = IXGBE_VF_GET_QUEUE_MSG_SIZE;
> +		break;
>   	default:
>   		PMD_DRV_LOG(DEBUG, "Unhandled Msg %8.8x", (unsigned)msgbuf[0]);
>   		retval = IXGBE_ERR_MBX;
> @@ -551,7 +589,7 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
>   
>   	msgbuf[0] |= IXGBE_VT_MSGTYPE_CTS;
>   
> -	ixgbe_write_mbx(hw, msgbuf, 1, vf);
> +	ixgbe_write_mbx(hw, msgbuf, msg_size, vf);
>   
>   	return retval;
>   }

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v5 4/6] ether: Check VMDq RSS mode
  2015-01-07  6:32         ` [dpdk-dev] [PATCH v5 4/6] ether: Check VMDq RSS mode Ouyang Changchun
@ 2015-01-08  9:19           ` Vlad Zolotarov
  2015-01-08 18:48             ` Vlad Zolotarov
  0 siblings, 1 reply; 144+ messages in thread
From: Vlad Zolotarov @ 2015-01-08  9:19 UTC (permalink / raw)
  To: Ouyang Changchun, dev


On 01/07/15 08:32, Ouyang Changchun wrote:
> Check mq mode for VMDq RSS, handle it correctly instead of returning an error;
> Also remove the limitation of per pool queue number has max value of 1, because
> the per pool queue number could be 2 or 4 if it is VMDq RSS mode;
>
> The number of rxq specified in config will determine the mq mode for VMDq RSS.
>
> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
>
> changes in v5:
>    - Fix '<' issue, it should be '<=' to test rxq number;
>    - Extract a function to remove the embeded switch-case statement.
>
> ---
>   lib/librte_ether/rte_ethdev.c | 50 ++++++++++++++++++++++++++++++++++++++-----
>   1 file changed, 45 insertions(+), 5 deletions(-)
>
> diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
> index 95f2ceb..8363e26 100644
> --- a/lib/librte_ether/rte_ethdev.c
> +++ b/lib/librte_ether/rte_ethdev.c
> @@ -503,6 +503,31 @@ rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
>   }
>   
>   static int
> +rte_eth_dev_check_vf_rss_rxq_num(uint8_t port_id, uint16_t nb_rx_q)
> +{
> +	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
> +	switch (nb_rx_q) {
> +	case 1:
> +	case 2:
> +		RTE_ETH_DEV_SRIOV(dev).active =
> +			ETH_64_POOLS;
> +		break;
> +	case 4:
> +		RTE_ETH_DEV_SRIOV(dev).active =
> +			ETH_32_POOLS;
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = nb_rx_q;
> +	RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx =
> +		dev->pci_dev->max_vfs * nb_rx_q;
> +
> +	return 0;
> +}
> +
> +static int
>   rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
>   		      const struct rte_eth_conf *dev_conf)
>   {
> @@ -510,8 +535,7 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
>   
>   	if (RTE_ETH_DEV_SRIOV(dev).active != 0) {
>   		/* check multi-queue mode */
> -		if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_RSS) ||
> -		    (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
> +		if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
>   		    (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB_RSS) ||
>   		    (dev_conf->txmode.mq_mode == ETH_MQ_TX_DCB)) {
>   			/* SRIOV only works in VMDq enable mode */
> @@ -525,7 +549,6 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
>   		}
>   
>   		switch (dev_conf->rxmode.mq_mode) {
> -		case ETH_MQ_RX_VMDQ_RSS:
>   		case ETH_MQ_RX_VMDQ_DCB:
>   		case ETH_MQ_RX_VMDQ_DCB_RSS:
>   			/* DCB/RSS VMDQ in SRIOV mode, not implement yet */
> @@ -534,6 +557,25 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
>   					"unsupported VMDQ mq_mode rx %u\n",
>   					port_id, dev_conf->rxmode.mq_mode);
>   			return (-EINVAL);
> +		case ETH_MQ_RX_RSS:
> +			PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8
> +					" SRIOV active, "
> +					"Rx mq mode is changed from:"
> +					"mq_mode %u into VMDQ mq_mode %u\n",
> +					port_id,
> +					dev_conf->rxmode.mq_mode,
> +					dev->data->dev_conf.rxmode.mq_mode);
> +		case ETH_MQ_RX_VMDQ_RSS:
> +			dev->data->dev_conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_RSS;
> +			if (nb_rx_q <= RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool)
> +				if (rte_eth_dev_check_vf_rss_rxq_num(port_id, nb_rx_q) != 0) {
> +					PMD_DEBUG_TRACE("ethdev port_id=%d"
> +						" SRIOV active, invalid queue"
> +						" number for VMDQ RSS\n",
> +						port_id);

Some nitpicking here: I'd add the allowed values descriptions to the 
error message. Something like: "invalid queue number for VMDQ RSS. 
Allowed values are 1, 2 or 4\n".

> +					return -EINVAL;
> +				}
> +			break;
>   		default: /* ETH_MQ_RX_VMDQ_ONLY or ETH_MQ_RX_NONE */
>   			/* if nothing mq mode configure, use default scheme */
>   			dev->data->dev_conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_ONLY;
> @@ -553,8 +595,6 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
>   		default: /* ETH_MQ_TX_VMDQ_ONLY or ETH_MQ_TX_NONE */
>   			/* if nothing mq mode configure, use default scheme */
>   			dev->data->dev_conf.txmode.mq_mode = ETH_MQ_TX_VMDQ_ONLY;
> -			if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)
> -				RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;

I'm not sure u may just remove it. These lines originally belong to a 
different flow. Are u sure u can remove them like that? What if the 
mq_mode is ETH_MQ_RX_NONE and nb_q_per_pool has been initialized to 4 or 
8 in ixgbe_pf_host_init()?

>   			break;
>   		}
>   

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v5 5/6] ixgbe: Config VF RSS
  2015-01-07  6:32         ` [dpdk-dev] [PATCH v5 5/6] ixgbe: Config VF RSS Ouyang Changchun
@ 2015-01-08  9:43           ` Vlad Zolotarov
  2015-01-09  6:07             ` Ouyang, Changchun
  0 siblings, 1 reply; 144+ messages in thread
From: Vlad Zolotarov @ 2015-01-08  9:43 UTC (permalink / raw)
  To: Ouyang Changchun, dev


On 01/07/15 08:32, Ouyang Changchun wrote:
> It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable VF RSS.
>
> The psrtype will determine how many queues the received packets will distribute to,
> and the value of psrtype should depends on both facet: max VF rxq number which
> has been negotiated with PF, and the number of rxq specified in config on guest.
>
> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
>
> Changes in v4:
>   - the number of rxq from config should be power of 2 and should not bigger than
>      max VF rxq number(negotiated between guest and host).
>
> ---
>   lib/librte_pmd_ixgbe/ixgbe_pf.c   |  15 ++++++
>   lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 103 +++++++++++++++++++++++++++++++++-----
>   2 files changed, 106 insertions(+), 12 deletions(-)
>
> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> index dbda9b5..93f6e43 100644
> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev *eth_dev)
>   	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw->mac.num_rar_entries), 0);
>   	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw->mac.num_rar_entries), 0);
>   
> +	/*
> +	 * VF RSS can support at most 4 queues for each VF, even if
> +	 * 8 queues are available for each VF, it need refine to 4
> +	 * queues here due to this limitation, otherwise no queue
> +	 * will receive any packet even RSS is enabled.
> +	 */
> +	if (eth_dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_VMDQ_RSS) {
> +		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
> +			RTE_ETH_DEV_SRIOV(eth_dev).active = ETH_32_POOLS;
> +			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
> +			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
> +				dev_num_vf(eth_dev) * 4;
> +		}
> +	}
> +
>   	/* set VMDq map to default PF pool */
>   	hw->mac.ops.set_vmdq(hw, 0, RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
>   
> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> index f69abda..e83a9ab 100644
> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> @@ -3327,6 +3327,68 @@ ixgbe_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
>   }
>   
>   static int
> +ixgbe_config_vf_rss(struct rte_eth_dev *dev)
> +{
> +	struct ixgbe_hw *hw;
> +	uint32_t mrqc;
> +
> +	ixgbe_rss_configure(dev);
> +
> +	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> +
> +	/* MRQC: enable VF RSS */
> +	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
> +	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
> +	switch (RTE_ETH_DEV_SRIOV(dev).active) {
> +	case ETH_64_POOLS:
> +		mrqc |= IXGBE_MRQC_VMDQRSS64EN;

> +		break;
> +
> +	case ETH_32_POOLS:
> +	case ETH_16_POOLS:

Isn't ETH_16_POOLS mode invalid for VF RSS? That is both what the spec
states and what you handle in this patch in ixgbe_pf_host_configure().
IMHO it would be better to treat this mode value as an error here, since
getting it here indicates a SW bug.

> +		mrqc |= IXGBE_MRQC_VMDQRSS32EN;
> +		break;
> +
> +	default:
> +		PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode");
> +		return -EINVAL;
> +	}
> +
> +	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
> +
> +	return 0;
> +}
> +
> +static int
> +ixgbe_config_vf_default(struct rte_eth_dev *dev)
> +{
> +	struct ixgbe_hw *hw =
> +		IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> +
> +	switch (RTE_ETH_DEV_SRIOV(dev).active) {
> +	case ETH_64_POOLS:
> +		IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> +			IXGBE_MRQC_VMDQEN);
> +		break;
> +
> +	case ETH_32_POOLS:
> +		IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> +			IXGBE_MRQC_VMDQRT4TCEN);
> +		break;
> +
> +	case ETH_16_POOLS:
> +		IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> +			IXGBE_MRQC_VMDQRT8TCEN);
> +		break;
> +	default:
> +		PMD_INIT_LOG(ERR,
> +			"invalid pool number in IOV mode");
> +		break;
> +	}
> +	return 0;
> +}
> +
> +static int
>   ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
>   {
>   	struct ixgbe_hw *hw =
> @@ -3358,24 +3420,25 @@ ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
>   			default: ixgbe_rss_disable(dev);
>   		}
>   	} else {
> -		switch (RTE_ETH_DEV_SRIOV(dev).active) {
>   		/*
>   		 * SRIOV active scheme
> -		 * FIXME if support DCB/RSS together with VMDq & SRIOV
> +		 * Support RSS together with VMDq & SRIOV
>   		 */
> -		case ETH_64_POOLS:
> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQEN);
> -			break;
> -
> -		case ETH_32_POOLS:
> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQRT4TCEN);
> +		switch (dev->data->dev_conf.rxmode.mq_mode) {
> +		case ETH_MQ_RX_RSS:
> +		case ETH_MQ_RX_VMDQ_RSS:
> +			ixgbe_config_vf_rss(dev);
>   			break;
>   
> -		case ETH_16_POOLS:
> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQRT8TCEN);
> -			break;
> +		/* FIXME if support DCB/RSS together with VMDq & SRIOV */
> +		case ETH_MQ_RX_VMDQ_DCB:
> +		case ETH_MQ_RX_VMDQ_DCB_RSS:
> +			PMD_INIT_LOG(ERR,
> +				"Could not support DCB with VMDq & SRIOV");
> +			return -1;
>   		default:
> -			PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
> +			ixgbe_config_vf_default(dev);
> +			break;
>   		}
>   	}
>   
> @@ -3993,6 +4056,19 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
>   	PMD_INIT_FUNC_TRACE();
>   	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>   
> +	if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
> +		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
> +			"it should be power of 2");
> +		return -1;
> +	}
> +
> +	if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
> +		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
> +			"it should be equal to or less than %d",
> +			hw->mac.max_rx_queues);
> +		return -1;
> +	}
> +
>   	/*
>   	 * When the VF driver issues a IXGBE_VF_RESET request, the PF driver
>   	 * disables the VF receipt of packets if the PF MTU is > 1500.
> @@ -4094,6 +4170,9 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
>   			IXGBE_PSRTYPE_IPV6HDR;
>   #endif
>   
> +	/* Set RQPL for VF RSS according to max Rx queue */
> +	psrtype |= (dev->data->nb_rx_queues >> 1) <<
> +		IXGBE_PSRTYPE_RQPL_SHIFT;
>   	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
>   
>   	if (dev->data->dev_conf.rxmode.enable_scatter) {

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v5 6/6] testpmd: Set Rx VMDq RSS mode
  2015-01-07  6:32         ` [dpdk-dev] [PATCH v5 6/6] testpmd: Set Rx VMDq RSS mode Ouyang Changchun
@ 2015-01-08  9:46           ` Vlad Zolotarov
  0 siblings, 0 replies; 144+ messages in thread
From: Vlad Zolotarov @ 2015-01-08  9:46 UTC (permalink / raw)
  To: Ouyang Changchun, dev


On 01/07/15 08:32, Ouyang Changchun wrote:
> Set VMDq RSS mode if it has VF(VF number is more than 1) and has RSS information.
>
> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>

Reviewed-by: Vlad Zolotarov <vladz@cloudius-systems.com>

Some nitpicking below... ;)

>
> changes in v5
>    - Assign txmode.mq_mode with ETH_MQ_TX_NONE explicitly;
>    - Remove one line wrong comment.
>
> ---
>   app/test-pmd/testpmd.c | 15 ++++++++++++++-
>   1 file changed, 14 insertions(+), 1 deletion(-)
>
> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
> index 8c69756..64fd4ee 100644
> --- a/app/test-pmd/testpmd.c
> +++ b/app/test-pmd/testpmd.c
> @@ -1700,7 +1700,6 @@ init_port_config(void)
>   			port->dev_conf.rx_adv_conf.rss_conf.rss_hf = 0;
>   		}
>   
> -		/* In SR-IOV mode, RSS mode is not available */
>   		if (port->dcb_flag == 0 && port->dev_info.max_vfs == 0) {
>   			if( port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0)
>   				port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
> @@ -1708,6 +1707,20 @@ init_port_config(void)
>   				port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
>   		}
>   
> +		if (port->dev_info.max_vfs != 0) {
> +			if (port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0) {
> +				port->dev_conf.rxmode.mq_mode =
> +					ETH_MQ_RX_VMDQ_RSS;
> +				port->dev_conf.txmode.mq_mode =
> +					ETH_MQ_TX_NONE;
> +			} else {
> +				port->dev_conf.rxmode.mq_mode =
> +					ETH_MQ_RX_NONE;
> +				port->dev_conf.txmode.mq_mode =
> +					ETH_MQ_TX_NONE;

It seems that txmode.mq_mode assignment may be taken out of the 
"if-else" statement here... ;)

> +			}
> +		}
> +
>   		port->rx_conf.rx_thresh = rx_thresh;
>   		port->rx_conf.rx_free_thresh = rx_free_thresh;
>   		port->rx_conf.rx_drop_en = rx_drop_en;

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v5 0/6] Enable VF RSS for Niantic
  2015-01-07  6:32       ` [dpdk-dev] [PATCH v5 0/6] Enable VF RSS for Niantic Ouyang Changchun
                           ` (5 preceding siblings ...)
  2015-01-07  6:32         ` [dpdk-dev] [PATCH v5 6/6] testpmd: Set Rx VMDq RSS mode Ouyang Changchun
@ 2015-01-08  9:56         ` Vlad Zolotarov
  2015-01-18 21:58           ` Thomas Monjalon
  2015-01-12  5:59         ` [dpdk-dev] [PATCH v6 " Ouyang Changchun
  7 siblings, 1 reply; 144+ messages in thread
From: Vlad Zolotarov @ 2015-01-08  9:56 UTC (permalink / raw)
  To: Ouyang Changchun, dev


On 01/07/15 08:32, Ouyang Changchun wrote:
> This patch enables VF RSS for Niantic, which allow each VF having at most 4 queues.
> The actual queue number per VF depends on the total number of pool, which is
> determined by the max number of VF at PF initialization stage and the number of
> queue specified in config:
> 1) If the max number of VF is in the range from 1 to 32, and the number of rxq is 4
> ('--rxq 4' in testpmd), then there is totally 32 pools(ETH_32_POOLS), and each VF
> have 4 queues;
>   
> 2)If the max number of VF is in the range from 33 to 64, and the number of rxq is 2
> ('--rxq 2' in testpmd), then there is totally 64 pools(ETH_64_POOLS), and each VF
> have 2 queues;
>   
> On host, to enable VF RSS functionality, rx mq mode should be set as ETH_MQ_RX_VMDQ_RSS
> or ETH_MQ_RX_RSS mode, and SRIOV mode should be activated(max_vfs >= 1).
> It also needs config VF RSS information like hash function, RSS key, RSS key length.
>   
> The limitation for Niantic VF RSS is:
> the hash and key are shared among PF and all VF, the RETA table with 128 entries are
> also shared among PF and all VF. So it could not to provide a method to query the hash
> and reta content per VF on guest, while, if possible, please query them on host(PF) for
> the shared RETA information.

I've acked PATCH1 and PATCH2 already before and since there are no 
changes in them, pls.,  consider them ACKed... ;)

>
> changes in v5:
>    - Fix minor issue and some comments;
>
> changes in v4:
>    - Extract a function to remove embeded switch-case statement;
>    - Check whether RX queue number is a valid one, otherwise return error;
>    - Update the description a bit;
>   
> changes in v3:
>    - More cleanup;
>   
> changes in v2:
>    - Update the description;
>    - Use receiving queue number('--rxq <q-num>') specified in config to determine the
>      number of pool and the number of queue per VF;
>   
> changes in v1:
>    - Config VF RSS;
>
> Changchun Ouyang (6):
>    ixgbe: Code cleanup
>    ixgbe: Negotiate VF API version
>    ixgbe: Get VF queue number
>    ether: Check VMDq RSS mode
>    ixgbe: Config VF RSS
>    testpmd: Set Rx VMDq RSS mode
>
>   app/test-pmd/testpmd.c              |  15 +++-
>   lib/librte_ether/rte_ethdev.c       |  50 +++++++++++--
>   lib/librte_pmd_ixgbe/ixgbe_ethdev.h |   1 +
>   lib/librte_pmd_ixgbe/ixgbe_pf.c     |  80 ++++++++++++++++++++-
>   lib/librte_pmd_ixgbe/ixgbe_rxtx.c   | 138 ++++++++++++++++++++++++++++--------
>   5 files changed, 248 insertions(+), 36 deletions(-)
>

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v5 4/6] ether: Check VMDq RSS mode
  2015-01-08  9:19           ` Vlad Zolotarov
@ 2015-01-08 18:48             ` Vlad Zolotarov
  2015-01-09  5:54               ` Ouyang, Changchun
  0 siblings, 1 reply; 144+ messages in thread
From: Vlad Zolotarov @ 2015-01-08 18:48 UTC (permalink / raw)
  To: Ouyang Changchun, dev


On 01/08/15 11:19, Vlad Zolotarov wrote:
>
> On 01/07/15 08:32, Ouyang Changchun wrote:
>> Check mq mode for VMDq RSS, handle it correctly instead of returning 
>> an error;
>> Also remove the limitation of per pool queue number has max value of 
>> 1, because
>> the per pool queue number could be 2 or 4 if it is VMDq RSS mode;
>>
>> The number of rxq specified in config will determine the mq mode for 
>> VMDq RSS.
>>
>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
>>
>> changes in v5:
>>    - Fix '<' issue, it should be '<=' to test rxq number;
>>    - Extract a function to remove the embeded switch-case statement.
>>
>> ---
>>   lib/librte_ether/rte_ethdev.c | 50 
>> ++++++++++++++++++++++++++++++++++++++-----
>>   1 file changed, 45 insertions(+), 5 deletions(-)
>>
>> diff --git a/lib/librte_ether/rte_ethdev.c 
>> b/lib/librte_ether/rte_ethdev.c
>> index 95f2ceb..8363e26 100644
>> --- a/lib/librte_ether/rte_ethdev.c
>> +++ b/lib/librte_ether/rte_ethdev.c
>> @@ -503,6 +503,31 @@ rte_eth_dev_tx_queue_config(struct rte_eth_dev 
>> *dev, uint16_t nb_queues)
>>   }
>>     static int
>> +rte_eth_dev_check_vf_rss_rxq_num(uint8_t port_id, uint16_t nb_rx_q)
>> +{
>> +    struct rte_eth_dev *dev = &rte_eth_devices[port_id];
>> +    switch (nb_rx_q) {
>> +    case 1:
>> +    case 2:
>> +        RTE_ETH_DEV_SRIOV(dev).active =
>> +            ETH_64_POOLS;
>> +        break;
>> +    case 4:
>> +        RTE_ETH_DEV_SRIOV(dev).active =
>> +            ETH_32_POOLS;
>> +        break;
>> +    default:
>> +        return -EINVAL;
>> +    }
>> +
>> +    RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = nb_rx_q;
>> +    RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx =
>> +        dev->pci_dev->max_vfs * nb_rx_q;
>> +
>> +    return 0;
>> +}
>> +
>> +static int
>>   rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, 
>> uint16_t nb_tx_q,
>>                 const struct rte_eth_conf *dev_conf)
>>   {
>> @@ -510,8 +535,7 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, 
>> uint16_t nb_rx_q, uint16_t nb_tx_q,
>>         if (RTE_ETH_DEV_SRIOV(dev).active != 0) {
>>           /* check multi-queue mode */
>> -        if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_RSS) ||
>> -            (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
>> +        if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
>>               (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB_RSS) ||
>>               (dev_conf->txmode.mq_mode == ETH_MQ_TX_DCB)) {
>>               /* SRIOV only works in VMDq enable mode */
>> @@ -525,7 +549,6 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, 
>> uint16_t nb_rx_q, uint16_t nb_tx_q,
>>           }
>>             switch (dev_conf->rxmode.mq_mode) {
>> -        case ETH_MQ_RX_VMDQ_RSS:
>>           case ETH_MQ_RX_VMDQ_DCB:
>>           case ETH_MQ_RX_VMDQ_DCB_RSS:
>>               /* DCB/RSS VMDQ in SRIOV mode, not implement yet */
>> @@ -534,6 +557,25 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, 
>> uint16_t nb_rx_q, uint16_t nb_tx_q,
>>                       "unsupported VMDQ mq_mode rx %u\n",
>>                       port_id, dev_conf->rxmode.mq_mode);
>>               return (-EINVAL);
>> +        case ETH_MQ_RX_RSS:
>> +            PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8
>> +                    " SRIOV active, "
>> +                    "Rx mq mode is changed from:"
>> +                    "mq_mode %u into VMDQ mq_mode %u\n",
>> +                    port_id,
>> +                    dev_conf->rxmode.mq_mode,
>> +                    dev->data->dev_conf.rxmode.mq_mode);
>> +        case ETH_MQ_RX_VMDQ_RSS:
>> +            dev->data->dev_conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_RSS;
>> +            if (nb_rx_q <= RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool)
>> +                if (rte_eth_dev_check_vf_rss_rxq_num(port_id, 
>> nb_rx_q) != 0) {
>> +                    PMD_DEBUG_TRACE("ethdev port_id=%d"
>> +                        " SRIOV active, invalid queue"
>> +                        " number for VMDQ RSS\n",
>> +                        port_id);
>
> Some nitpicking here: I'd add the allowed values descriptions to the 
> error message. Something like: "invalid queue number for VMDQ RSS. 
> Allowed values are 1, 2 or 4\n".
>
>> +                    return -EINVAL;
>> +                }
>> +            break;
>>           default: /* ETH_MQ_RX_VMDQ_ONLY or ETH_MQ_RX_NONE */
>>               /* if nothing mq mode configure, use default scheme */
>>               dev->data->dev_conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_ONLY;
>> @@ -553,8 +595,6 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, 
>> uint16_t nb_rx_q, uint16_t nb_tx_q,
>>           default: /* ETH_MQ_TX_VMDQ_ONLY or ETH_MQ_TX_NONE */
>>               /* if nothing mq mode configure, use default scheme */
>>               dev->data->dev_conf.txmode.mq_mode = ETH_MQ_TX_VMDQ_ONLY;
>> -            if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)
>> -                RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;
>
> I'm not sure u may just remove it. These lines originally belong to a 
> different flow. Are u sure u can remove them like that? What if the 
> mq_mode is ETH_MQ_RX_NONE and nb_q_per_pool has been initialized to 4 
> or 8 in ixgbe_pf_host_init()?

I misread the patch - these lines belong to the txmode.mq_mode switch
case. I think it's ok to remove these really strange lines here. And
looking at it, I think that for similar reasons the similar lines
should be removed in the Rx case too: consider the non-RSS case with an
MQ DCB Tx configuration.

>
>>               break;
>>           }
>

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v5 4/6] ether: Check VMDq RSS mode
  2015-01-08 18:48             ` Vlad Zolotarov
@ 2015-01-09  5:54               ` Ouyang, Changchun
  2015-01-09 13:49                 ` Vlad Zolotarov
  0 siblings, 1 reply; 144+ messages in thread
From: Ouyang, Changchun @ 2015-01-09  5:54 UTC (permalink / raw)
  To: Vlad Zolotarov, dev



> -----Original Message-----
> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> Sent: Friday, January 9, 2015 2:49 AM
> To: Ouyang, Changchun; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v5 4/6] ether: Check VMDq RSS mode
> 
> 
> On 01/08/15 11:19, Vlad Zolotarov wrote:
> >
> > On 01/07/15 08:32, Ouyang Changchun wrote:
> >> Check mq mode for VMDq RSS, handle it correctly instead of returning
> >> an error; Also remove the limitation of per pool queue number has max
> >> value of 1, because the per pool queue number could be 2 or 4 if it
> >> is VMDq RSS mode;
> >>
> >> The number of rxq specified in config will determine the mq mode for
> >> VMDq RSS.
> >>
> >> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> >>
> >> changes in v5:
> >>    - Fix '<' issue, it should be '<=' to test rxq number;
> >>    - Extract a function to remove the embeded switch-case statement.
> >>
> >> ---
> >>   lib/librte_ether/rte_ethdev.c | 50
> >> ++++++++++++++++++++++++++++++++++++++-----
> >>   1 file changed, 45 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/lib/librte_ether/rte_ethdev.c
> >> b/lib/librte_ether/rte_ethdev.c index 95f2ceb..8363e26 100644
> >> --- a/lib/librte_ether/rte_ethdev.c
> >> +++ b/lib/librte_ether/rte_ethdev.c
> >> @@ -503,6 +503,31 @@ rte_eth_dev_tx_queue_config(struct
> rte_eth_dev
> >> *dev, uint16_t nb_queues)
> >>   }
> >>     static int
> >> +rte_eth_dev_check_vf_rss_rxq_num(uint8_t port_id, uint16_t nb_rx_q)
> >> +{
> >> +    struct rte_eth_dev *dev = &rte_eth_devices[port_id];
> >> +    switch (nb_rx_q) {
> >> +    case 1:
> >> +    case 2:
> >> +        RTE_ETH_DEV_SRIOV(dev).active =
> >> +            ETH_64_POOLS;
> >> +        break;
> >> +    case 4:
> >> +        RTE_ETH_DEV_SRIOV(dev).active =
> >> +            ETH_32_POOLS;
> >> +        break;
> >> +    default:
> >> +        return -EINVAL;
> >> +    }
> >> +
> >> +    RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = nb_rx_q;
> >> +    RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx =
> >> +        dev->pci_dev->max_vfs * nb_rx_q;
> >> +
> >> +    return 0;
> >> +}
> >> +
> >> +static int
> >>   rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q,
> >> uint16_t nb_tx_q,
> >>                 const struct rte_eth_conf *dev_conf)
> >>   {
> >> @@ -510,8 +535,7 @@ rte_eth_dev_check_mq_mode(uint8_t port_id,
> >> uint16_t nb_rx_q, uint16_t nb_tx_q,
> >>         if (RTE_ETH_DEV_SRIOV(dev).active != 0) {
> >>           /* check multi-queue mode */
> >> -        if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_RSS) ||
> >> -            (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
> >> +        if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
> >>               (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB_RSS) ||
> >>               (dev_conf->txmode.mq_mode == ETH_MQ_TX_DCB)) {
> >>               /* SRIOV only works in VMDq enable mode */ @@ -525,7
> >> +549,6 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t
> >> nb_rx_q, uint16_t nb_tx_q,
> >>           }
> >>             switch (dev_conf->rxmode.mq_mode) {
> >> -        case ETH_MQ_RX_VMDQ_RSS:
> >>           case ETH_MQ_RX_VMDQ_DCB:
> >>           case ETH_MQ_RX_VMDQ_DCB_RSS:
> >>               /* DCB/RSS VMDQ in SRIOV mode, not implement yet */ @@
> >> -534,6 +557,25 @@ rte_eth_dev_check_mq_mode(uint8_t port_id,
> uint16_t
> >> nb_rx_q, uint16_t nb_tx_q,
> >>                       "unsupported VMDQ mq_mode rx %u\n",
> >>                       port_id, dev_conf->rxmode.mq_mode);
> >>               return (-EINVAL);
> >> +        case ETH_MQ_RX_RSS:
> >> +            PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8
> >> +                    " SRIOV active, "
> >> +                    "Rx mq mode is changed from:"
> >> +                    "mq_mode %u into VMDQ mq_mode %u\n",
> >> +                    port_id,
> >> +                    dev_conf->rxmode.mq_mode,
> >> +                    dev->data->dev_conf.rxmode.mq_mode);
> >> +        case ETH_MQ_RX_VMDQ_RSS:
> >> +            dev->data->dev_conf.rxmode.mq_mode =
> ETH_MQ_RX_VMDQ_RSS;
> >> +            if (nb_rx_q <= RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool)
> >> +                if (rte_eth_dev_check_vf_rss_rxq_num(port_id,
> >> nb_rx_q) != 0) {
> >> +                    PMD_DEBUG_TRACE("ethdev port_id=%d"
> >> +                        " SRIOV active, invalid queue"
> >> +                        " number for VMDQ RSS\n",
> >> +                        port_id);
> >
> > Some nitpicking here: I'd add the allowed values descriptions to the
> > error message. Something like: "invalid queue number for VMDQ RSS.
> > Allowed values are 1, 2 or 4\n".
> >
> >> +                    return -EINVAL;
> >> +                }
> >> +            break;
> >>           default: /* ETH_MQ_RX_VMDQ_ONLY or ETH_MQ_RX_NONE */
> >>               /* if nothing mq mode configure, use default scheme */
> >>               dev->data->dev_conf.rxmode.mq_mode =
> >> ETH_MQ_RX_VMDQ_ONLY; @@ -553,8 +595,6 @@
> >> rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q,
> uint16_t nb_tx_q,
> >>           default: /* ETH_MQ_TX_VMDQ_ONLY or ETH_MQ_TX_NONE */
> >>               /* if nothing mq mode configure, use default scheme */
> >>               dev->data->dev_conf.txmode.mq_mode =
> ETH_MQ_TX_VMDQ_ONLY;
> >> -            if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)
> >> -                RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;
> >
> > I'm not sure u may just remove it. These lines originally belong to a
> > different flow. Are u sure u can remove them like that? What if the
> > mq_mode is ETH_MQ_RX_NONE and nb_q_per_pool has been initialized
> to 4
> > or 8 in ixgbe_pf_host_init()?
> 
> I misread the patch - these lines belong to the txmode.mq_mode switch case.
> I think it's ok to remove these really strange lines here. And when I look at it i
> think for the similar reasons the similar lines should be removed in the Rx
> case too: consider non-RSS case with MQ DCB Tx configuration.
> 
I searched the code in this function; only one place has
" if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)
           RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;"

That place is the default branch, which handles the rx_none and vmdq_only modes.
We don't need to remove it: nb_q_per_pool should be set to 1 there because those modes use 1 queue per pool.

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v5 5/6] ixgbe: Config VF RSS
  2015-01-08  9:43           ` Vlad Zolotarov
@ 2015-01-09  6:07             ` Ouyang, Changchun
  2015-01-09 14:01               ` Vlad Zolotarov
  0 siblings, 1 reply; 144+ messages in thread
From: Ouyang, Changchun @ 2015-01-09  6:07 UTC (permalink / raw)
  To: Vlad Zolotarov, dev



> -----Original Message-----
> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> Sent: Thursday, January 8, 2015 5:43 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v5 5/6] ixgbe: Config VF RSS
> 
> 
> On 01/07/15 08:32, Ouyang Changchun wrote:
> > It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable VF
> RSS.
> >
> > The psrtype will determine how many queues the received packets will
> > distribute to, and the value of psrtype should depends on both facet:
> > max VF rxq number which has been negotiated with PF, and the number of
> rxq specified in config on guest.
> >
> > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> >
> > Changes in v4:
> >   - the number of rxq from config should be power of 2 and should not
> bigger than
> >      max VF rxq number(negotiated between guest and host).
> >
> > ---
> >   lib/librte_pmd_ixgbe/ixgbe_pf.c   |  15 ++++++
> >   lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 103
> +++++++++++++++++++++++++++++++++-----
> >   2 files changed, 106 insertions(+), 12 deletions(-)
> >
> > diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > b/lib/librte_pmd_ixgbe/ixgbe_pf.c index dbda9b5..93f6e43 100644
> > --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev
> *eth_dev)
> >   	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw-
> >mac.num_rar_entries), 0);
> >   	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw-
> >mac.num_rar_entries), 0);
> >
> > +	/*
> > +	 * VF RSS can support at most 4 queues for each VF, even if
> > +	 * 8 queues are available for each VF, it need refine to 4
> > +	 * queues here due to this limitation, otherwise no queue
> > +	 * will receive any packet even RSS is enabled.
> > +	 */
> > +	if (eth_dev->data->dev_conf.rxmode.mq_mode ==
> ETH_MQ_RX_VMDQ_RSS) {
> > +		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
> > +			RTE_ETH_DEV_SRIOV(eth_dev).active =
> ETH_32_POOLS;
> > +			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
> > +			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
> > +				dev_num_vf(eth_dev) * 4;
> > +		}
> > +	}
> > +
> >   	/* set VMDq map to default PF pool */
> >   	hw->mac.ops.set_vmdq(hw, 0,
> > RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
> >
> > diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > index f69abda..e83a9ab 100644
> > --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > @@ -3327,6 +3327,68 @@ ixgbe_alloc_rx_queue_mbufs(struct
> igb_rx_queue *rxq)
> >   }
> >
> >   static int
> > +ixgbe_config_vf_rss(struct rte_eth_dev *dev) {
> > +	struct ixgbe_hw *hw;
> > +	uint32_t mrqc;
> > +
> > +	ixgbe_rss_configure(dev);
> > +
> > +	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> > +
> > +	/* MRQC: enable VF RSS */
> > +	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
> > +	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
> > +	switch (RTE_ETH_DEV_SRIOV(dev).active) {
> > +	case ETH_64_POOLS:
> > +		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
> 
> > +		break;
> > +
> > +	case ETH_32_POOLS:
> > +	case ETH_16_POOLS:
> 
> Isn't ETH_16_POOLS mode is invalid for VF RSS? It's what both spec states
> and what u handle in this patch in ixgbe_pf_host_configure().
> IMHO it would be better to treat this mode value as an error here since if u
> get it here it indicates of a SW bug.

I think we have discussed this before: returning an error here would break the case where the max VF number is less than 16.
That would make the library appear unable to support VF RSS when the max VF number is less than 16,
so we obviously don't want it to fail here.

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v5 4/6] ether: Check VMDq RSS mode
  2015-01-09  5:54               ` Ouyang, Changchun
@ 2015-01-09 13:49                 ` Vlad Zolotarov
  2015-01-12  3:41                   ` Ouyang, Changchun
  0 siblings, 1 reply; 144+ messages in thread
From: Vlad Zolotarov @ 2015-01-09 13:49 UTC (permalink / raw)
  To: Ouyang, Changchun, dev


On 01/09/15 07:54, Ouyang, Changchun wrote:
>
>> -----Original Message-----
>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>> Sent: Friday, January 9, 2015 2:49 AM
>> To: Ouyang, Changchun; dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH v5 4/6] ether: Check VMDq RSS mode
>>
>>
>> On 01/08/15 11:19, Vlad Zolotarov wrote:
>>> On 01/07/15 08:32, Ouyang Changchun wrote:
>>>> Check mq mode for VMDq RSS, handle it correctly instead of returning
>>>> an error; Also remove the limitation of per pool queue number has max
>>>> value of 1, because the per pool queue number could be 2 or 4 if it
>>>> is VMDq RSS mode;
>>>>
>>>> The number of rxq specified in config will determine the mq mode for
>>>> VMDq RSS.
>>>>
>>>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
>>>>
>>>> changes in v5:
>>>>     - Fix '<' issue, it should be '<=' to test rxq number;
>>>>     - Extract a function to remove the embeded switch-case statement.
>>>>
>>>> ---
>>>>    lib/librte_ether/rte_ethdev.c | 50
>>>> ++++++++++++++++++++++++++++++++++++++-----
>>>>    1 file changed, 45 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/lib/librte_ether/rte_ethdev.c
>>>> b/lib/librte_ether/rte_ethdev.c index 95f2ceb..8363e26 100644
>>>> --- a/lib/librte_ether/rte_ethdev.c
>>>> +++ b/lib/librte_ether/rte_ethdev.c
>>>> @@ -503,6 +503,31 @@ rte_eth_dev_tx_queue_config(struct
>> rte_eth_dev
>>>> *dev, uint16_t nb_queues)
>>>>    }
>>>>      static int
>>>> +rte_eth_dev_check_vf_rss_rxq_num(uint8_t port_id, uint16_t nb_rx_q)
>>>> +{
>>>> +    struct rte_eth_dev *dev = &rte_eth_devices[port_id];
>>>> +    switch (nb_rx_q) {
>>>> +    case 1:
>>>> +    case 2:
>>>> +        RTE_ETH_DEV_SRIOV(dev).active =
>>>> +            ETH_64_POOLS;
>>>> +        break;
>>>> +    case 4:
>>>> +        RTE_ETH_DEV_SRIOV(dev).active =
>>>> +            ETH_32_POOLS;
>>>> +        break;
>>>> +    default:
>>>> +        return -EINVAL;
>>>> +    }
>>>> +
>>>> +    RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = nb_rx_q;
>>>> +    RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx =
>>>> +        dev->pci_dev->max_vfs * nb_rx_q;
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +static int
>>>>    rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q,
>>>> uint16_t nb_tx_q,
>>>>                  const struct rte_eth_conf *dev_conf)
>>>>    {
>>>> @@ -510,8 +535,7 @@ rte_eth_dev_check_mq_mode(uint8_t port_id,
>>>> uint16_t nb_rx_q, uint16_t nb_tx_q,
>>>>          if (RTE_ETH_DEV_SRIOV(dev).active != 0) {
>>>>            /* check multi-queue mode */
>>>> -        if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_RSS) ||
>>>> -            (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
>>>> +        if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
>>>>                (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB_RSS) ||
>>>>                (dev_conf->txmode.mq_mode == ETH_MQ_TX_DCB)) {
>>>>                /* SRIOV only works in VMDq enable mode */ @@ -525,7
>>>> +549,6 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t
>>>> nb_rx_q, uint16_t nb_tx_q,
>>>>            }
>>>>              switch (dev_conf->rxmode.mq_mode) {
>>>> -        case ETH_MQ_RX_VMDQ_RSS:
>>>>            case ETH_MQ_RX_VMDQ_DCB:
>>>>            case ETH_MQ_RX_VMDQ_DCB_RSS:
>>>>                /* DCB/RSS VMDQ in SRIOV mode, not implement yet */ @@
>>>> -534,6 +557,25 @@ rte_eth_dev_check_mq_mode(uint8_t port_id,
>> uint16_t
>>>> nb_rx_q, uint16_t nb_tx_q,
>>>>                        "unsupported VMDQ mq_mode rx %u\n",
>>>>                        port_id, dev_conf->rxmode.mq_mode);
>>>>                return (-EINVAL);
>>>> +        case ETH_MQ_RX_RSS:
>>>> +            PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8
>>>> +                    " SRIOV active, "
>>>> +                    "Rx mq mode is changed from:"
>>>> +                    "mq_mode %u into VMDQ mq_mode %u\n",
>>>> +                    port_id,
>>>> +                    dev_conf->rxmode.mq_mode,
>>>> +                    dev->data->dev_conf.rxmode.mq_mode);
>>>> +        case ETH_MQ_RX_VMDQ_RSS:
>>>> +            dev->data->dev_conf.rxmode.mq_mode =
>> ETH_MQ_RX_VMDQ_RSS;
>>>> +            if (nb_rx_q <= RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool)
>>>> +                if (rte_eth_dev_check_vf_rss_rxq_num(port_id,
>>>> nb_rx_q) != 0) {
>>>> +                    PMD_DEBUG_TRACE("ethdev port_id=%d"
>>>> +                        " SRIOV active, invalid queue"
>>>> +                        " number for VMDQ RSS\n",
>>>> +                        port_id);
>>> Some nitpicking here: I'd add the allowed values descriptions to the
>>> error message. Something like: "invalid queue number for VMDQ RSS.
>>> Allowed values are 1, 2 or 4\n".
>>>
>>>> +                    return -EINVAL;
>>>> +                }
>>>> +            break;
>>>>            default: /* ETH_MQ_RX_VMDQ_ONLY or ETH_MQ_RX_NONE */
>>>>                /* if nothing mq mode configure, use default scheme */
>>>>                dev->data->dev_conf.rxmode.mq_mode =
>>>> ETH_MQ_RX_VMDQ_ONLY; @@ -553,8 +595,6 @@
>>>> rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q,
>> uint16_t nb_tx_q,
>>>>            default: /* ETH_MQ_TX_VMDQ_ONLY or ETH_MQ_TX_NONE */
>>>>                /* if nothing mq mode configure, use default scheme */
>>>>                dev->data->dev_conf.txmode.mq_mode =
>> ETH_MQ_TX_VMDQ_ONLY;
>>>> -            if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)
>>>> -                RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;
>>> I'm not sure u may just remove it. These lines originally belong to a
>>> different flow. Are u sure u can remove them like that? What if the
>>> mq_mode is ETH_MQ_RX_NONE and nb_q_per_pool has been initialized
>> to 4
>>> or 8 in ixgbe_pf_host_init()?
>> I misread the patch - these lines belong to the txmode.mq_mode switch case.
>> I think it's ok to remove these really strange lines here. And when I look at it i
>> think for the similar reasons the similar lines should be removed in the Rx
>> case too: consider non-RSS case with MQ DCB Tx configuration.
>>
> I search code in this function, only one place has
> " if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)
>             RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;"
>
> The only place is default branch, which is for rx_none, or vmdq_only mode,

Here is a snippet of an rte_eth_dev_check_mq_mode() from the current master:

		switch (dev_conf->rxmode.mq_mode) {
		case ETH_MQ_RX_VMDQ_RSS:
		case ETH_MQ_RX_VMDQ_DCB:
		case ETH_MQ_RX_VMDQ_DCB_RSS:
			/* DCB/RSS VMDQ in SRIOV mode, not implement yet */
			PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8
					" SRIOV active, "
					"unsupported VMDQ mq_mode rx %u\n",
					port_id, dev_conf->rxmode.mq_mode);
			return (-EINVAL);
		default: /* ETH_MQ_RX_VMDQ_ONLY or ETH_MQ_RX_NONE */
			/* if nothing mq mode configure, use default scheme */
			dev->data->dev_conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_ONLY;
			*if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)**                  <---- This is one
**				RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;*
			break;
		}

		switch (dev_conf->txmode.mq_mode) {
		case ETH_MQ_TX_VMDQ_DCB:
			/* DCB VMDQ in SRIOV mode, not implement yet */
			PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8
					" SRIOV active, "
					"unsupported VMDQ mq_mode tx %u\n",
					port_id, dev_conf->txmode.mq_mode);
			return (-EINVAL);
		default: /* ETH_MQ_TX_VMDQ_ONLY or ETH_MQ_TX_NONE */
			/* if nothing mq mode configure, use default scheme */
			dev->data->dev_conf.txmode.mq_mode = ETH_MQ_TX_VMDQ_ONLY;
			if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)              <------ This is two. This is what your patch is removing
				RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;
			break;
		}




> We don't need remove this, as it should assign as 1 because it did use 1 queue per pool.

And why is that? Just because RSS was not enabled? And what if a user 
wants multiple Tx queues? Mode 1100b of MRQE for instance?

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v5 5/6] ixgbe: Config VF RSS
  2015-01-09  6:07             ` Ouyang, Changchun
@ 2015-01-09 14:01               ` Vlad Zolotarov
  2015-01-12  5:11                 ` Ouyang, Changchun
  0 siblings, 1 reply; 144+ messages in thread
From: Vlad Zolotarov @ 2015-01-09 14:01 UTC (permalink / raw)
  To: Ouyang, Changchun, dev


On 01/09/15 08:07, Ouyang, Changchun wrote:
>
>> -----Original Message-----
>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>> Sent: Thursday, January 8, 2015 5:43 PM
>> To: Ouyang, Changchun; dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH v5 5/6] ixgbe: Config VF RSS
>>
>>
>> On 01/07/15 08:32, Ouyang Changchun wrote:
>>> It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable VF
>> RSS.
>>> The psrtype will determine how many queues the received packets will
>>> distribute to, and the value of psrtype should depends on both facet:
>>> max VF rxq number which has been negotiated with PF, and the number of
>> rxq specified in config on guest.
>>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
>>>
>>> Changes in v4:
>>>    - the number of rxq from config should be power of 2 and should not
>> bigger than
>>>       max VF rxq number(negotiated between guest and host).
>>>
>>> ---
>>>    lib/librte_pmd_ixgbe/ixgbe_pf.c   |  15 ++++++
>>>    lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 103
>> +++++++++++++++++++++++++++++++++-----
>>>    2 files changed, 106 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>> b/lib/librte_pmd_ixgbe/ixgbe_pf.c index dbda9b5..93f6e43 100644
>>> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>> @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev
>> *eth_dev)
>>>    	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw-
>>> mac.num_rar_entries), 0);
>>>    	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw-
>>> mac.num_rar_entries), 0);
>>>
>>> +	/*
>>> +	 * VF RSS can support at most 4 queues for each VF, even if
>>> +	 * 8 queues are available for each VF, it need refine to 4
>>> +	 * queues here due to this limitation, otherwise no queue
>>> +	 * will receive any packet even RSS is enabled.
>>> +	 */
>>> +	if (eth_dev->data->dev_conf.rxmode.mq_mode ==
>> ETH_MQ_RX_VMDQ_RSS) {
>>> +		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
>>> +			RTE_ETH_DEV_SRIOV(eth_dev).active =
>> ETH_32_POOLS;
>>> +			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
>>> +			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
>>> +				dev_num_vf(eth_dev) * 4;
>>> +		}
>>> +	}
>>> +
>>>    	/* set VMDq map to default PF pool */
>>>    	hw->mac.ops.set_vmdq(hw, 0,
>>> RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
>>>
>>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>>> b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>>> index f69abda..e83a9ab 100644
>>> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>>> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>>> @@ -3327,6 +3327,68 @@ ixgbe_alloc_rx_queue_mbufs(struct
>> igb_rx_queue *rxq)
>>>    }
>>>
>>>    static int
>>> +ixgbe_config_vf_rss(struct rte_eth_dev *dev) {
>>> +	struct ixgbe_hw *hw;
>>> +	uint32_t mrqc;
>>> +
>>> +	ixgbe_rss_configure(dev);
>>> +
>>> +	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>>> +
>>> +	/* MRQC: enable VF RSS */
>>> +	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
>>> +	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
>>> +	switch (RTE_ETH_DEV_SRIOV(dev).active) {
>>> +	case ETH_64_POOLS:
>>> +		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
>>> +		break;
>>> +
>>> +	case ETH_32_POOLS:
>>> +	case ETH_16_POOLS:
>> Isn't ETH_16_POOLS mode is invalid for VF RSS? It's what both spec states
>> and what u handle in this patch in ixgbe_pf_host_configure().
>> IMHO it would be better to treat this mode value as an error here since if u
>> get it here it indicates of a SW bug.
> I think we discussed it before already,  return err here will break here in the case of max vf number is less than 16.
> If doing that, This make the library seems can't support vf rss in the case of max vf num less than 16.
> So we obviously don't hope it break here.

I don't remember us discussing these specific lines. However, I do 
remember that we talked about the previous section of this patch.
I'm afraid you are missing my point here: ixgbe_pf_host_configure() is 
called before ixgbe_config_vf_rss() in the ixgbe_dev_start() flow. This 
means that RTE_ETH_DEV_SRIOV(dev).active will already have been 
adjusted by your (!!!) code in ixgbe_pf_host_configure() by the time 
you get to ixgbe_config_vf_rss(), and it should not be equal to 
ETH_16_POOLS unless there is a bug in your code.

So, unless I've missed something here, don't you think an assert() would 
be appropriate if RTE_ETH_DEV_SRIOV(dev).active equals ETH_16_POOLS?

thanks,
vlad

>
>

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v5 4/6] ether: Check VMDq RSS mode
  2015-01-09 13:49                 ` Vlad Zolotarov
@ 2015-01-12  3:41                   ` Ouyang, Changchun
  2015-01-12 13:58                     ` Vlad Zolotarov
  0 siblings, 1 reply; 144+ messages in thread
From: Ouyang, Changchun @ 2015-01-12  3:41 UTC (permalink / raw)
  To: Vlad Zolotarov, dev



From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
Sent: Friday, January 09, 2015 9:50 PM
To: Ouyang, Changchun; dev@dpdk.org
Subject: Re: [dpdk-dev] [PATCH v5 4/6] ether: Check VMDq RSS mode


On 01/09/15 07:54, Ouyang, Changchun wrote:





-----Original Message-----

From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]

Sent: Friday, January 9, 2015 2:49 AM

To: Ouyang, Changchun; dev@dpdk.org<mailto:dev@dpdk.org>

Subject: Re: [dpdk-dev] [PATCH v5 4/6] ether: Check VMDq RSS mode





On 01/08/15 11:19, Vlad Zolotarov wrote:



On 01/07/15 08:32, Ouyang Changchun wrote:

Check mq mode for VMDq RSS, handle it correctly instead of returning

an error; Also remove the limitation of per pool queue number has max

value of 1, because the per pool queue number could be 2 or 4 if it

is VMDq RSS mode;



The number of rxq specified in config will determine the mq mode for

VMDq RSS.



Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com><mailto:changchun.ouyang@intel.com>



changes in v5:

   - Fix '<' issue, it should be '<=' to test rxq number;

   - Extract a function to remove the embeded switch-case statement.



---

  lib/librte_ether/rte_ethdev.c | 50

++++++++++++++++++++++++++++++++++++++-----

  1 file changed, 45 insertions(+), 5 deletions(-)



diff --git a/lib/librte_ether/rte_ethdev.c

b/lib/librte_ether/rte_ethdev.c index 95f2ceb..8363e26 100644

--- a/lib/librte_ether/rte_ethdev.c

+++ b/lib/librte_ether/rte_ethdev.c

@@ -503,6 +503,31 @@ rte_eth_dev_tx_queue_config(struct

rte_eth_dev

*dev, uint16_t nb_queues)

  }

    static int

+rte_eth_dev_check_vf_rss_rxq_num(uint8_t port_id, uint16_t nb_rx_q)

+{

+    struct rte_eth_dev *dev = &rte_eth_devices[port_id];

+    switch (nb_rx_q) {

+    case 1:

+    case 2:

+        RTE_ETH_DEV_SRIOV(dev).active =

+            ETH_64_POOLS;

+        break;

+    case 4:

+        RTE_ETH_DEV_SRIOV(dev).active =

+            ETH_32_POOLS;

+        break;

+    default:

+        return -EINVAL;

+    }

+

+    RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = nb_rx_q;

+    RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx =

+        dev->pci_dev->max_vfs * nb_rx_q;

+

+    return 0;

+}

+

+static int

  rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q,

uint16_t nb_tx_q,

                const struct rte_eth_conf *dev_conf)

  {

@@ -510,8 +535,7 @@ rte_eth_dev_check_mq_mode(uint8_t port_id,

uint16_t nb_rx_q, uint16_t nb_tx_q,

        if (RTE_ETH_DEV_SRIOV(dev).active != 0) {

          /* check multi-queue mode */

-        if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_RSS) ||

-            (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||

+        if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||

              (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB_RSS) ||

              (dev_conf->txmode.mq_mode == ETH_MQ_TX_DCB)) {

              /* SRIOV only works in VMDq enable mode */ @@ -525,7

+549,6 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t

nb_rx_q, uint16_t nb_tx_q,

          }

            switch (dev_conf->rxmode.mq_mode) {

-        case ETH_MQ_RX_VMDQ_RSS:

          case ETH_MQ_RX_VMDQ_DCB:

          case ETH_MQ_RX_VMDQ_DCB_RSS:

              /* DCB/RSS VMDQ in SRIOV mode, not implement yet */ @@

-534,6 +557,25 @@ rte_eth_dev_check_mq_mode(uint8_t port_id,

uint16_t

nb_rx_q, uint16_t nb_tx_q,

                      "unsupported VMDQ mq_mode rx %u\n",

                      port_id, dev_conf->rxmode.mq_mode);

              return (-EINVAL);

+        case ETH_MQ_RX_RSS:

+            PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8

+                    " SRIOV active, "

+                    "Rx mq mode is changed from:"

+                    "mq_mode %u into VMDQ mq_mode %u\n",

+                    port_id,

+                    dev_conf->rxmode.mq_mode,

+                    dev->data->dev_conf.rxmode.mq_mode);

+        case ETH_MQ_RX_VMDQ_RSS:

+            dev->data->dev_conf.rxmode.mq_mode =

ETH_MQ_RX_VMDQ_RSS;

+            if (nb_rx_q <= RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool)

+                if (rte_eth_dev_check_vf_rss_rxq_num(port_id,

nb_rx_q) != 0) {

+                    PMD_DEBUG_TRACE("ethdev port_id=%d"

+                        " SRIOV active, invalid queue"

+                        " number for VMDQ RSS\n",

+                        port_id);



Some nitpicking here: I'd add the allowed values descriptions to the

error message. Something like: "invalid queue number for VMDQ RSS.

Allowed values are 1, 2 or 4\n".



+                    return -EINVAL;

+                }

+            break;

          default: /* ETH_MQ_RX_VMDQ_ONLY or ETH_MQ_RX_NONE */

              /* if nothing mq mode configure, use default scheme */

              dev->data->dev_conf.rxmode.mq_mode =

ETH_MQ_RX_VMDQ_ONLY; @@ -553,8 +595,6 @@

rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q,

uint16_t nb_tx_q,

          default: /* ETH_MQ_TX_VMDQ_ONLY or ETH_MQ_TX_NONE */

              /* if nothing mq mode configure, use default scheme */

              dev->data->dev_conf.txmode.mq_mode =

ETH_MQ_TX_VMDQ_ONLY;

-            if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)

-                RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;



I'm not sure u may just remove it. These lines originally belong to a

different flow. Are u sure u can remove them like that? What if the

mq_mode is ETH_MQ_RX_NONE and nb_q_per_pool has been initialized

to 4

or 8 in ixgbe_pf_host_init()?



I misread the patch - these lines belong to the txmode.mq_mode switch case.

I think it's ok to remove these really strange lines here. And when I look at it i

think for the similar reasons the similar lines should be removed in the Rx

case too: consider non-RSS case with MQ DCB Tx configuration.



I search code in this function, only one place has

" if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)

           RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;"



The only place is default branch, which is for rx_none, or vmdq_only mode,

Here is a snippet of an rte_eth_dev_check_mq_mode() from the current master:

               switch (dev_conf->rxmode.mq_mode) {

               case ETH_MQ_RX_VMDQ_RSS:

               case ETH_MQ_RX_VMDQ_DCB:

               case ETH_MQ_RX_VMDQ_DCB_RSS:

                       /* DCB/RSS VMDQ in SRIOV mode, not implement yet */

                       PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8

                                      " SRIOV active, "

                                      "unsupported VMDQ mq_mode rx %u\n",

                                      port_id, dev_conf->rxmode.mq_mode);

                       return (-EINVAL);

               default: /* ETH_MQ_RX_VMDQ_ONLY or ETH_MQ_RX_NONE */

                       /* if nothing mq mode configure, use default scheme */

                       dev->data->dev_conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_ONLY;

                       if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)                 <---- This is one

                               RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;

                       break;

               }



               switch (dev_conf->txmode.mq_mode) {

               case ETH_MQ_TX_VMDQ_DCB:

                       /* DCB VMDQ in SRIOV mode, not implement yet */

                       PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8

                                      " SRIOV active, "

                                      "unsupported VMDQ mq_mode tx %u\n",

                                      port_id, dev_conf->txmode.mq_mode);

                       return (-EINVAL);

               default: /* ETH_MQ_TX_VMDQ_ONLY or ETH_MQ_TX_NONE */

                       /* if nothing mq mode configure, use default scheme */

                       dev->data->dev_conf.txmode.mq_mode = ETH_MQ_TX_VMDQ_ONLY;

                       if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)              <------ This is two. This is what your patch is removing

                               RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;

                       break;

               }


Changchun: Yes, you are correct. What I meant in my last response is that only one place remains AFTER my removal, so there were 2 places before my removal.
There is no disagreement here.




We don't need remove this, as it should assign as 1 because it did use 1 queue per pool.

And why is that? Just because RSS was not enabled? And what if a user wants multiple Tx queues? Mode 1100b of MRQE for instance?

Changchun: I can explain why I need this change (removing the second place) here.
When the tx mode is ETH_MQ_TX_NONE, the rx mode could be either ETH_MQ_RX_NONE or
ETH_MQ_RX_VMDQ_RSS, so we cannot force nb_q_per_pool to 1 just because the tx mode is ETH_MQ_TX_NONE.
We need to consider the combination of rx mode ETH_MQ_RX_VMDQ_RSS and tx mode ETH_MQ_TX_NONE;
in such a case, the queue number per pool could be 1, 2, or 4.

On the other hand, introducing ETH_MQ_TX_VMDQ_RSS for the tx mode seems very strange, because the tx side has no RSS feature.

thanks Changchun

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v5 5/6] ixgbe: Config VF RSS
  2015-01-09 14:01               ` Vlad Zolotarov
@ 2015-01-12  5:11                 ` Ouyang, Changchun
  0 siblings, 0 replies; 144+ messages in thread
From: Ouyang, Changchun @ 2015-01-12  5:11 UTC (permalink / raw)
  To: Vlad Zolotarov, dev



> -----Original Message-----
> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> Sent: Friday, January 9, 2015 10:02 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v5 5/6] ixgbe: Config VF RSS
> 
> 
> On 01/09/15 08:07, Ouyang, Changchun wrote:
> >
> >> -----Original Message-----
> >> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> >> Sent: Thursday, January 8, 2015 5:43 PM
> >> To: Ouyang, Changchun; dev@dpdk.org
> >> Subject: Re: [dpdk-dev] [PATCH v5 5/6] ixgbe: Config VF RSS
> >>
> >>
> >> On 01/07/15 08:32, Ouyang Changchun wrote:
> >>> It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable
> VF
> >> RSS.
> >>> The psrtype will determine how many queues the received packets will
> >>> distribute to, and the value of psrtype should depends on both facet:
> >>> max VF rxq number which has been negotiated with PF, and the number
> >>> of
> >> rxq specified in config on guest.
> >>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> >>>
> >>> Changes in v4:
> >>>    - the number of rxq from config should be power of 2 and should
> >>> not
> >> bigger than
> >>>       max VF rxq number(negotiated between guest and host).
> >>>
> >>> ---
> >>>    lib/librte_pmd_ixgbe/ixgbe_pf.c   |  15 ++++++
> >>>    lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 103
> >> +++++++++++++++++++++++++++++++++-----
> >>>    2 files changed, 106 insertions(+), 12 deletions(-)
> >>>
> >>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> >>> b/lib/librte_pmd_ixgbe/ixgbe_pf.c index dbda9b5..93f6e43 100644
> >>> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> >>> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> >>> @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev
> >> *eth_dev)
> >>>    	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw-
> mac.num_rar_entries), 0);
> >>>    	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw-
> mac.num_rar_entries), 0);
> >>>
> >>> +	/*
> >>> +	 * VF RSS can support at most 4 queues for each VF, even if
> >>> +	 * 8 queues are available for each VF, it need refine to 4
> >>> +	 * queues here due to this limitation, otherwise no queue
> >>> +	 * will receive any packet even RSS is enabled.
> >>> +	 */
> >>> +	if (eth_dev->data->dev_conf.rxmode.mq_mode ==
> >> ETH_MQ_RX_VMDQ_RSS) {
> >>> +		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
> >>> +			RTE_ETH_DEV_SRIOV(eth_dev).active =
> >> ETH_32_POOLS;
> >>> +			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
> >>> +			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
> >>> +				dev_num_vf(eth_dev) * 4;
> >>> +		}
> >>> +	}
> >>> +
> >>>    	/* set VMDq map to default PF pool */
> >>>    	hw->mac.ops.set_vmdq(hw, 0,
> >>> RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
> >>>
> >>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> >>> b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> >>> index f69abda..e83a9ab 100644
> >>> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> >>> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> >>> @@ -3327,6 +3327,68 @@ ixgbe_alloc_rx_queue_mbufs(struct
> >> igb_rx_queue *rxq)
> >>>    }
> >>>
> >>>    static int
> >>> +ixgbe_config_vf_rss(struct rte_eth_dev *dev) {
> >>> +	struct ixgbe_hw *hw;
> >>> +	uint32_t mrqc;
> >>> +
> >>> +	ixgbe_rss_configure(dev);
> >>> +
> >>> +	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> >>> +
> >>> +	/* MRQC: enable VF RSS */
> >>> +	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
> >>> +	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
> >>> +	switch (RTE_ETH_DEV_SRIOV(dev).active) {
> >>> +	case ETH_64_POOLS:
> >>> +		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
> >>> +		break;
> >>> +
> >>> +	case ETH_32_POOLS:
> >>> +	case ETH_16_POOLS:
> >> Isn't ETH_16_POOLS mode is invalid for VF RSS? It's what both spec
> >> states and what u handle in this patch in ixgbe_pf_host_configure().
> >> IMHO it would be better to treat this mode value as an error here
> >> since if u get it here it indicates of a SW bug.
> > I think we discussed it before already,  return err here will break here in the
> case of max vf number is less than 16.
> > If doing that, This make the library seems can't support vf rss in the case of
> max vf num less than 16.
> > So we obviously don't hope it break here.
> 
> I don't remember we were discussing these specific lines. However I do
> remember we talked about the previous section of this patch.
> I'm afraid u are missing my point here: ixgbe_pf_host_configure() is called
> before ixgbe_config_vf_rss() in the ixgbe_dev_start() flow. This means that
> RTE_ETH_DEV_SRIOV(dev).active will already be adjusted by your (!!!) code
> in the ixgbe_pf_host_configure() when u get to
> ixgbe_config_vf_rss() and it should not be equal ETH_16_POOLS unless there
> is a bug in your code.
> 
> So, unless I've missed something here, don't u think an assert() would be
> appropriate if RTE_ETH_DEV_SRIOV(dev).active equals ETH_16_POOLS?

Ooh, thanks for identifying this. The ETH_16_POOLS branch is not necessary here; as you said,
I have already handled it in the function ixgbe_pf_host_configure.
I will fix it in v6.
Thanks again
Changchun

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH v6 0/6] Enable VF RSS for Niantic
  2015-01-07  6:32       ` [dpdk-dev] [PATCH v5 0/6] Enable VF RSS for Niantic Ouyang Changchun
                           ` (6 preceding siblings ...)
  2015-01-08  9:56         ` [dpdk-dev] [PATCH v5 0/6] Enable VF RSS for Niantic Vlad Zolotarov
@ 2015-01-12  5:59         ` Ouyang Changchun
  2015-01-12  5:59           ` [dpdk-dev] [PATCH v6 1/6] ixgbe: Code cleanup Ouyang Changchun
                             ` (6 more replies)
  7 siblings, 7 replies; 144+ messages in thread
From: Ouyang Changchun @ 2015-01-12  5:59 UTC (permalink / raw)
  To: dev

This patch enables VF RSS for Niantic, which allows each VF to have at most 4 queues.
The actual number of queues per VF depends on the total number of pools, which is
determined by the max number of VFs at the PF initialization stage and the number of
queues specified in the config:
1) If the max number of VFs is in the range from 1 to 32, and the number of rxq is 4
('--rxq 4' in testpmd), then there are 32 pools in total (ETH_32_POOLS), and each VF
has 4 queues;
 
2) If the max number of VFs is in the range from 33 to 64, and the number of rxq is 2
('--rxq 2' in testpmd), then there are 64 pools in total (ETH_64_POOLS), and each VF
has 2 queues;
 
On the host, to enable VF RSS functionality, the rx mq mode should be set to ETH_MQ_RX_VMDQ_RSS
or ETH_MQ_RX_RSS, and SRIOV mode should be activated (max_vfs >= 1).
The VF RSS information, such as the hash function, RSS key and RSS key length, also needs to be configured.
 
The limitation of Niantic VF RSS is:
the hash and key are shared among the PF and all VFs, and the RETA table with 128 entries is
also shared among the PF and all VFs. So it is not possible to provide a method to query the hash
and RETA content per VF on the guest; if needed, please query them on the host (PF) for
the shared RETA information.
 
changes in v6:
  - refine codes and update message according to comments;

changes in v5:
  - Fix minor issue and some comments;
 
changes in v4:
  - Extract a function to remove embedded switch-case statement;
  - Check whether RX queue number is a valid one, otherwise return error;
  - Update the description a bit;
 
changes in v3:
  - More cleanup;
 
changes in v2:
  - Update the description;
  - Use receiving queue number('--rxq <q-num>') specified in config to determine the
    number of pool and the number of queue per VF;
 
changes in v1:
  - Config VF RSS;

Changchun Ouyang (6):
  ixgbe: Code cleanup
  ixgbe: Negotiate VF API version
  ixgbe: Get VF queue number
  ether: Check VMDq RSS mode
  ixgbe: Config VF RSS
  testpmd: Set Rx VMDq RSS mode

 app/test-pmd/testpmd.c              |  12 +++-
 lib/librte_ether/rte_ethdev.c       |  51 ++++++++++++--
 lib/librte_pmd_ixgbe/ixgbe_ethdev.h |   1 +
 lib/librte_pmd_ixgbe/ixgbe_pf.c     |  80 ++++++++++++++++++++-
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c   | 137 ++++++++++++++++++++++++++++--------
 5 files changed, 245 insertions(+), 36 deletions(-)

-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH v6 1/6] ixgbe: Code cleanup
  2015-01-12  5:59         ` [dpdk-dev] [PATCH v6 " Ouyang Changchun
@ 2015-01-12  5:59           ` Ouyang Changchun
  2015-01-12  5:59           ` [dpdk-dev] [PATCH v6 2/6] ixgbe: Negotiate VF API version Ouyang Changchun
                             ` (5 subsequent siblings)
  6 siblings, 0 replies; 144+ messages in thread
From: Ouyang Changchun @ 2015-01-12  5:59 UTC (permalink / raw)
  To: dev

Move the global register configuration out of the per-queue loop; also fix a typo and indentation.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 35 ++++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index 5c36bff..f69abda 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -3548,9 +3548,9 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
 				IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
 			}
 			srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
-				   IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
-				  IXGBE_SRRCTL_BSIZEHDR_MASK);
-			srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
+				IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
+				IXGBE_SRRCTL_BSIZEHDR_MASK);
+			srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
 		} else
 #endif
 			srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
@@ -3985,7 +3985,7 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 	struct igb_rx_queue *rxq;
 	struct rte_pktmbuf_pool_private *mbp_priv;
 	uint64_t bus_addr;
-	uint32_t srrctl;
+	uint32_t srrctl, psrtype = 0;
 	uint16_t buf_size;
 	uint16_t i;
 	int ret;
@@ -4039,20 +4039,10 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 		 * Configure Header Split
 		 */
 		if (dev->data->dev_conf.rxmode.header_split) {
-
-			/* Must setup the PSRTYPE register */
-			uint32_t psrtype;
-			psrtype = IXGBE_PSRTYPE_TCPHDR |
-				IXGBE_PSRTYPE_UDPHDR   |
-				IXGBE_PSRTYPE_IPV4HDR  |
-				IXGBE_PSRTYPE_IPV6HDR;
-
-			IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE(i), psrtype);
-
 			srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
-				   IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
-				  IXGBE_SRRCTL_BSIZEHDR_MASK);
-			srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
+				IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
+				IXGBE_SRRCTL_BSIZEHDR_MASK);
+			srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
 		} else
 #endif
 			srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
@@ -4095,6 +4085,17 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 		}
 	}
 
+#ifdef RTE_HEADER_SPLIT_ENABLE
+	if (dev->data->dev_conf.rxmode.header_split)
+		/* Must setup the PSRTYPE register */
+		psrtype = IXGBE_PSRTYPE_TCPHDR |
+			IXGBE_PSRTYPE_UDPHDR   |
+			IXGBE_PSRTYPE_IPV4HDR  |
+			IXGBE_PSRTYPE_IPV6HDR;
+#endif
+
+	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
+
 	if (dev->data->dev_conf.rxmode.enable_scatter) {
 		if (!dev->data->scattered_rx)
 			PMD_INIT_LOG(DEBUG, "forcing scatter mode");
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH v6 2/6] ixgbe: Negotiate VF API version
  2015-01-12  5:59         ` [dpdk-dev] [PATCH v6 " Ouyang Changchun
  2015-01-12  5:59           ` [dpdk-dev] [PATCH v6 1/6] ixgbe: Code cleanup Ouyang Changchun
@ 2015-01-12  5:59           ` Ouyang Changchun
  2015-01-12  5:59           ` [dpdk-dev] [PATCH v6 3/6] ixgbe: Get VF queue number Ouyang Changchun
                             ` (4 subsequent siblings)
  6 siblings, 0 replies; 144+ messages in thread
From: Ouyang Changchun @ 2015-01-12  5:59 UTC (permalink / raw)
  To: dev

Negotiate API version with VF when receiving the IXGBE_VF_API_NEGOTIATE message.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_pmd_ixgbe/ixgbe_ethdev.h |  1 +
 lib/librte_pmd_ixgbe/ixgbe_pf.c     | 25 +++++++++++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.h b/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
index ca99170..730098d 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
+++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.h
@@ -159,6 +159,7 @@ struct ixgbe_vf_info {
 	uint16_t tx_rate[IXGBE_MAX_QUEUE_NUM_PER_VF];
 	uint16_t vlan_count;
 	uint8_t spoofchk_enabled;
+	uint8_t api_version;
 };
 
 /*
diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c b/lib/librte_pmd_ixgbe/ixgbe_pf.c
index 51da1fd..495aff5 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
@@ -469,6 +469,28 @@ ixgbe_set_vf_lpe(struct rte_eth_dev *dev, __rte_unused uint32_t vf, uint32_t *ms
 }
 
 static int
+ixgbe_negotiate_vf_api(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf)
+{
+	uint32_t api_version = msgbuf[1];
+	struct ixgbe_vf_info *vfinfo =
+		*IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private);
+
+	switch (api_version) {
+	case ixgbe_mbox_api_10:
+	case ixgbe_mbox_api_11:
+		vfinfo[vf].api_version = (uint8_t)api_version;
+		return 0;
+	default:
+		break;
+	}
+
+	RTE_LOG(ERR, PMD, "Negotiate invalid api version %u from VF %d\n",
+		api_version, vf);
+
+	return -1;
+}
+
+static int
 ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
 {
 	uint16_t mbx_size = IXGBE_VFMAILBOX_SIZE;
@@ -512,6 +534,9 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
 	case IXGBE_VF_SET_VLAN:
 		retval = ixgbe_vf_set_vlan(dev, vf, msgbuf);
 		break;
+	case IXGBE_VF_API_NEGOTIATE:
+		retval = ixgbe_negotiate_vf_api(dev, vf, msgbuf);
+		break;
 	default:
 		PMD_DRV_LOG(DEBUG, "Unhandled Msg %8.8x", (unsigned)msgbuf[0]);
 		retval = IXGBE_ERR_MBX;
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH v6 3/6] ixgbe: Get VF queue number
  2015-01-12  5:59         ` [dpdk-dev] [PATCH v6 " Ouyang Changchun
  2015-01-12  5:59           ` [dpdk-dev] [PATCH v6 1/6] ixgbe: Code cleanup Ouyang Changchun
  2015-01-12  5:59           ` [dpdk-dev] [PATCH v6 2/6] ixgbe: Negotiate VF API version Ouyang Changchun
@ 2015-01-12  5:59           ` Ouyang Changchun
  2015-01-19  9:13             ` Wodkowski, PawelX
  2015-01-12  5:59           ` [dpdk-dev] [PATCH v6 4/6] ether: Check VMDq RSS mode Ouyang Changchun
                             ` (3 subsequent siblings)
  6 siblings, 1 reply; 144+ messages in thread
From: Ouyang Changchun @ 2015-01-12  5:59 UTC (permalink / raw)
  To: dev

Get the available Rx and Tx queue number when receiving IXGBE_VF_GET_QUEUES message from VF.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>

changes in v5
  - Add some 'FIX ME' comments for IXGBE_VF_TRANS_VLAN.

---
 lib/librte_pmd_ixgbe/ixgbe_pf.c | 40 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c b/lib/librte_pmd_ixgbe/ixgbe_pf.c
index 495aff5..dbda9b5 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
@@ -53,6 +53,8 @@
 #include "ixgbe_ethdev.h"
 
 #define IXGBE_MAX_VFTA     (128)
+#define IXGBE_VF_MSG_SIZE_DEFAULT 1
+#define IXGBE_VF_GET_QUEUE_MSG_SIZE 5
 
 static inline uint16_t
 dev_num_vf(struct rte_eth_dev *eth_dev)
@@ -491,9 +493,41 @@ ixgbe_negotiate_vf_api(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf)
 }
 
 static int
+ixgbe_get_vf_queues(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf)
+{
+	struct ixgbe_vf_info *vfinfo =
+		*IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private);
+	uint32_t default_q = vf * RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
+
+	/* Verify if the PF supports the mbox APIs version or not */
+	switch (vfinfo[vf].api_version) {
+	case ixgbe_mbox_api_20:
+	case ixgbe_mbox_api_11:
+		break;
+	default:
+		return -1;
+	}
+
+	/* Notify VF of Rx and Tx queue number */
+	msgbuf[IXGBE_VF_RX_QUEUES] = RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
+	msgbuf[IXGBE_VF_TX_QUEUES] = RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
+
+	/* Notify VF of default queue */
+	msgbuf[IXGBE_VF_DEF_QUEUE] = default_q;
+
+	/*
+	 * FIX ME if it needs fill msgbuf[IXGBE_VF_TRANS_VLAN]
+	 * for VLAN strip or VMDQ_DCB or VMDQ_DCB_RSS
+	 */
+
+	return 0;
+}
+
+static int
 ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
 {
 	uint16_t mbx_size = IXGBE_VFMAILBOX_SIZE;
+	uint16_t msg_size = IXGBE_VF_MSG_SIZE_DEFAULT;
 	uint32_t msgbuf[IXGBE_VFMAILBOX_SIZE];
 	int32_t retval;
 	struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
@@ -537,6 +571,10 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
 	case IXGBE_VF_API_NEGOTIATE:
 		retval = ixgbe_negotiate_vf_api(dev, vf, msgbuf);
 		break;
+	case IXGBE_VF_GET_QUEUES:
+		retval = ixgbe_get_vf_queues(dev, vf, msgbuf);
+		msg_size = IXGBE_VF_GET_QUEUE_MSG_SIZE;
+		break;
 	default:
 		PMD_DRV_LOG(DEBUG, "Unhandled Msg %8.8x", (unsigned)msgbuf[0]);
 		retval = IXGBE_ERR_MBX;
@@ -551,7 +589,7 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
 
 	msgbuf[0] |= IXGBE_VT_MSGTYPE_CTS;
 
-	ixgbe_write_mbx(hw, msgbuf, 1, vf);
+	ixgbe_write_mbx(hw, msgbuf, msg_size, vf);
 
 	return retval;
 }
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH v6 4/6] ether: Check VMDq RSS mode
  2015-01-12  5:59         ` [dpdk-dev] [PATCH v6 " Ouyang Changchun
                             ` (2 preceding siblings ...)
  2015-01-12  5:59           ` [dpdk-dev] [PATCH v6 3/6] ixgbe: Get VF queue number Ouyang Changchun
@ 2015-01-12  5:59           ` Ouyang Changchun
  2015-01-12 14:06             ` Vlad Zolotarov
                               ` (2 more replies)
  2015-01-12  5:59           ` [dpdk-dev] [PATCH v6 5/6] ixgbe: Config VF RSS Ouyang Changchun
                             ` (2 subsequent siblings)
  6 siblings, 3 replies; 144+ messages in thread
From: Ouyang Changchun @ 2015-01-12  5:59 UTC (permalink / raw)
  To: dev

Check mq mode for VMDq RSS and handle it correctly instead of returning an error;
also remove the limitation that the per-pool queue number has a max value of 1, because
the per-pool queue number could be 2 or 4 in VMDq RSS mode.

The number of rxq specified in config will determine the mq mode for VMDq RSS.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>

changes in v6:
  - More clear error message when queue number is invalid.

changes in v5:
  - Fix '<' issue, it should be '<=' to test rxq number;
  - Extract a function to remove the embedded switch-case statement.

---
 lib/librte_ether/rte_ethdev.c | 51 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 46 insertions(+), 5 deletions(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 95f2ceb..e9e3368 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -503,6 +503,31 @@ rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
 }
 
 static int
+rte_eth_dev_check_vf_rss_rxq_num(uint8_t port_id, uint16_t nb_rx_q)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+	switch (nb_rx_q) {
+	case 1:
+	case 2:
+		RTE_ETH_DEV_SRIOV(dev).active =
+			ETH_64_POOLS;
+		break;
+	case 4:
+		RTE_ETH_DEV_SRIOV(dev).active =
+			ETH_32_POOLS;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = nb_rx_q;
+	RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx =
+		dev->pci_dev->max_vfs * nb_rx_q;
+
+	return 0;
+}
+
+static int
 rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 		      const struct rte_eth_conf *dev_conf)
 {
@@ -510,8 +535,7 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 
 	if (RTE_ETH_DEV_SRIOV(dev).active != 0) {
 		/* check multi-queue mode */
-		if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_RSS) ||
-		    (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
+		if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
 		    (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB_RSS) ||
 		    (dev_conf->txmode.mq_mode == ETH_MQ_TX_DCB)) {
 			/* SRIOV only works in VMDq enable mode */
@@ -525,7 +549,6 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 		}
 
 		switch (dev_conf->rxmode.mq_mode) {
-		case ETH_MQ_RX_VMDQ_RSS:
 		case ETH_MQ_RX_VMDQ_DCB:
 		case ETH_MQ_RX_VMDQ_DCB_RSS:
 			/* DCB/RSS VMDQ in SRIOV mode, not implement yet */
@@ -534,6 +557,26 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 					"unsupported VMDQ mq_mode rx %u\n",
 					port_id, dev_conf->rxmode.mq_mode);
 			return (-EINVAL);
+		case ETH_MQ_RX_RSS:
+			PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8
+					" SRIOV active, "
+					"Rx mq mode is changed from:"
+					"mq_mode %u into VMDQ mq_mode %u\n",
+					port_id,
+					dev_conf->rxmode.mq_mode,
+					dev->data->dev_conf.rxmode.mq_mode);
+		case ETH_MQ_RX_VMDQ_RSS:
+			dev->data->dev_conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_RSS;
+			if (nb_rx_q <= RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool)
+				if (rte_eth_dev_check_vf_rss_rxq_num(port_id, nb_rx_q) != 0) {
+					PMD_DEBUG_TRACE("ethdev port_id=%d"
+						" SRIOV active, invalid queue"
+						" number for VMDQ RSS, allowed"
+						" value are 1, 2 or 4\n",
+						port_id);
+					return -EINVAL;
+				}
+			break;
 		default: /* ETH_MQ_RX_VMDQ_ONLY or ETH_MQ_RX_NONE */
 			/* if nothing mq mode configure, use default scheme */
 			dev->data->dev_conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_ONLY;
@@ -553,8 +596,6 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 		default: /* ETH_MQ_TX_VMDQ_ONLY or ETH_MQ_TX_NONE */
 			/* if nothing mq mode configure, use default scheme */
 			dev->data->dev_conf.txmode.mq_mode = ETH_MQ_TX_VMDQ_ONLY;
-			if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)
-				RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;
 			break;
 		}
 
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH v6 5/6] ixgbe: Config VF RSS
  2015-01-12  5:59         ` [dpdk-dev] [PATCH v6 " Ouyang Changchun
                             ` (3 preceding siblings ...)
  2015-01-12  5:59           ` [dpdk-dev] [PATCH v6 4/6] ether: Check VMDq RSS mode Ouyang Changchun
@ 2015-01-12  5:59           ` Ouyang Changchun
  2015-01-12 14:04             ` Vlad Zolotarov
  2015-01-20  9:35             ` Wodkowski, PawelX
  2015-01-12  5:59           ` [dpdk-dev] [PATCH v6 6/6] testpmd: Set Rx VMDq RSS mode Ouyang Changchun
  2015-01-18 22:24           ` [dpdk-dev] [PATCH v6 0/6] Enable VF RSS for Niantic Thomas Monjalon
  6 siblings, 2 replies; 144+ messages in thread
From: Ouyang Changchun @ 2015-01-12  5:59 UTC (permalink / raw)
  To: dev

RSS, IXGBE_MRQC and IXGBE_VFPSRTYPE need to be configured to enable VF RSS.

The psrtype determines how many queues the received packets will be distributed to,
and the value of psrtype depends on two factors: the max VF rxq number which
has been negotiated with the PF, and the number of rxq specified in the config on the guest.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>

Changes in v6:
  - Raise an error for the case of ETH_16_POOLS in config vf rss, as the previous
    logic has already changed it into ETH_32_POOLS.

Changes in v4:
 - The number of rxq from config should be a power of 2 and should not be bigger than
    the max VF rxq number (negotiated between guest and host).

---
 lib/librte_pmd_ixgbe/ixgbe_pf.c   |  15 ++++++
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 102 +++++++++++++++++++++++++++++++++-----
 2 files changed, 105 insertions(+), 12 deletions(-)

diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c b/lib/librte_pmd_ixgbe/ixgbe_pf.c
index dbda9b5..93f6e43 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
@@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev *eth_dev)
 	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw->mac.num_rar_entries), 0);
 	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw->mac.num_rar_entries), 0);
 
+	/*
+	 * VF RSS can support at most 4 queues for each VF, even if
+	 * 8 queues are available for each VF, it need refine to 4
+	 * queues here due to this limitation, otherwise no queue
+	 * will receive any packet even RSS is enabled.
+	 */
+	if (eth_dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_VMDQ_RSS) {
+		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
+			RTE_ETH_DEV_SRIOV(eth_dev).active = ETH_32_POOLS;
+			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
+			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
+				dev_num_vf(eth_dev) * 4;
+		}
+	}
+
 	/* set VMDq map to default PF pool */
 	hw->mac.ops.set_vmdq(hw, 0, RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
 
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index f69abda..20627df 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -3327,6 +3327,67 @@ ixgbe_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
 }
 
 static int
+ixgbe_config_vf_rss(struct rte_eth_dev *dev)
+{
+	struct ixgbe_hw *hw;
+	uint32_t mrqc;
+
+	ixgbe_rss_configure(dev);
+
+	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+	/* MRQC: enable VF RSS */
+	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
+	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
+	switch (RTE_ETH_DEV_SRIOV(dev).active) {
+	case ETH_64_POOLS:
+		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
+		break;
+
+	case ETH_32_POOLS:
+		mrqc |= IXGBE_MRQC_VMDQRSS32EN;
+		break;
+
+	default:
+		PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
+		return -EINVAL;
+	}
+
+	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
+
+	return 0;
+}
+
+static int
+ixgbe_config_vf_default(struct rte_eth_dev *dev)
+{
+	struct ixgbe_hw *hw =
+		IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+	switch (RTE_ETH_DEV_SRIOV(dev).active) {
+	case ETH_64_POOLS:
+		IXGBE_WRITE_REG(hw, IXGBE_MRQC,
+			IXGBE_MRQC_VMDQEN);
+		break;
+
+	case ETH_32_POOLS:
+		IXGBE_WRITE_REG(hw, IXGBE_MRQC,
+			IXGBE_MRQC_VMDQRT4TCEN);
+		break;
+
+	case ETH_16_POOLS:
+		IXGBE_WRITE_REG(hw, IXGBE_MRQC,
+			IXGBE_MRQC_VMDQRT8TCEN);
+		break;
+	default:
+		PMD_INIT_LOG(ERR,
+			"invalid pool number in IOV mode");
+		break;
+	}
+	return 0;
+}
+
+static int
 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
 {
 	struct ixgbe_hw *hw =
@@ -3358,24 +3419,25 @@ ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
 			default: ixgbe_rss_disable(dev);
 		}
 	} else {
-		switch (RTE_ETH_DEV_SRIOV(dev).active) {
 		/*
 		 * SRIOV active scheme
-		 * FIXME if support DCB/RSS together with VMDq & SRIOV
+		 * Support RSS together with VMDq & SRIOV
 		 */
-		case ETH_64_POOLS:
-			IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQEN);
-			break;
-
-		case ETH_32_POOLS:
-			IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQRT4TCEN);
+		switch (dev->data->dev_conf.rxmode.mq_mode) {
+		case ETH_MQ_RX_RSS:
+		case ETH_MQ_RX_VMDQ_RSS:
+			ixgbe_config_vf_rss(dev);
 			break;
 
-		case ETH_16_POOLS:
-			IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQRT8TCEN);
-			break;
+		/* FIXME if support DCB/RSS together with VMDq & SRIOV */
+		case ETH_MQ_RX_VMDQ_DCB:
+		case ETH_MQ_RX_VMDQ_DCB_RSS:
+			PMD_INIT_LOG(ERR,
+				"Could not support DCB with VMDq & SRIOV");
+			return -1;
 		default:
-			PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
+			ixgbe_config_vf_default(dev);
+			break;
 		}
 	}
 
@@ -3993,6 +4055,19 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 	PMD_INIT_FUNC_TRACE();
 	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
+	if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
+		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
+			"it should be power of 2");
+		return -1;
+	}
+
+	if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
+		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
+			"it should be equal to or less than %d",
+			hw->mac.max_rx_queues);
+		return -1;
+	}
+
 	/*
 	 * When the VF driver issues a IXGBE_VF_RESET request, the PF driver
 	 * disables the VF receipt of packets if the PF MTU is > 1500.
@@ -4094,6 +4169,9 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 			IXGBE_PSRTYPE_IPV6HDR;
 #endif
 
+	/* Set RQPL for VF RSS according to max Rx queue */
+	psrtype |= (dev->data->nb_rx_queues >> 1) <<
+		IXGBE_PSRTYPE_RQPL_SHIFT;
 	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
 
 	if (dev->data->dev_conf.rxmode.enable_scatter) {
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* [dpdk-dev] [PATCH v6 6/6] testpmd: Set Rx VMDq RSS mode
  2015-01-12  5:59         ` [dpdk-dev] [PATCH v6 " Ouyang Changchun
                             ` (4 preceding siblings ...)
  2015-01-12  5:59           ` [dpdk-dev] [PATCH v6 5/6] ixgbe: Config VF RSS Ouyang Changchun
@ 2015-01-12  5:59           ` Ouyang Changchun
  2015-01-12 14:05             ` Vlad Zolotarov
  2015-01-18 22:24           ` [dpdk-dev] [PATCH v6 0/6] Enable VF RSS for Niantic Thomas Monjalon
  6 siblings, 1 reply; 144+ messages in thread
From: Ouyang Changchun @ 2015-01-12  5:59 UTC (permalink / raw)
  To: dev

Set VMDq RSS mode if the port has VFs (the max VF number is at least 1) and has RSS information.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>

changes in v6
  - Put common statement outside the if branch.

changes in v5
  - Assign txmode.mq_mode with ETH_MQ_TX_NONE explicitly;
  - Remove one line wrong comment.

---
 app/test-pmd/testpmd.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 8c69756..773b8af 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -1700,7 +1700,6 @@ init_port_config(void)
 			port->dev_conf.rx_adv_conf.rss_conf.rss_hf = 0;
 		}
 
-		/* In SR-IOV mode, RSS mode is not available */
 		if (port->dcb_flag == 0 && port->dev_info.max_vfs == 0) {
 			if( port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0)
 				port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
@@ -1708,6 +1707,17 @@ init_port_config(void)
 				port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
 		}
 
+		if (port->dev_info.max_vfs != 0) {
+			if (port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0)
+				port->dev_conf.rxmode.mq_mode =
+					ETH_MQ_RX_VMDQ_RSS;
+			else
+				port->dev_conf.rxmode.mq_mode =
+					ETH_MQ_RX_NONE;
+
+			port->dev_conf.txmode.mq_mode = ETH_MQ_TX_NONE;
+		}
+
 		port->rx_conf.rx_thresh = rx_thresh;
 		port->rx_conf.rx_free_thresh = rx_free_thresh;
 		port->rx_conf.rx_drop_en = rx_drop_en;
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v5 4/6] ether: Check VMDq RSS mode
  2015-01-12  3:41                   ` Ouyang, Changchun
@ 2015-01-12 13:58                     ` Vlad Zolotarov
  2015-01-13  1:50                       ` Ouyang, Changchun
  0 siblings, 1 reply; 144+ messages in thread
From: Vlad Zolotarov @ 2015-01-12 13:58 UTC (permalink / raw)
  To: Ouyang, Changchun, dev


On 01/12/15 05:41, Ouyang, Changchun wrote:
>
> *From:*Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> *Sent:* Friday, January 09, 2015 9:50 PM
> *To:* Ouyang, Changchun; dev@dpdk.org
> *Subject:* Re: [dpdk-dev] [PATCH v5 4/6] ether: Check VMDq RSS mode
>
> On 01/09/15 07:54, Ouyang, Changchun wrote:
>
>       
>
>       
>
>         -----Original Message-----
>
>         From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>
>         Sent: Friday, January 9, 2015 2:49 AM
>
>         To: Ouyang, Changchun;dev@dpdk.org  <mailto:dev@dpdk.org>
>
>         Subject: Re: [dpdk-dev] [PATCH v5 4/6] ether: Check VMDq RSS mode
>
>           
>
>           
>
>         On 01/08/15 11:19, Vlad Zolotarov wrote:
>
>               
>
>             On 01/07/15 08:32, Ouyang Changchun wrote:
>
>                 Check mq mode for VMDq RSS, handle it correctly instead of returning
>
>                 an error; Also remove the limitation of per pool queue number has max
>
>                 value of 1, because the per pool queue number could be 2 or 4 if it
>
>                 is VMDq RSS mode;
>
>                   
>
>                 The number of rxq specified in config will determine the mq mode for
>
>                 VMDq RSS.
>
>                   
>
>                 Signed-off-by: Changchun Ouyang<changchun.ouyang@intel.com>  <mailto:changchun.ouyang@intel.com>
>
>                   
>
>                 changes in v5:
>
>                     - Fix '<' issue, it should be '<=' to test rxq number;
>
>                     - Extract a function to remove the embeded switch-case statement.
>
>                   
>
>                 ---
>
>                    lib/librte_ether/rte_ethdev.c | 50
>
>                 ++++++++++++++++++++++++++++++++++++++-----
>
>                    1 file changed, 45 insertions(+), 5 deletions(-)
>
>                   
>
>                 diff --git a/lib/librte_ether/rte_ethdev.c
>
>                 b/lib/librte_ether/rte_ethdev.c index 95f2ceb..8363e26 100644
>
>                 --- a/lib/librte_ether/rte_ethdev.c
>
>                 +++ b/lib/librte_ether/rte_ethdev.c
>
>                 @@ -503,6 +503,31 @@ rte_eth_dev_tx_queue_config(struct
>
>         rte_eth_dev
>
>                 *dev, uint16_t nb_queues)
>
>                    }
>
>                      static int
>
>                 +rte_eth_dev_check_vf_rss_rxq_num(uint8_t port_id, uint16_t nb_rx_q)
>
>                 +{
>
>                 +    struct rte_eth_dev *dev = &rte_eth_devices[port_id];
>
>                 +    switch (nb_rx_q) {
>
>                 +    case 1:
>
>                 +    case 2:
>
>                 +        RTE_ETH_DEV_SRIOV(dev).active =
>
>                 +            ETH_64_POOLS;
>
>                 +        break;
>
>                 +    case 4:
>
>                 +        RTE_ETH_DEV_SRIOV(dev).active =
>
>                 +            ETH_32_POOLS;
>
>                 +        break;
>
>                 +    default:
>
>                 +        return -EINVAL;
>
>                 +    }
>
>                 +
>
>                 +    RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = nb_rx_q;
>
>                 +    RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx =
>
>                 +        dev->pci_dev->max_vfs * nb_rx_q;
>
>                 +
>
>                 +    return 0;
>
>                 +}
>
>                 +
>
>                 +static int
>
>                    rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q,
>
>                 uint16_t nb_tx_q,
>
>                                  const struct rte_eth_conf *dev_conf)
>
>                    {
>
>                 @@ -510,8 +535,7 @@ rte_eth_dev_check_mq_mode(uint8_t port_id,
>
>                 uint16_t nb_rx_q, uint16_t nb_tx_q,
>
>                          if (RTE_ETH_DEV_SRIOV(dev).active != 0) {
>
>                            /* check multi-queue mode */
>
>                 -        if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_RSS) ||
>
>                 -            (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
>
>                 +        if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
>
>                                (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB_RSS) ||
>
>                                (dev_conf->txmode.mq_mode == ETH_MQ_TX_DCB)) {
>
>                                /* SRIOV only works in VMDq enable mode */ @@ -525,7
>
>                 +549,6 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t
>
>                 nb_rx_q, uint16_t nb_tx_q,
>
>                            }
>
>                              switch (dev_conf->rxmode.mq_mode) {
>
>                 -        case ETH_MQ_RX_VMDQ_RSS:
>
>                            case ETH_MQ_RX_VMDQ_DCB:
>
>                            case ETH_MQ_RX_VMDQ_DCB_RSS:
>
>                                /* DCB/RSS VMDQ in SRIOV mode, not implement yet */ @@
>
>                 -534,6 +557,25 @@ rte_eth_dev_check_mq_mode(uint8_t port_id,
>
>         uint16_t
>
>                 nb_rx_q, uint16_t nb_tx_q,
>
>                                        "unsupported VMDQ mq_mode rx %u\n",
>
>                                        port_id, dev_conf->rxmode.mq_mode);
>
>                                return (-EINVAL);
>
>                 +        case ETH_MQ_RX_RSS:
>
>                 +            PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8
>
>                 +                    " SRIOV active, "
>
>                 +                    "Rx mq mode is changed from:"
>
>                 +                    "mq_mode %u into VMDQ mq_mode %u\n",
>
>                 +                    port_id,
>
>                 +                    dev_conf->rxmode.mq_mode,
>
>                 +                    dev->data->dev_conf.rxmode.mq_mode);
>
>                 +        case ETH_MQ_RX_VMDQ_RSS:
>
>                 +            dev->data->dev_conf.rxmode.mq_mode =
>
>         ETH_MQ_RX_VMDQ_RSS;
>
>                 +            if (nb_rx_q <= RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool)
>
>                 +                if (rte_eth_dev_check_vf_rss_rxq_num(port_id,
>
>                 nb_rx_q) != 0) {
>
>                 +                    PMD_DEBUG_TRACE("ethdev port_id=%d"
>
>                 +                        " SRIOV active, invalid queue"
>
>                 +                        " number for VMDQ RSS\n",
>
>                 +                        port_id);
>
>               
>
>             Some nitpicking here: I'd add the allowed values descriptions to the
>
>             error message. Something like: "invalid queue number for VMDQ RSS.
>
>             Allowed values are 1, 2 or 4\n".
>
>               
>
>                 +                    return -EINVAL;
>
>                 +                }
>
>                 +            break;
>
>                            default: /* ETH_MQ_RX_VMDQ_ONLY or ETH_MQ_RX_NONE */
>
>                                /* if nothing mq mode configure, use default scheme */
>
>                                dev->data->dev_conf.rxmode.mq_mode =
>
>                 ETH_MQ_RX_VMDQ_ONLY; @@ -553,8 +595,6 @@
>
>                 rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q,
>
>         uint16_t nb_tx_q,
>
>                            default: /* ETH_MQ_TX_VMDQ_ONLY or ETH_MQ_TX_NONE */
>
>                                /* if nothing mq mode configure, use default scheme */
>
>                                dev->data->dev_conf.txmode.mq_mode =
>
>         ETH_MQ_TX_VMDQ_ONLY;
>
>                 -            if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)
>
>                 -                RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;
>
>               
>
>             I'm not sure u may just remove it. These lines originally belong to a
>
>             different flow. Are u sure u can remove them like that? What if the
>
>             mq_mode is ETH_MQ_RX_NONE and nb_q_per_pool has been initialized
>
>         to 4
>
>             or 8 in ixgbe_pf_host_init()?
>
>           
>
>         I misread the patch - these lines belong to the txmode.mq_mode switch case.
>
>         I think it's ok to remove these really strange lines here. And when I look at it i
>
>         think for the similar reasons the similar lines should be removed in the Rx
>
>         case too: consider non-RSS case with MQ DCB Tx configuration.
>
>           
>
>     I search code in this function, only one place has
>
>     " if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)
>
>                 RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;"
>
>       
>
>     The only place is default branch, which is for rx_none, or vmdq_only mode,
>
>
> Here is a snippet of an rte_eth_dev_check_mq_mode() from the current 
> master:
>
>                 switch (dev_conf->rxmode.mq_mode) {
>                 case ETH_MQ_RX_VMDQ_RSS:
>                 case ETH_MQ_RX_VMDQ_DCB:
>                 case ETH_MQ_RX_VMDQ_DCB_RSS:
>                         /* DCB/RSS VMDQ in SRIOV mode, not implement yet */
>                         PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8
>                                        " SRIOV active, "
>                                        "unsupported VMDQ mq_mode rx %u\n",
>                                        port_id, dev_conf->rxmode.mq_mode);
>                         return (-EINVAL);
>                 default: /* ETH_MQ_RX_VMDQ_ONLY or ETH_MQ_RX_NONE */
>                         /* if nothing mq mode configure, use default scheme */
>                         dev->data->dev_conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_ONLY;
>                         *if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)                 <---- This is one*
> *                                RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;*
>                         break;
>                 }
>   
>                 switch (dev_conf->txmode.mq_mode) {
>                 case ETH_MQ_TX_VMDQ_DCB:
>                         /* DCB VMDQ in SRIOV mode, not implement yet */
>                         PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8
>                                        " SRIOV active, "
>                                        "unsupported VMDQ mq_mode tx %u\n",
>                                        port_id, dev_conf->txmode.mq_mode);
>                         return (-EINVAL);
>                 default: /* ETH_MQ_TX_VMDQ_ONLY or ETH_MQ_TX_NONE */
>                         /* if nothing mq mode configure, use default scheme */
>                         dev->data->dev_conf.txmode.mq_mode = ETH_MQ_TX_VMDQ_ONLY;
>                         if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)              <------ This is two. This is what your patch is removing
>                                 RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;
>                         break;
>                 }
>
>
>
> Changchun: yes you are correct, what I mean in my last response is 
> that only one place AFTER my removal, so there are 2 places before my 
> removal.
> no controversial here.
>
>   
> We don't need remove this, as it should assign as 1 because it did use 1 queue per pool.
>
>
> And why is that? Just because RSS was not enabled? And what if a user 
> wants multiple Tx queues? Mode 1100b of MRQE for instance?
>
> Changchun: I can explain why I need this change(remove the second 
> place) here,
>

I understand why u needed it. I'm just saying that for exactly the same 
reasons u need to remove the "first place" (the identical nb_q_per_pool 
check in the Rx mq_mode switch) too. ;)

> In the txmode, when txmode is ETH_MQ_TX_NONE, but the rx mode could 
> either be ETH_MQ_RX_NONE or
>
> ETH_MQ_RX_VMDQ_RSS, so we could not forcedly set nb_q_per_pool into 1 
> just hit the condition of txmode is ETH_MQ_TX_NONE,
>
> Because we need consider it is combination of rx mode is 
> ETH_MQ_RX_VMDQ_RSS, and tx mode is  ETH_MQ_TX_NONE,
>
> In such a case, the queue number per pool could be 1, or 2, or 4.
>
> In another hand, introducing ETH_MQ_TX_VMDQ_RSS for tx mode, seems 
> very strange, because tx side has no rss feature.
>

It's called ETH_MQ_TX_VMDQ_DCB in DPDK notation. ;) However I see that 
it's not yet supported. But *when* it's going to be supported the above 
code will turn out to be bogus, since u actually don't want to set 
nb_q_per_pool to 1 when only the Rx mode is not MQ, nor when only the Tx 
mode is not MQ, but only when *both* of them are not MQ. And this "if" is 
simply missing in rte_eth_dev_check_mq_mode().

>
> thanks Changchun
>
>
>

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v6 5/6] ixgbe: Config VF RSS
  2015-01-12  5:59           ` [dpdk-dev] [PATCH v6 5/6] ixgbe: Config VF RSS Ouyang Changchun
@ 2015-01-12 14:04             ` Vlad Zolotarov
  2015-01-20  9:35             ` Wodkowski, PawelX
  1 sibling, 0 replies; 144+ messages in thread
From: Vlad Zolotarov @ 2015-01-12 14:04 UTC (permalink / raw)
  To: Ouyang Changchun, dev


On 01/12/15 07:59, Ouyang Changchun wrote:
> It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable VF RSS.
>
> The psrtype will determine how many queues the received packets will distribute to,
> and the value of psrtype should depends on both facet: max VF rxq number which
> has been negotiated with PF, and the number of rxq specified in config on guest.
>
> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>

Reviewed-by: Vlad Zolotarov <vladz@cloudius-systems.com>

>
> Changes in v6:
>    - Raise an error for the case of ETH_16_POOLS in config vf rss, as the previous
>      logic have changed it into: ETH_32_POOLS.
>
> Changes in v4:
>   - The number of rxq from config should be power of 2 and should not bigger than
>      max VF rxq number(negotiated between guest and host).
>
> ---
>   lib/librte_pmd_ixgbe/ixgbe_pf.c   |  15 ++++++
>   lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 102 +++++++++++++++++++++++++++++++++-----
>   2 files changed, 105 insertions(+), 12 deletions(-)
>
> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> index dbda9b5..93f6e43 100644
> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev *eth_dev)
>   	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw->mac.num_rar_entries), 0);
>   	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw->mac.num_rar_entries), 0);
>   
> +	/*
> +	 * VF RSS can support at most 4 queues for each VF, even if
> +	 * 8 queues are available for each VF, it need refine to 4
> +	 * queues here due to this limitation, otherwise no queue
> +	 * will receive any packet even RSS is enabled.
> +	 */
> +	if (eth_dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_VMDQ_RSS) {
> +		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
> +			RTE_ETH_DEV_SRIOV(eth_dev).active = ETH_32_POOLS;
> +			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
> +			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
> +				dev_num_vf(eth_dev) * 4;
> +		}
> +	}
> +
>   	/* set VMDq map to default PF pool */
>   	hw->mac.ops.set_vmdq(hw, 0, RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
>   
> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> index f69abda..20627df 100644
> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> @@ -3327,6 +3327,67 @@ ixgbe_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
>   }
>   
>   static int
> +ixgbe_config_vf_rss(struct rte_eth_dev *dev)
> +{
> +	struct ixgbe_hw *hw;
> +	uint32_t mrqc;
> +
> +	ixgbe_rss_configure(dev);
> +
> +	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> +
> +	/* MRQC: enable VF RSS */
> +	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
> +	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
> +	switch (RTE_ETH_DEV_SRIOV(dev).active) {
> +	case ETH_64_POOLS:
> +		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
> +		break;
> +
> +	case ETH_32_POOLS:
> +		mrqc |= IXGBE_MRQC_VMDQRSS32EN;
> +		break;
> +
> +	default:
> +		PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
> +		return -EINVAL;
> +	}
> +
> +	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
> +
> +	return 0;
> +}
> +
> +static int
> +ixgbe_config_vf_default(struct rte_eth_dev *dev)
> +{
> +	struct ixgbe_hw *hw =
> +		IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> +
> +	switch (RTE_ETH_DEV_SRIOV(dev).active) {
> +	case ETH_64_POOLS:
> +		IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> +			IXGBE_MRQC_VMDQEN);
> +		break;
> +
> +	case ETH_32_POOLS:
> +		IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> +			IXGBE_MRQC_VMDQRT4TCEN);
> +		break;
> +
> +	case ETH_16_POOLS:
> +		IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> +			IXGBE_MRQC_VMDQRT8TCEN);
> +		break;
> +	default:
> +		PMD_INIT_LOG(ERR,
> +			"invalid pool number in IOV mode");
> +		break;
> +	}
> +	return 0;
> +}
> +
> +static int
>   ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
>   {
>   	struct ixgbe_hw *hw =
> @@ -3358,24 +3419,25 @@ ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
>   			default: ixgbe_rss_disable(dev);
>   		}
>   	} else {
> -		switch (RTE_ETH_DEV_SRIOV(dev).active) {
>   		/*
>   		 * SRIOV active scheme
> -		 * FIXME if support DCB/RSS together with VMDq & SRIOV
> +		 * Support RSS together with VMDq & SRIOV
>   		 */
> -		case ETH_64_POOLS:
> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQEN);
> -			break;
> -
> -		case ETH_32_POOLS:
> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQRT4TCEN);
> +		switch (dev->data->dev_conf.rxmode.mq_mode) {
> +		case ETH_MQ_RX_RSS:
> +		case ETH_MQ_RX_VMDQ_RSS:
> +			ixgbe_config_vf_rss(dev);
>   			break;
>   
> -		case ETH_16_POOLS:
> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQRT8TCEN);
> -			break;
> +		/* FIXME if support DCB/RSS together with VMDq & SRIOV */
> +		case ETH_MQ_RX_VMDQ_DCB:
> +		case ETH_MQ_RX_VMDQ_DCB_RSS:
> +			PMD_INIT_LOG(ERR,
> +				"Could not support DCB with VMDq & SRIOV");
> +			return -1;
>   		default:
> -			PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
> +			ixgbe_config_vf_default(dev);
> +			break;
>   		}
>   	}
>   
> @@ -3993,6 +4055,19 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
>   	PMD_INIT_FUNC_TRACE();
>   	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>   
> +	if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
> +		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
> +			"it should be power of 2");
> +		return -1;
> +	}
> +
> +	if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
> +		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
> +			"it should be equal to or less than %d",
> +			hw->mac.max_rx_queues);
> +		return -1;
> +	}
> +
>   	/*
>   	 * When the VF driver issues a IXGBE_VF_RESET request, the PF driver
>   	 * disables the VF receipt of packets if the PF MTU is > 1500.
> @@ -4094,6 +4169,9 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
>   			IXGBE_PSRTYPE_IPV6HDR;
>   #endif
>   
> +	/* Set RQPL for VF RSS according to max Rx queue */
> +	psrtype |= (dev->data->nb_rx_queues >> 1) <<
> +		IXGBE_PSRTYPE_RQPL_SHIFT;
>   	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
>   
>   	if (dev->data->dev_conf.rxmode.enable_scatter) {

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v6 6/6] testpmd: Set Rx VMDq RSS mode
  2015-01-12  5:59           ` [dpdk-dev] [PATCH v6 6/6] testpmd: Set Rx VMDq RSS mode Ouyang Changchun
@ 2015-01-12 14:05             ` Vlad Zolotarov
  0 siblings, 0 replies; 144+ messages in thread
From: Vlad Zolotarov @ 2015-01-12 14:05 UTC (permalink / raw)
  To: Ouyang Changchun, dev


On 01/12/15 07:59, Ouyang Changchun wrote:
> Set VMDq RSS mode if it has VF(VF number is more than 1) and has RSS information.
>
> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>


Reviewed-by: Vlad Zolotarov <vladz@cloudius-systems.com>

>
> changes in v6
>    - Put common statement outside the if branch.
>
> changes in v5
>    - Assign txmode.mq_mode with ETH_MQ_TX_NONE explicitly;
>    - Remove one line wrong comment.
>
> ---
>   app/test-pmd/testpmd.c | 12 +++++++++++-
>   1 file changed, 11 insertions(+), 1 deletion(-)
>
> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
> index 8c69756..773b8af 100644
> --- a/app/test-pmd/testpmd.c
> +++ b/app/test-pmd/testpmd.c
> @@ -1700,7 +1700,6 @@ init_port_config(void)
>   			port->dev_conf.rx_adv_conf.rss_conf.rss_hf = 0;
>   		}
>   
> -		/* In SR-IOV mode, RSS mode is not available */
>   		if (port->dcb_flag == 0 && port->dev_info.max_vfs == 0) {
>   			if( port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0)
>   				port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
> @@ -1708,6 +1707,17 @@ init_port_config(void)
>   				port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
>   		}
>   
> +		if (port->dev_info.max_vfs != 0) {
> +			if (port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0)
> +				port->dev_conf.rxmode.mq_mode =
> +					ETH_MQ_RX_VMDQ_RSS;
> +			else
> +				port->dev_conf.rxmode.mq_mode =
> +					ETH_MQ_RX_NONE;
> +
> +			port->dev_conf.txmode.mq_mode = ETH_MQ_TX_NONE;
> +		}
> +
>   		port->rx_conf.rx_thresh = rx_thresh;
>   		port->rx_conf.rx_free_thresh = rx_free_thresh;
>   		port->rx_conf.rx_drop_en = rx_drop_en;

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v6 4/6] ether: Check VMDq RSS mode
  2015-01-12  5:59           ` [dpdk-dev] [PATCH v6 4/6] ether: Check VMDq RSS mode Ouyang Changchun
@ 2015-01-12 14:06             ` Vlad Zolotarov
  2015-01-18 22:04             ` Thomas Monjalon
  2015-01-19 10:31             ` Wodkowski, PawelX
  2 siblings, 0 replies; 144+ messages in thread
From: Vlad Zolotarov @ 2015-01-12 14:06 UTC (permalink / raw)
  To: Ouyang Changchun, dev


On 01/12/15 07:59, Ouyang Changchun wrote:
> Check mq mode for VMDq RSS, handle it correctly instead of returning an error;
> Also remove the limitation of per pool queue number has max value of 1, because
> the per pool queue number could be 2 or 4 if it is VMDq RSS mode;
>
> The number of rxq specified in config will determine the mq mode for VMDq RSS.
>
> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>


Reviewed-by: Vlad Zolotarov <vladz@cloudius-systems.com>

>
> changes in v6:
>    - More clear error message when queue number is invalid.
>
> changes in v5:
>    - Fix '<' issue, it should be '<=' to test rxq number;
>    - Extract a function to remove the embeded switch-case statement.
>
> ---
>   lib/librte_ether/rte_ethdev.c | 51 ++++++++++++++++++++++++++++++++++++++-----
>   1 file changed, 46 insertions(+), 5 deletions(-)
>
> diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
> index 95f2ceb..e9e3368 100644
> --- a/lib/librte_ether/rte_ethdev.c
> +++ b/lib/librte_ether/rte_ethdev.c
> @@ -503,6 +503,31 @@ rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
>   }
>   
>   static int
> +rte_eth_dev_check_vf_rss_rxq_num(uint8_t port_id, uint16_t nb_rx_q)
> +{
> +	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
> +	switch (nb_rx_q) {
> +	case 1:
> +	case 2:
> +		RTE_ETH_DEV_SRIOV(dev).active =
> +			ETH_64_POOLS;
> +		break;
> +	case 4:
> +		RTE_ETH_DEV_SRIOV(dev).active =
> +			ETH_32_POOLS;
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = nb_rx_q;
> +	RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx =
> +		dev->pci_dev->max_vfs * nb_rx_q;
> +
> +	return 0;
> +}
> +
> +static int
>   rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
>   		      const struct rte_eth_conf *dev_conf)
>   {
> @@ -510,8 +535,7 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
>   
>   	if (RTE_ETH_DEV_SRIOV(dev).active != 0) {
>   		/* check multi-queue mode */
> -		if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_RSS) ||
> -		    (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
> +		if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
>   		    (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB_RSS) ||
>   		    (dev_conf->txmode.mq_mode == ETH_MQ_TX_DCB)) {
>   			/* SRIOV only works in VMDq enable mode */
> @@ -525,7 +549,6 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
>   		}
>   
>   		switch (dev_conf->rxmode.mq_mode) {
> -		case ETH_MQ_RX_VMDQ_RSS:
>   		case ETH_MQ_RX_VMDQ_DCB:
>   		case ETH_MQ_RX_VMDQ_DCB_RSS:
>   			/* DCB/RSS VMDQ in SRIOV mode, not implement yet */
> @@ -534,6 +557,26 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
>   					"unsupported VMDQ mq_mode rx %u\n",
>   					port_id, dev_conf->rxmode.mq_mode);
>   			return (-EINVAL);
> +		case ETH_MQ_RX_RSS:
> +			PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8
> +					" SRIOV active, "
> +					"Rx mq mode is changed from:"
> +					"mq_mode %u into VMDQ mq_mode %u\n",
> +					port_id,
> +					dev_conf->rxmode.mq_mode,
> +					dev->data->dev_conf.rxmode.mq_mode);
> +		case ETH_MQ_RX_VMDQ_RSS:
> +			dev->data->dev_conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_RSS;
> +			if (nb_rx_q <= RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool)
> +				if (rte_eth_dev_check_vf_rss_rxq_num(port_id, nb_rx_q) != 0) {
> +					PMD_DEBUG_TRACE("ethdev port_id=%d"
> +						" SRIOV active, invalid queue"
> +						" number for VMDQ RSS, allowed"
> +						" value are 1, 2 or 4\n",
> +						port_id);
> +					return -EINVAL;
> +				}
> +			break;
>   		default: /* ETH_MQ_RX_VMDQ_ONLY or ETH_MQ_RX_NONE */
>   			/* if nothing mq mode configure, use default scheme */
>   			dev->data->dev_conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_ONLY;
> @@ -553,8 +596,6 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
>   		default: /* ETH_MQ_TX_VMDQ_ONLY or ETH_MQ_TX_NONE */
>   			/* if nothing mq mode configure, use default scheme */
>   			dev->data->dev_conf.txmode.mq_mode = ETH_MQ_TX_VMDQ_ONLY;
> -			if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)
> -				RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;
>   			break;
>   		}
>   

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v5 4/6] ether: Check VMDq RSS mode
  2015-01-12 13:58                     ` Vlad Zolotarov
@ 2015-01-13  1:50                       ` Ouyang, Changchun
  2015-01-13  9:00                         ` Vlad Zolotarov
  0 siblings, 1 reply; 144+ messages in thread
From: Ouyang, Changchun @ 2015-01-13  1:50 UTC (permalink / raw)
  To: Vlad Zolotarov, dev



From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
Sent: Monday, January 12, 2015 9:59 PM
To: Ouyang, Changchun; dev@dpdk.org
Subject: Re: [dpdk-dev] [PATCH v5 4/6] ether: Check VMDq RSS mode


On 01/12/15 05:41, Ouyang, Changchun wrote:


From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
Sent: Friday, January 09, 2015 9:50 PM
To: Ouyang, Changchun; dev@dpdk.org<mailto:dev@dpdk.org>
Subject: Re: [dpdk-dev] [PATCH v5 4/6] ether: Check VMDq RSS mode


On 01/09/15 07:54, Ouyang, Changchun wrote:





-----Original Message-----

From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]

Sent: Friday, January 9, 2015 2:49 AM

To: Ouyang, Changchun; dev@dpdk.org<mailto:dev@dpdk.org>

Subject: Re: [dpdk-dev] [PATCH v5 4/6] ether: Check VMDq RSS mode





On 01/08/15 11:19, Vlad Zolotarov wrote:



On 01/07/15 08:32, Ouyang Changchun wrote:

Check mq mode for VMDq RSS, handle it correctly instead of returning

an error; Also remove the limitation of per pool queue number has max

value of 1, because the per pool queue number could be 2 or 4 if it

is VMDq RSS mode;



The number of rxq specified in config will determine the mq mode for

VMDq RSS.



Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com><mailto:changchun.ouyang@intel.com>



changes in v5:

   - Fix '<' issue, it should be '<=' to test rxq number;

   - Extract a function to remove the embeded switch-case statement.



---

  lib/librte_ether/rte_ethdev.c | 50

++++++++++++++++++++++++++++++++++++++-----

  1 file changed, 45 insertions(+), 5 deletions(-)



diff --git a/lib/librte_ether/rte_ethdev.c

b/lib/librte_ether/rte_ethdev.c index 95f2ceb..8363e26 100644

--- a/lib/librte_ether/rte_ethdev.c

+++ b/lib/librte_ether/rte_ethdev.c

@@ -503,6 +503,31 @@ rte_eth_dev_tx_queue_config(struct

rte_eth_dev

*dev, uint16_t nb_queues)

  }

    static int

+rte_eth_dev_check_vf_rss_rxq_num(uint8_t port_id, uint16_t nb_rx_q)

+{

+    struct rte_eth_dev *dev = &rte_eth_devices[port_id];

+    switch (nb_rx_q) {

+    case 1:

+    case 2:

+        RTE_ETH_DEV_SRIOV(dev).active =

+            ETH_64_POOLS;

+        break;

+    case 4:

+        RTE_ETH_DEV_SRIOV(dev).active =

+            ETH_32_POOLS;

+        break;

+    default:

+        return -EINVAL;

+    }

+

+    RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = nb_rx_q;

+    RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx =

+        dev->pci_dev->max_vfs * nb_rx_q;

+

+    return 0;

+}

+

+static int

  rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q,

uint16_t nb_tx_q,

                const struct rte_eth_conf *dev_conf)

  {

@@ -510,8 +535,7 @@ rte_eth_dev_check_mq_mode(uint8_t port_id,

uint16_t nb_rx_q, uint16_t nb_tx_q,

        if (RTE_ETH_DEV_SRIOV(dev).active != 0) {

          /* check multi-queue mode */

-        if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_RSS) ||

-            (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||

+        if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||

              (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB_RSS) ||

              (dev_conf->txmode.mq_mode == ETH_MQ_TX_DCB)) {

              /* SRIOV only works in VMDq enable mode */ @@ -525,7

+549,6 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t

nb_rx_q, uint16_t nb_tx_q,

          }

            switch (dev_conf->rxmode.mq_mode) {

-        case ETH_MQ_RX_VMDQ_RSS:

          case ETH_MQ_RX_VMDQ_DCB:

          case ETH_MQ_RX_VMDQ_DCB_RSS:

              /* DCB/RSS VMDQ in SRIOV mode, not implement yet */ @@

-534,6 +557,25 @@ rte_eth_dev_check_mq_mode(uint8_t port_id,

uint16_t

nb_rx_q, uint16_t nb_tx_q,

                      "unsupported VMDQ mq_mode rx %u\n",

                      port_id, dev_conf->rxmode.mq_mode);

              return (-EINVAL);

+        case ETH_MQ_RX_RSS:

+            PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8

+                    " SRIOV active, "

+                    "Rx mq mode is changed from:"

+                    "mq_mode %u into VMDQ mq_mode %u\n",

+                    port_id,

+                    dev_conf->rxmode.mq_mode,

+                    dev->data->dev_conf.rxmode.mq_mode);

+        case ETH_MQ_RX_VMDQ_RSS:

+            dev->data->dev_conf.rxmode.mq_mode =

ETH_MQ_RX_VMDQ_RSS;

+            if (nb_rx_q <= RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool)

+                if (rte_eth_dev_check_vf_rss_rxq_num(port_id,

nb_rx_q) != 0) {

+                    PMD_DEBUG_TRACE("ethdev port_id=%d"

+                        " SRIOV active, invalid queue"

+                        " number for VMDQ RSS\n",

+                        port_id);



Some nitpicking here: I'd add the allowed values descriptions to the

error message. Something like: "invalid queue number for VMDQ RSS.

Allowed values are 1, 2 or 4\n".



+                    return -EINVAL;

+                }

+            break;

          default: /* ETH_MQ_RX_VMDQ_ONLY or ETH_MQ_RX_NONE */

              /* if nothing mq mode configure, use default scheme */

              dev->data->dev_conf.rxmode.mq_mode =

ETH_MQ_RX_VMDQ_ONLY; @@ -553,8 +595,6 @@

rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q,

uint16_t nb_tx_q,

          default: /* ETH_MQ_TX_VMDQ_ONLY or ETH_MQ_TX_NONE */

              /* if nothing mq mode configure, use default scheme */

              dev->data->dev_conf.txmode.mq_mode =

ETH_MQ_TX_VMDQ_ONLY;

-            if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)

-                RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;



I'm not sure u may just remove it. These lines originally belong to a

different flow. Are u sure u can remove them like that? What if the

mq_mode is ETH_MQ_RX_NONE and nb_q_per_pool has been initialized

to 4

or 8 in ixgbe_pf_host_init()?



I misread the patch - these lines belong to the txmode.mq_mode switch case.

I think it's ok to remove these really strange lines here. And when I look at it i

think for the similar reasons the similar lines should be removed in the Rx

case too: consider non-RSS case with MQ DCB Tx configuration.



I search code in this function, only one place has

" if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)

           RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;"



The only place is default branch, which is for rx_none, or vmdq_only mode,

Here is a snippet of an rte_eth_dev_check_mq_mode() from the current master:

               switch (dev_conf->rxmode.mq_mode) {

               case ETH_MQ_RX_VMDQ_RSS:

               case ETH_MQ_RX_VMDQ_DCB:

               case ETH_MQ_RX_VMDQ_DCB_RSS:

                       /* DCB/RSS VMDQ in SRIOV mode, not implement yet */

                       PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8

                                      " SRIOV active, "

                                      "unsupported VMDQ mq_mode rx %u\n",

                                      port_id, dev_conf->rxmode.mq_mode);

                       return (-EINVAL);

               default: /* ETH_MQ_RX_VMDQ_ONLY or ETH_MQ_RX_NONE */

                       /* if nothing mq mode configure, use default scheme */

                       dev->data->dev_conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_ONLY;

                       if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)                 <---- This is one

                               RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;

                       break;

               }



               switch (dev_conf->txmode.mq_mode) {

               case ETH_MQ_TX_VMDQ_DCB:

                       /* DCB VMDQ in SRIOV mode, not implement yet */

                       PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8

                                      " SRIOV active, "

                                      "unsupported VMDQ mq_mode tx %u\n",

                                      port_id, dev_conf->txmode.mq_mode);

                       return (-EINVAL);

               default: /* ETH_MQ_TX_VMDQ_ONLY or ETH_MQ_TX_NONE */

                       /* if nothing mq mode configure, use default scheme */

                       dev->data->dev_conf.txmode.mq_mode = ETH_MQ_TX_VMDQ_ONLY;

                       if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)              <------ This is two. This is what your patch is removing

                               RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;

                       break;

               }



Changchun: yes, you are correct. What I meant in my last response is that there is only one place AFTER my removal, so there were 2 places before my removal.
No controversy here.





We don't need remove this, as it should assign as 1 because it did use 1 queue per pool.

And why is that? Just because RSS was not enabled? And what if a user wants multiple Tx queues? Mode 1100b of MRQE for instance?

Changchun: I can explain why I need this change(remove the second place) here,

   I understood why u needed it in the first place. I just say that for exactly the same reasons u need to remove the "first place" too. ;)


Changchun: then I will try to explain why I can't remove the first place :)
When the rx mode is ETH_MQ_RX_NONE and the tx mode is ETH_MQ_TX_NONE,
the function ixgbe_pf_host_init still sets nb_q_per_pool to 2, 4 or 8 according to the max vf num
(actually, at that point it has no knowledge of the rx and tx configuration values, so it has to just set
an estimated (and not so accurate) value according to the max vf num).
Then the check_mq_mode function needs to further refine this value according to a few factors:
sriov.active and rxmode.mq_mode.
When it finds that the rx mode is RX_NONE and nb_q_per_pool is larger than 1, it should refine it to 1.
So if I remove the first place, the VMDQ_RSS case works well, but I break the RX_NONE case.

So I think we can't treat rx path and tx path in absolutely same way here, i.e. if you add it in the first place(rx path) then you need also add it in the second place(tx path)
Vice versa,
that's my understanding :)

Thanks
Changchun

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v5 4/6] ether: Check VMDq RSS mode
  2015-01-13  1:50                       ` Ouyang, Changchun
@ 2015-01-13  9:00                         ` Vlad Zolotarov
  2015-01-14  0:44                           ` Ouyang, Changchun
  0 siblings, 1 reply; 144+ messages in thread
From: Vlad Zolotarov @ 2015-01-13  9:00 UTC (permalink / raw)
  To: Ouyang, Changchun, dev


On 01/13/15 03:50, Ouyang, Changchun wrote:
>
> *From:*Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> *Sent:* Monday, January 12, 2015 9:59 PM
> *To:* Ouyang, Changchun; dev@dpdk.org
> *Subject:* Re: [dpdk-dev] [PATCH v5 4/6] ether: Check VMDq RSS mode
>
> On 01/12/15 05:41, Ouyang, Changchun wrote:
>
>     *From:*Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>     *Sent:* Friday, January 09, 2015 9:50 PM
>     *To:* Ouyang, Changchun; dev@dpdk.org <mailto:dev@dpdk.org>
>     *Subject:* Re: [dpdk-dev] [PATCH v5 4/6] ether: Check VMDq RSS mode
>
>     On 01/09/15 07:54, Ouyang, Changchun wrote:
>
>           
>
>           
>
>             -----Original Message-----
>
>             From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>
>             Sent: Friday, January 9, 2015 2:49 AM
>
>             To: Ouyang, Changchun;dev@dpdk.org  <mailto:dev@dpdk.org>
>
>             Subject: Re: [dpdk-dev] [PATCH v5 4/6] ether: Check VMDq RSS mode
>
>               
>
>               
>
>             On 01/08/15 11:19, Vlad Zolotarov wrote:
>
>                   
>
>                 On 01/07/15 08:32, Ouyang Changchun wrote:
>
>                     Check mq mode for VMDq RSS, handle it correctly instead of returning
>
>                     an error; Also remove the limitation of per pool queue number has max
>
>                     value of 1, because the per pool queue number could be 2 or 4 if it
>
>                     is VMDq RSS mode;
>
>                       
>
>                     The number of rxq specified in config will determine the mq mode for
>
>                     VMDq RSS.
>
>                       
>
>                     Signed-off-by: Changchun Ouyang<changchun.ouyang@intel.com>  <mailto:changchun.ouyang@intel.com>
>
>                       
>
>                     changes in v5:
>
>                         - Fix '<' issue, it should be '<=' to test rxq number;
>
>                         - Extract a function to remove the embeded switch-case statement.
>
>                       
>
>                     ---
>
>                        lib/librte_ether/rte_ethdev.c | 50
>
>                     ++++++++++++++++++++++++++++++++++++++-----
>
>                        1 file changed, 45 insertions(+), 5 deletions(-)
>
>                       
>
>                     diff --git a/lib/librte_ether/rte_ethdev.c
>
>                     b/lib/librte_ether/rte_ethdev.c index 95f2ceb..8363e26 100644
>
>                     --- a/lib/librte_ether/rte_ethdev.c
>
>                     +++ b/lib/librte_ether/rte_ethdev.c
>
>                     @@ -503,6 +503,31 @@ rte_eth_dev_tx_queue_config(struct
>
>             rte_eth_dev
>
>                     *dev, uint16_t nb_queues)
>
>                        }
>
>                          static int
>
>                     +rte_eth_dev_check_vf_rss_rxq_num(uint8_t port_id, uint16_t nb_rx_q)
>
>                     +{
>
>                     +    struct rte_eth_dev *dev = &rte_eth_devices[port_id];
>
>                     +    switch (nb_rx_q) {
>
>                     +    case 1:
>
>                     +    case 2:
>
>                     +        RTE_ETH_DEV_SRIOV(dev).active =
>
>                     +            ETH_64_POOLS;
>
>                     +        break;
>
>                     +    case 4:
>
>                     +        RTE_ETH_DEV_SRIOV(dev).active =
>
>                     +            ETH_32_POOLS;
>
>                     +        break;
>
>                     +    default:
>
>                     +        return -EINVAL;
>
>                     +    }
>
>                     +
>
>                     +    RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = nb_rx_q;
>
>                     +    RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx =
>
>                     +        dev->pci_dev->max_vfs * nb_rx_q;
>
>                     +
>
>                     +    return 0;
>
>                     +}
>
>                     +
>
>                     +static int
>
>                        rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q,
>
>                     uint16_t nb_tx_q,
>
>                                      const struct rte_eth_conf *dev_conf)
>
>                        {
>
>                     @@ -510,8 +535,7 @@ rte_eth_dev_check_mq_mode(uint8_t port_id,
>
>                     uint16_t nb_rx_q, uint16_t nb_tx_q,
>
>                              if (RTE_ETH_DEV_SRIOV(dev).active != 0) {
>
>                                /* check multi-queue mode */
>
>                     -        if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_RSS) ||
>
>                     -            (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
>
>                     +        if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
>
>                                    (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB_RSS) ||
>
>                                    (dev_conf->txmode.mq_mode == ETH_MQ_TX_DCB)) {
>
>                                    /* SRIOV only works in VMDq enable mode */ @@ -525,7
>
>                     +549,6 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t
>
>                     nb_rx_q, uint16_t nb_tx_q,
>
>                                }
>
>                                  switch (dev_conf->rxmode.mq_mode) {
>
>                     -        case ETH_MQ_RX_VMDQ_RSS:
>
>                                case ETH_MQ_RX_VMDQ_DCB:
>
>                                case ETH_MQ_RX_VMDQ_DCB_RSS:
>
>                                    /* DCB/RSS VMDQ in SRIOV mode, not implement yet */ @@
>
>                     -534,6 +557,25 @@ rte_eth_dev_check_mq_mode(uint8_t port_id,
>
>             uint16_t
>
>                     nb_rx_q, uint16_t nb_tx_q,
>
>                                            "unsupported VMDQ mq_mode rx %u\n",
>
>                                            port_id, dev_conf->rxmode.mq_mode);
>
>                                    return (-EINVAL);
>
>                     +        case ETH_MQ_RX_RSS:
>
>                     +            PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8
>
>                     +                    " SRIOV active, "
>
>                     +                    "Rx mq mode is changed from:"
>
>                     +                    "mq_mode %u into VMDQ mq_mode %u\n",
>
>                     +                    port_id,
>
>                     +                    dev_conf->rxmode.mq_mode,
>
>                     +                    dev->data->dev_conf.rxmode.mq_mode);
>
>                     +        case ETH_MQ_RX_VMDQ_RSS:
>
>                     +            dev->data->dev_conf.rxmode.mq_mode =
>
>             ETH_MQ_RX_VMDQ_RSS;
>
>                     +            if (nb_rx_q <= RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool)
>
>                     +                if (rte_eth_dev_check_vf_rss_rxq_num(port_id,
>
>                     nb_rx_q) != 0) {
>
>                     +                    PMD_DEBUG_TRACE("ethdev port_id=%d"
>
>                     +                        " SRIOV active, invalid queue"
>
>                     +                        " number for VMDQ RSS\n",
>
>                     +                        port_id);
>
>                   
>
>                 Some nitpicking here: I'd add the allowed values descriptions to the
>
>                 error message. Something like: "invalid queue number for VMDQ RSS.
>
>                 Allowed values are 1, 2 or 4\n".
>
>                   
>
>                     +                    return -EINVAL;
>
>                     +                }
>
>                     +            break;
>
>                                default: /* ETH_MQ_RX_VMDQ_ONLY or ETH_MQ_RX_NONE */
>
>                                    /* if nothing mq mode configure, use default scheme */
>
>                                    dev->data->dev_conf.rxmode.mq_mode =
>
>                     ETH_MQ_RX_VMDQ_ONLY; @@ -553,8 +595,6 @@
>
>                     rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q,
>
>             uint16_t nb_tx_q,
>
>                                default: /* ETH_MQ_TX_VMDQ_ONLY or ETH_MQ_TX_NONE */
>
>                                    /* if nothing mq mode configure, use default scheme */
>
>                                    dev->data->dev_conf.txmode.mq_mode =
>
>             ETH_MQ_TX_VMDQ_ONLY;
>
>                     -            if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)
>
>                     -                RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;
>
>                   
>
>                 I'm not sure u may just remove it. These lines originally belong to a
>
>                 different flow. Are u sure u can remove them like that? What if the
>
>                 mq_mode is ETH_MQ_RX_NONE and nb_q_per_pool has been initialized
>
>             to 4
>
>                 or 8 in ixgbe_pf_host_init()?
>
>               
>
>             I misread the patch - these lines belong to the txmode.mq_mode switch case.
>
>             I think it's ok to remove these really strange lines here. And when I look at it i
>
>             think for the similar reasons the similar lines should be removed in the Rx
>
>             case too: consider non-RSS case with MQ DCB Tx configuration.
>
>               
>
>         I search code in this function, only one place has
>
>         " if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)
>
>                     RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;"
>
>           
>
>         The only place is default branch, which is for rx_none, or vmdq_only mode,
>
>
>     Here is a snippet of an rte_eth_dev_check_mq_mode() from the
>     current master:
>
>                     switch (dev_conf->rxmode.mq_mode) {
>
>                     case ETH_MQ_RX_VMDQ_RSS:
>
>                     case ETH_MQ_RX_VMDQ_DCB:
>
>                     case ETH_MQ_RX_VMDQ_DCB_RSS:
>
>                             /* DCB/RSS VMDQ in SRIOV mode, not implement yet */
>
>                             PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8
>
>                                            " SRIOV active, "
>
>                                            "unsupported VMDQ mq_mode rx %u\n",
>
>                                            port_id, dev_conf->rxmode.mq_mode);
>
>                             return (-EINVAL);
>
>                     default: /* ETH_MQ_RX_VMDQ_ONLY or ETH_MQ_RX_NONE */
>
>                             /* if nothing mq mode configure, use default scheme */
>
>                             dev->data->dev_conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_ONLY;
>
>                             *if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)                 <---- This is one*
>
>     *                                RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;*
>
>                             break;
>
>                     }
>
>       
>
>                     switch (dev_conf->txmode.mq_mode) {
>
>                     case ETH_MQ_TX_VMDQ_DCB:
>
>                             /* DCB VMDQ in SRIOV mode, not implement yet */
>
>                             PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8
>
>                                            " SRIOV active, "
>
>                                            "unsupported VMDQ mq_mode tx %u\n",
>
>                                            port_id, dev_conf->txmode.mq_mode);
>
>                             return (-EINVAL);
>
>                     default: /* ETH_MQ_TX_VMDQ_ONLY or ETH_MQ_TX_NONE */
>
>                             /* if nothing mq mode configure, use default scheme */
>
>                             dev->data->dev_conf.txmode.mq_mode = ETH_MQ_TX_VMDQ_ONLY;
>
>                             if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)              <------ This is two. This is what your patch is removing
>
>                                     RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;
>
>                             break;
>
>                     }
>
>
>
>
>     Changchun: yes you are correct, what I mean in my last response is
>     that only one place AFTER my removal, so there are 2 places before
>     my removal.
>     no controversial here.
>
>
>       
>
>     We don't need remove this, as it should assign as 1 because it did use 1 queue per pool.
>
>
>     And why is that? Just because RSS was not enabled? And what if a
>     user wants multiple Tx queues? Mode 1100b of MRQE for instance?
>
>     Changchun: I can explain why I need this change(remove the second
>     place) here,
>
>
> I understood why u needed it in the first place. I just say that for 
> exactly the same reasons u need to remove the "first place" too. ;)
>
> Changchun: then I will try to explain why I can’t remove the first place :)
>
> When the rx mode is ETH_MQ_RX_NONE and tx mode is ETH_MQ_TX_NONE,
>
> The function ixgbe_pf_host_init still set the nb_q_per_pool into 2 or 
> 4 or 8 according to max vf num,
>
> (actually at that point, it has no knowledge of what is the rx and tx 
> configuration value, so have to just set
>
> an estimated (and not so accurate) value according to the max vf num)
>
> then in the check_mq_mode function, need further refine this value 
> according to a few factors:
>
> sriov.active, and rxmode.mq_mode.
>
> When it finds the rx mode is RX_NONE, and the nb_q_per_pool is larger 
> than 1, then it should refine to 1.
>
> So if I remove the first place, VMDQ_RSS case works well, but I break 
> the case of RX_NONE.
>
> So I think we can’t treat rx path and tx path in absolutely same way 
> here, i.e. if you add it in the first place(rx path) then you need 
> also add it in the second place(tx path)
>
> Vice versa,
>
> that’s my understanding :)
>

And now consider the case when rx_mode == RSS_NONE (since user has 
configured only a single Rx queue) and tx_mode == TX_DCB (user has 
configured 4 Tx queues and requested the above Tx mode). After your 
patch the nb_q_per_pool will still be set to 1 while it should have 
remained 4 because u want a pool to support 4 queues (MRQC.MRQE == 
1010b) but u will configure the PSRTYPE[n].RQPL for this pool to 0.

> Thanks
>
> Changchun
>

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v5 4/6] ether: Check VMDq RSS mode
  2015-01-13  9:00                         ` Vlad Zolotarov
@ 2015-01-14  0:44                           ` Ouyang, Changchun
  0 siblings, 0 replies; 144+ messages in thread
From: Ouyang, Changchun @ 2015-01-14  0:44 UTC (permalink / raw)
  To: Vlad Zolotarov, dev



From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
Sent: Tuesday, January 13, 2015 5:00 PM
To: Ouyang, Changchun; dev@dpdk.org
Subject: Re: [dpdk-dev] [PATCH v5 4/6] ether: Check VMDq RSS mode


On 01/13/15 03:50, Ouyang, Changchun wrote:


From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
Sent: Monday, January 12, 2015 9:59 PM
To: Ouyang, Changchun; dev@dpdk.org<mailto:dev@dpdk.org>
Subject: Re: [dpdk-dev] [PATCH v5 4/6] ether: Check VMDq RSS mode


On 01/12/15 05:41, Ouyang, Changchun wrote:


From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
Sent: Friday, January 09, 2015 9:50 PM
To: Ouyang, Changchun; dev@dpdk.org<mailto:dev@dpdk.org>
Subject: Re: [dpdk-dev] [PATCH v5 4/6] ether: Check VMDq RSS mode


On 01/09/15 07:54, Ouyang, Changchun wrote:





-----Original Message-----

From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]

Sent: Friday, January 9, 2015 2:49 AM

To: Ouyang, Changchun; dev@dpdk.org<mailto:dev@dpdk.org>

Subject: Re: [dpdk-dev] [PATCH v5 4/6] ether: Check VMDq RSS mode





On 01/08/15 11:19, Vlad Zolotarov wrote:



On 01/07/15 08:32, Ouyang Changchun wrote:

Check mq mode for VMDq RSS, handle it correctly instead of returning

an error; Also remove the limitation of per pool queue number has max

value of 1, because the per pool queue number could be 2 or 4 if it

is VMDq RSS mode;



The number of rxq specified in config will determine the mq mode for

VMDq RSS.



Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com><mailto:changchun.ouyang@intel.com>



changes in v5:

   - Fix '<' issue, it should be '<=' to test rxq number;

   - Extract a function to remove the embeded switch-case statement.



---

  lib/librte_ether/rte_ethdev.c | 50

++++++++++++++++++++++++++++++++++++++-----

  1 file changed, 45 insertions(+), 5 deletions(-)



diff --git a/lib/librte_ether/rte_ethdev.c

b/lib/librte_ether/rte_ethdev.c index 95f2ceb..8363e26 100644

--- a/lib/librte_ether/rte_ethdev.c

+++ b/lib/librte_ether/rte_ethdev.c

@@ -503,6 +503,31 @@ rte_eth_dev_tx_queue_config(struct

rte_eth_dev

*dev, uint16_t nb_queues)

  }

    static int

+rte_eth_dev_check_vf_rss_rxq_num(uint8_t port_id, uint16_t nb_rx_q)

+{

+    struct rte_eth_dev *dev = &rte_eth_devices[port_id];

+    switch (nb_rx_q) {

+    case 1:

+    case 2:

+        RTE_ETH_DEV_SRIOV(dev).active =

+            ETH_64_POOLS;

+        break;

+    case 4:

+        RTE_ETH_DEV_SRIOV(dev).active =

+            ETH_32_POOLS;

+        break;

+    default:

+        return -EINVAL;

+    }

+

+    RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = nb_rx_q;

+    RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx =

+        dev->pci_dev->max_vfs * nb_rx_q;

+

+    return 0;

+}

+

+static int

  rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q,

uint16_t nb_tx_q,

                const struct rte_eth_conf *dev_conf)

  {

@@ -510,8 +535,7 @@ rte_eth_dev_check_mq_mode(uint8_t port_id,

uint16_t nb_rx_q, uint16_t nb_tx_q,

        if (RTE_ETH_DEV_SRIOV(dev).active != 0) {

          /* check multi-queue mode */

-        if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_RSS) ||

-            (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||

+        if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||

              (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB_RSS) ||

              (dev_conf->txmode.mq_mode == ETH_MQ_TX_DCB)) {

              /* SRIOV only works in VMDq enable mode */ @@ -525,7

+549,6 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t

nb_rx_q, uint16_t nb_tx_q,

          }

            switch (dev_conf->rxmode.mq_mode) {

-        case ETH_MQ_RX_VMDQ_RSS:

          case ETH_MQ_RX_VMDQ_DCB:

          case ETH_MQ_RX_VMDQ_DCB_RSS:

              /* DCB/RSS VMDQ in SRIOV mode, not implement yet */ @@

-534,6 +557,25 @@ rte_eth_dev_check_mq_mode(uint8_t port_id,

uint16_t

nb_rx_q, uint16_t nb_tx_q,

                      "unsupported VMDQ mq_mode rx %u\n",

                      port_id, dev_conf->rxmode.mq_mode);

              return (-EINVAL);

+        case ETH_MQ_RX_RSS:

+            PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8

+                    " SRIOV active, "

+                    "Rx mq mode is changed from:"

+                    "mq_mode %u into VMDQ mq_mode %u\n",

+                    port_id,

+                    dev_conf->rxmode.mq_mode,

+                    dev->data->dev_conf.rxmode.mq_mode);

+        case ETH_MQ_RX_VMDQ_RSS:

+            dev->data->dev_conf.rxmode.mq_mode =

ETH_MQ_RX_VMDQ_RSS;

+            if (nb_rx_q <= RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool)

+                if (rte_eth_dev_check_vf_rss_rxq_num(port_id,

nb_rx_q) != 0) {

+                    PMD_DEBUG_TRACE("ethdev port_id=%d"

+                        " SRIOV active, invalid queue"

+                        " number for VMDQ RSS\n",

+                        port_id);



Some nitpicking here: I'd add the allowed values descriptions to the

error message. Something like: "invalid queue number for VMDQ RSS.

Allowed values are 1, 2 or 4\n".



+                    return -EINVAL;

+                }

+            break;

          default: /* ETH_MQ_RX_VMDQ_ONLY or ETH_MQ_RX_NONE */

              /* if nothing mq mode configure, use default scheme */

              dev->data->dev_conf.rxmode.mq_mode =

ETH_MQ_RX_VMDQ_ONLY; @@ -553,8 +595,6 @@

rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q,

uint16_t nb_tx_q,

          default: /* ETH_MQ_TX_VMDQ_ONLY or ETH_MQ_TX_NONE */

              /* if nothing mq mode configure, use default scheme */

              dev->data->dev_conf.txmode.mq_mode =

ETH_MQ_TX_VMDQ_ONLY;

-            if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)

-                RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;



I'm not sure u may just remove it. These lines originally belong to a

different flow. Are u sure u can remove them like that? What if the

mq_mode is ETH_MQ_RX_NONE and nb_q_per_pool has been initialized

to 4

or 8 in ixgbe_pf_host_init()?



I misread the patch - these lines belong to the txmode.mq_mode switch case.

I think it's ok to remove these really strange lines here. And when I look at it i

think for the similar reasons the similar lines should be removed in the Rx

case too: consider non-RSS case with MQ DCB Tx configuration.



I search code in this function, only one place has

" if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)

           RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;"



The only place is default branch, which is for rx_none, or vmdq_only mode,

Here is a snippet of an rte_eth_dev_check_mq_mode() from the current master:

               switch (dev_conf->rxmode.mq_mode) {

               case ETH_MQ_RX_VMDQ_RSS:

               case ETH_MQ_RX_VMDQ_DCB:

               case ETH_MQ_RX_VMDQ_DCB_RSS:

                       /* DCB/RSS VMDQ in SRIOV mode, not implement yet */

                       PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8

                                      " SRIOV active, "

                                      "unsupported VMDQ mq_mode rx %u\n",

                                      port_id, dev_conf->rxmode.mq_mode);

                       return (-EINVAL);

               default: /* ETH_MQ_RX_VMDQ_ONLY or ETH_MQ_RX_NONE */

                       /* if nothing mq mode configure, use default scheme */

                       dev->data->dev_conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_ONLY;

                       if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)                 <---- This is one

                               RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;

                       break;

               }



               switch (dev_conf->txmode.mq_mode) {

               case ETH_MQ_TX_VMDQ_DCB:

                       /* DCB VMDQ in SRIOV mode, not implement yet */

                       PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8

                                      " SRIOV active, "

                                      "unsupported VMDQ mq_mode tx %u\n",

                                      port_id, dev_conf->txmode.mq_mode);

                       return (-EINVAL);

               default: /* ETH_MQ_TX_VMDQ_ONLY or ETH_MQ_TX_NONE */

                       /* if nothing mq mode configure, use default scheme */

                       dev->data->dev_conf.txmode.mq_mode = ETH_MQ_TX_VMDQ_ONLY;

                       if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)              <------ This is two. This is what your patch is removing

                               RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;

                       break;

               }




Changchun: yes you are correct, what I mean in my last response is that only one place AFTER my removal, so there are 2 places before my removal.
no controversial here.






We don't need remove this, as it should assign as 1 because it did use 1 queue per pool.

And why is that? Just because RSS was not enabled? And what if a user wants multiple Tx queues? Mode 1100b of MRQE for instance?

Changchun: I can explain why I need this change(remove the second place) here,

   I understood why u needed it in the first place. I just say that for exactly the same reasons u need to remove the "first place" too. ;)



Changchun: then I will try to explain why I can't remove the first place :)
When the rx mode is ETH_MQ_RX_NONE and tx mode is ETH_MQ_TX_NONE,
The function ixgbe_pf_host_init still set the nb_q_per_pool into 2 or 4 or 8 according to max vf num,
(actually at that point, it has no knowledge of what is the rx and tx configuration value, so have to just set
an estimated (and not so accurate) value according to the max vf num)
then in the check_mq_mode function, need further refine this value according to a few factors:
sriov.active, and rxmode.mq_mode.
When it finds the rx mode is RX_NONE, and the nb_q_per_pool is larger than 1, then it should refine to 1.
So if I remove the first place, VMDQ_RSS case works well, but I break the case of RX_NONE.

So I think we can't treat rx path and tx path in absolutely same way here, i.e. if you add it in the first place(rx path) then you need also add it in the second place(tx path)
Vice versa,
that's my understanding :)

  And now consider the case when rx_mode == RSS_NONE (since user has configured only a single Rx queue) and tx_mode == TX_DCB (user has configured 4 Tx queues and requested the above Tx     mode). After your patch the nb_q_per_pool will still be set to 1 while it should have remained 4 because u want a pool to support 4 queues (MRQC.MRQE == 1010b) but u will configure the    PSRTYPE[n].RQPL for this pool to 0.

[Changchun]
As VMDq DCB is currently not supported, this patch doesn't consider that case. As far as VF RSS (VMDq RSS) is concerned, this patch is OK; I think you also agree with that, am I right?
Going back to your question: considering your case with VMDq DCB, you are right.
As we can see, Jastrzebski, MichalX K michalx.k.jastrzebski@intel.com<mailto:michalx.k.jastrzebski@intel.com> resolves this issue in his "add dcb for vf for ixgbe" patch by splitting nb_q_per_pool into nb_rx_q_per_pool and nb_tx_q_per_pool.
I think that's a good way to do it.

So my opinion is that we can discuss this in the "dcb for vf for ixgbe" thread, because the question has now switched to DCB for VF rather than RSS for VF itself.
What do you think?
Changchun

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v5 0/6] Enable VF RSS for Niantic
  2015-01-08  9:56         ` [dpdk-dev] [PATCH v5 0/6] Enable VF RSS for Niantic Vlad Zolotarov
@ 2015-01-18 21:58           ` Thomas Monjalon
  2015-01-19  9:40             ` Vlad Zolotarov
  0 siblings, 1 reply; 144+ messages in thread
From: Thomas Monjalon @ 2015-01-18 21:58 UTC (permalink / raw)
  To: dev, Ouyang Changchun

2015-01-08 11:56, Vlad Zolotarov:
> I've acked PATCH1 and PATCH2 already before and since there are no 
> changes in them, pls.,  consider them ACKed... ;)

Thank you for the great review.

Note to everybody reading that:
Vlad approved the first 2 patches of the series at the v4 stage.
So it would be clearer (and easier when applying patches) to set the ack or
reviewed-by line in the next versions if there are no changes (or minor) to
these patches.
It's a good habit which is not well spread in DPDK community.

Thanks
-- 
Thomas

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v6 4/6] ether: Check VMDq RSS mode
  2015-01-12  5:59           ` [dpdk-dev] [PATCH v6 4/6] ether: Check VMDq RSS mode Ouyang Changchun
  2015-01-12 14:06             ` Vlad Zolotarov
@ 2015-01-18 22:04             ` Thomas Monjalon
  2015-01-19 10:31             ` Wodkowski, PawelX
  2 siblings, 0 replies; 144+ messages in thread
From: Thomas Monjalon @ 2015-01-18 22:04 UTC (permalink / raw)
  To: Ouyang Changchun; +Cc: dev

2015-01-12 13:59, Ouyang Changchun:
> Check mq mode for VMDq RSS, handle it correctly instead of returning an error;
> Also remove the limitation of per pool queue number has max value of 1, because
> the per pool queue number could be 2 or 4 if it is VMDq RSS mode;
> 
> The number of rxq specified in config will determine the mq mode for VMDq RSS.
> 
> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>

I feel that most of this code is Intel-specific and should go in ixgbe or
other PMDs if needed.
OK to apply this patch which improves the current codebase, but please,
start thinking how RSS/VMDQ/DCB features are generic.

-- 
Thomas

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v6 0/6] Enable VF RSS for Niantic
  2015-01-12  5:59         ` [dpdk-dev] [PATCH v6 " Ouyang Changchun
                             ` (5 preceding siblings ...)
  2015-01-12  5:59           ` [dpdk-dev] [PATCH v6 6/6] testpmd: Set Rx VMDq RSS mode Ouyang Changchun
@ 2015-01-18 22:24           ` Thomas Monjalon
  2015-01-19  4:51             ` Ouyang, Changchun
  6 siblings, 1 reply; 144+ messages in thread
From: Thomas Monjalon @ 2015-01-18 22:24 UTC (permalink / raw)
  To: Ouyang Changchun; +Cc: dev

> This patch enables VF RSS for Niantic, which allow each VF having at most 4 queues.
> The actual queue number per VF depends on the total number of pool, which is
> determined by the max number of VF at PF initialization stage and the number of
> queue specified in config:
> 1) If the max number of VF is in the range from 1 to 32, and the number of rxq is 4
> ('--rxq 4' in testpmd), then there is totally 32 pools(ETH_32_POOLS), and each VF
> have 4 queues;
>  
> 2)If the max number of VF is in the range from 33 to 64, and the number of rxq is 2
> ('--rxq 2' in testpmd), then there is totally 64 pools(ETH_64_POOLS), and each VF
> have 2 queues;
>  
> On host, to enable VF RSS functionality, rx mq mode should be set as ETH_MQ_RX_VMDQ_RSS
> or ETH_MQ_RX_RSS mode, and SRIOV mode should be activated(max_vfs >= 1).
> It also needs config VF RSS information like hash function, RSS key, RSS key length.
>  
> The limitation for Niantic VF RSS is:
> the hash and key are shared among PF and all VF, the RETA table with 128 entries are
> also shared among PF and all VF. So it could not to provide a method to query the hash
> and reta content per VF on guest, while, if possible, please query them on host(PF) for
> the shared RETA information.

This kind of information should go in a documentation.
I think we should start new documentation for PMDs.
What about a doc/drivers/ directory ?

> changes in v6:
>   - refine codes and update message according to comments;
> 
> changes in v5:
>   - Fix minor issue and some comments;
>  
> changes in v4:
>   - Extract a function to remove embeded switch-case statement;
>   - Check whether RX queue number is a valid one, otherwise return error;
>   - Update the description a bit;
>  
> changes in v3:
>   - More cleanup;
>  
> changes in v2:
>   - Update the description;
>   - Use receiving queue number('--rxq <q-num>') specified in config to determine the
>     number of pool and the number of queue per VF;
>  
> changes in v1:
>   - Config VF RSS;
> 
> Changchun Ouyang (6):
>   ixgbe: Code cleanup
>   ixgbe: Negotiate VF API version
>   ixgbe: Get VF queue number
>   ether: Check VMDq RSS mode
>   ixgbe: Config VF RSS
>   testpmd: Set Rx VMDq RSS mode

Deeply reviewed by Vlad Zolotarov. Thanks!

Applied

Thanks to both of you
-- 
Thomas

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v6 0/6] Enable VF RSS for Niantic
  2015-01-18 22:24           ` [dpdk-dev] [PATCH v6 0/6] Enable VF RSS for Niantic Thomas Monjalon
@ 2015-01-19  4:51             ` Ouyang, Changchun
  0 siblings, 0 replies; 144+ messages in thread
From: Ouyang, Changchun @ 2015-01-19  4:51 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: dev

Hi Thomas,

> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> Sent: Monday, January 19, 2015 6:24 AM
> To: Ouyang, Changchun
> Cc: dev@dpdk.org; Vladislav Zolotarov; Butler, Siobhan A; Iremonger,
> Bernard
> Subject: Re: [dpdk-dev] [PATCH v6 0/6] Enable VF RSS for Niantic
> 
> > This patch enables VF RSS for Niantic, which allow each VF having at most 4
> queues.
> > The actual queue number per VF depends on the total number of pool,
> > which is determined by the max number of VF at PF initialization stage
> > and the number of queue specified in config:
> > 1) If the max number of VF is in the range from 1 to 32, and the
> > number of rxq is 4 ('--rxq 4' in testpmd), then there is totally 32
> > pools(ETH_32_POOLS), and each VF have 4 queues;
> >
> > 2)If the max number of VF is in the range from 33 to 64, and the
> > number of rxq is 2 ('--rxq 2' in testpmd), then there is totally 64
> > pools(ETH_64_POOLS), and each VF have 2 queues;
> >
> > On host, to enable VF RSS functionality, rx mq mode should be set as
> > ETH_MQ_RX_VMDQ_RSS or ETH_MQ_RX_RSS mode, and SRIOV mode
> should be activated(max_vfs >= 1).
> > It also needs config VF RSS information like hash function, RSS key, RSS key
> length.
> >
> > The limitation for Niantic VF RSS is:
> > the hash and key are shared among PF and all VF, the RETA table with
> > 128 entries are also shared among PF and all VF. So it could not to
> > provide a method to query the hash and reta content per VF on guest,
> > while, if possible, please query them on host(PF) for the shared RETA
> information.
> 
> This kind of information should go in a documentation.
> I think we should start new documentation for PMDs.
> What about a doc/drivers/ directory ?

OK, until the new doc is started, I will consider putting it into the programmer's guide or the user guide.

> > changes in v6:
> >   - refine codes and update message according to comments;
> >
> > changes in v5:
> >   - Fix minor issue and some comments;
> >
> > changes in v4:
> >   - Extract a function to remove embeded switch-case statement;
> >   - Check whether RX queue number is a valid one, otherwise return error;
> >   - Update the description a bit;
> >
> > changes in v3:
> >   - More cleanup;
> >
> > changes in v2:
> >   - Update the description;
> >   - Use receiving queue number('--rxq <q-num>') specified in config to
> determine the
> >     number of pool and the number of queue per VF;
> >
> > changes in v1:
> >   - Config VF RSS;
> >
> > Changchun Ouyang (6):
> >   ixgbe: Code cleanup
> >   ixgbe: Negotiate VF API version
> >   ixgbe: Get VF queue number
> >   ether: Check VMDq RSS mode
> >   ixgbe: Config VF RSS
> >   testpmd: Set Rx VMDq RSS mode
> 
> Deeply reviewed by Vlad Zolotarov. Thanks!
> 
> Applied
> 

Thanks very much for merging!
Best regards
Changchun

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v6 3/6] ixgbe: Get VF queue number
  2015-01-12  5:59           ` [dpdk-dev] [PATCH v6 3/6] ixgbe: Get VF queue number Ouyang Changchun
@ 2015-01-19  9:13             ` Wodkowski, PawelX
  2015-01-20  0:54               ` Ouyang, Changchun
  0 siblings, 1 reply; 144+ messages in thread
From: Wodkowski, PawelX @ 2015-01-19  9:13 UTC (permalink / raw)
  To: Ouyang, Changchun, dev



> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ouyang Changchun
> Sent: Monday, January 12, 2015 6:59 AM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH v6 3/6] ixgbe: Get VF queue number
> 
> Get the available Rx and Tx queue number when receiving
> IXGBE_VF_GET_QUEUES message from VF.
> 
> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> 
> changes in v5
>   - Add some 'FIX ME' comments for IXGBE_VF_TRANS_VLAN.
> 
> ---
>  lib/librte_pmd_ixgbe/ixgbe_pf.c | 40
> +++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 39 insertions(+), 1 deletion(-)
> 
> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> index 495aff5..dbda9b5 100644
> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> @@ -53,6 +53,8 @@
>  #include "ixgbe_ethdev.h"
> 
>  #define IXGBE_MAX_VFTA     (128)
> +#define IXGBE_VF_MSG_SIZE_DEFAULT 1
> +#define IXGBE_VF_GET_QUEUE_MSG_SIZE 5
> 
>  static inline uint16_t
>  dev_num_vf(struct rte_eth_dev *eth_dev)
> @@ -491,9 +493,41 @@ ixgbe_negotiate_vf_api(struct rte_eth_dev *dev,
> uint32_t vf, uint32_t *msgbuf)
>  }
> 
>  static int
> +ixgbe_get_vf_queues(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf)
> +{
> +	struct ixgbe_vf_info *vfinfo =
> +		*IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private);
> +	uint32_t default_q = vf * RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
> +
> +	/* Verify if the PF supports the mbox APIs version or not */
> +	switch (vfinfo[vf].api_version) {
> +	case ixgbe_mbox_api_20:
> +	case ixgbe_mbox_api_11:
> +		break;
> +	default:
> +		return -1;
> +	}
> +
> +	/* Notify VF of Rx and Tx queue number */
> +	msgbuf[IXGBE_VF_RX_QUEUES] = RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
> +	msgbuf[IXGBE_VF_TX_QUEUES] = RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;

Are you sure it is a good approach to pass nb_q_per_pool to the VF as the number of
available queues? What if the PF uses neither RSS nor DCB? Are those queues always
available in that case?

> +
> +	/* Notify VF of default queue */
> +	msgbuf[IXGBE_VF_DEF_QUEUE] = default_q;
> +
> +	/*
> +	 * FIX ME if it needs fill msgbuf[IXGBE_VF_TRANS_VLAN]
> +	 * for VLAN strip or VMDQ_DCB or VMDQ_DCB_RSS
> +	 */
> +
> +	return 0;
> +}
> +

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v5 0/6] Enable VF RSS for Niantic
  2015-01-18 21:58           ` Thomas Monjalon
@ 2015-01-19  9:40             ` Vlad Zolotarov
  0 siblings, 0 replies; 144+ messages in thread
From: Vlad Zolotarov @ 2015-01-19  9:40 UTC (permalink / raw)
  To: Thomas Monjalon, dev, Ouyang Changchun


On 01/18/15 23:58, Thomas Monjalon wrote:
> 2015-01-08 11:56, Vlad Zolotarov:
>> I've acked PATCH1 and PATCH2 already before and since there are no
>> changes in them, pls.,  consider them ACKed... ;)
> Thank you for the great review.
>
> Note to everybody reading that:
> Vlad approved the 2 first patches of the serie at v4 stage.
> So it would be clearer (and easier when applying patches) to set the ack or
> reviewed-by line in the next versions if there are no changes (or minor) to
> these patches.

Sure. No problem.

> It's a good habit which is not well spread in DPDK community.
>
> Thanks

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v6 4/6] ether: Check VMDq RSS mode
  2015-01-12  5:59           ` [dpdk-dev] [PATCH v6 4/6] ether: Check VMDq RSS mode Ouyang Changchun
  2015-01-12 14:06             ` Vlad Zolotarov
  2015-01-18 22:04             ` Thomas Monjalon
@ 2015-01-19 10:31             ` Wodkowski, PawelX
  2015-01-20  1:03               ` Ouyang, Changchun
  2 siblings, 1 reply; 144+ messages in thread
From: Wodkowski, PawelX @ 2015-01-19 10:31 UTC (permalink / raw)
  To: Ouyang, Changchun, dev

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ouyang Changchun
> Sent: Monday, January 12, 2015 6:59 AM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH v6 4/6] ether: Check VMDq RSS mode
> 
> Check mq mode for VMDq RSS, handle it correctly instead of returning an error;
> Also remove the limitation of per pool queue number has max value of 1,
> because
> the per pool queue number could be 2 or 4 if it is VMDq RSS mode;
> 
> The number of rxq specified in config will determine the mq mode for VMDq
> RSS.
> 
> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> 
> changes in v6:
>   - More clear error message when queue number is invalid.
> 
> changes in v5:
>   - Fix '<' issue, it should be '<=' to test rxq number;
>   - Extract a function to remove the embeded switch-case statement.
> 
> ---
>  lib/librte_ether/rte_ethdev.c | 51
> ++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 46 insertions(+), 5 deletions(-)
> 
> diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
> index 95f2ceb..e9e3368 100644
> --- a/lib/librte_ether/rte_ethdev.c
> +++ b/lib/librte_ether/rte_ethdev.c
> @@ -503,6 +503,31 @@ rte_eth_dev_tx_queue_config(struct rte_eth_dev
> *dev, uint16_t nb_queues)
>  }
> 
>  static int
> +rte_eth_dev_check_vf_rss_rxq_num(uint8_t port_id, uint16_t nb_rx_q)
> +{
> +	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
> +	switch (nb_rx_q) {
> +	case 1:
> +	case 2:
> +		RTE_ETH_DEV_SRIOV(dev).active =
> +			ETH_64_POOLS;
> +		break;
> +	case 4:
> +		RTE_ETH_DEV_SRIOV(dev).active =
> +			ETH_32_POOLS;
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = nb_rx_q;
> +	RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx =
> +		dev->pci_dev->max_vfs * nb_rx_q;
> +
> +	return 0;
> +}
> +
> +static int
>  rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t
> nb_tx_q,
>  		      const struct rte_eth_conf *dev_conf)
>  {
> @@ -510,8 +535,7 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t
> nb_rx_q, uint16_t nb_tx_q,
> 
>  	if (RTE_ETH_DEV_SRIOV(dev).active != 0) {
>  		/* check multi-queue mode */
> -		if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_RSS) ||
> -		    (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
> +		if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
>  		    (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB_RSS) ||
>  		    (dev_conf->txmode.mq_mode == ETH_MQ_TX_DCB)) {
>  			/* SRIOV only works in VMDq enable mode */
> @@ -525,7 +549,6 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t
> nb_rx_q, uint16_t nb_tx_q,
>  		}
> 
>  		switch (dev_conf->rxmode.mq_mode) {
> -		case ETH_MQ_RX_VMDQ_RSS:
>  		case ETH_MQ_RX_VMDQ_DCB:
>  		case ETH_MQ_RX_VMDQ_DCB_RSS:
>  			/* DCB/RSS VMDQ in SRIOV mode, not implement yet
> */
> @@ -534,6 +557,26 @@ rte_eth_dev_check_mq_mode(uint8_t port_id,
> uint16_t nb_rx_q, uint16_t nb_tx_q,
>  					"unsupported VMDQ mq_mode rx
> %u\n",
>  					port_id, dev_conf-
> >rxmode.mq_mode);
>  			return (-EINVAL);
> +		case ETH_MQ_RX_RSS:
> +			PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8
> +					" SRIOV active, "
> +					"Rx mq mode is changed from:"
> +					"mq_mode %u into VMDQ mq_mode
> %u\n",
> +					port_id,
> +					dev_conf->rxmode.mq_mode,
> +					dev->data-
> >dev_conf.rxmode.mq_mode);
> +		case ETH_MQ_RX_VMDQ_RSS:
> +			dev->data->dev_conf.rxmode.mq_mode =
> ETH_MQ_RX_VMDQ_RSS;
> +			if (nb_rx_q <=
> RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool)
> +				if (rte_eth_dev_check_vf_rss_rxq_num(port_id,
> nb_rx_q) != 0) {
> +					PMD_DEBUG_TRACE("ethdev
> port_id=%d"
> +						" SRIOV active, invalid queue"
> +						" number for VMDQ RSS,
> allowed"
> +						" value are 1, 2 or 4\n",
> +						port_id);
> +					return -EINVAL;
> +				}
> +			break;
>  		default: /* ETH_MQ_RX_VMDQ_ONLY or ETH_MQ_RX_NONE
> */
>  			/* if nothing mq mode configure, use default scheme */
>  			dev->data->dev_conf.rxmode.mq_mode =
> ETH_MQ_RX_VMDQ_ONLY;
> @@ -553,8 +596,6 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t
> nb_rx_q, uint16_t nb_tx_q,
>  		default: /* ETH_MQ_TX_VMDQ_ONLY or ETH_MQ_TX_NONE
> */
>  			/* if nothing mq mode configure, use default scheme */
>  			dev->data->dev_conf.txmode.mq_mode =
> ETH_MQ_TX_VMDQ_ONLY;
> -			if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)
> -				RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;

Why did you remove this completely?

>  			break;
>  		}
> 
> --
> 1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v6 3/6] ixgbe: Get VF queue number
  2015-01-19  9:13             ` Wodkowski, PawelX
@ 2015-01-20  0:54               ` Ouyang, Changchun
  0 siblings, 0 replies; 144+ messages in thread
From: Ouyang, Changchun @ 2015-01-20  0:54 UTC (permalink / raw)
  To: Wodkowski, PawelX, dev



> -----Original Message-----
> From: Wodkowski, PawelX
> Sent: Monday, January 19, 2015 5:14 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Cc: Thomas Monjalon; Vlad Zolotarov
> Subject: RE: [dpdk-dev] [PATCH v6 3/6] ixgbe: Get VF queue number
> 
> 
> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ouyang
> Changchun
> > Sent: Monday, January 12, 2015 6:59 AM
> > To: dev@dpdk.org
> > Subject: [dpdk-dev] [PATCH v6 3/6] ixgbe: Get VF queue number
> >
> > Get the available Rx and Tx queue number when receiving
> > IXGBE_VF_GET_QUEUES message from VF.
> >
> > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> >
> > changes in v5
> >   - Add some 'FIX ME' comments for IXGBE_VF_TRANS_VLAN.
> >
> > ---
> >  lib/librte_pmd_ixgbe/ixgbe_pf.c | 40
> > +++++++++++++++++++++++++++++++++++++++-
> >  1 file changed, 39 insertions(+), 1 deletion(-)
> >
> > diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > b/lib/librte_pmd_ixgbe/ixgbe_pf.c index 495aff5..dbda9b5 100644
> > --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > @@ -53,6 +53,8 @@
> >  #include "ixgbe_ethdev.h"
> >
> >  #define IXGBE_MAX_VFTA     (128)
> > +#define IXGBE_VF_MSG_SIZE_DEFAULT 1
> > +#define IXGBE_VF_GET_QUEUE_MSG_SIZE 5
> >
> >  static inline uint16_t
> >  dev_num_vf(struct rte_eth_dev *eth_dev) @@ -491,9 +493,41 @@
> > ixgbe_negotiate_vf_api(struct rte_eth_dev *dev, uint32_t vf, uint32_t
> > *msgbuf)  }
> >
> >  static int
> > +ixgbe_get_vf_queues(struct rte_eth_dev *dev, uint32_t vf, uint32_t
> > +*msgbuf) {
> > +	struct ixgbe_vf_info *vfinfo =
> > +		*IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data-
> >dev_private);
> > +	uint32_t default_q = vf * RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
> > +
> > +	/* Verify if the PF supports the mbox APIs version or not */
> > +	switch (vfinfo[vf].api_version) {
> > +	case ixgbe_mbox_api_20:
> > +	case ixgbe_mbox_api_11:
> > +		break;
> > +	default:
> > +		return -1;
> > +	}
> > +
> > +	/* Notify VF of Rx and Tx queue number */
> > +	msgbuf[IXGBE_VF_RX_QUEUES] =
> RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
> > +	msgbuf[IXGBE_VF_TX_QUEUES] =
> RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
> 
> Are you sure this is good approach to pass nb_q_per_pool to VF as the
> number of available queues? What if PF does not use RSS nor DCB? Are thos
> queues always available in that case?
> 

In that case (neither RSS nor DCB), nb_q_per_pool is 1, so it also works; I have validated it.
Thanks
Changchun

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v6 4/6] ether: Check VMDq RSS mode
  2015-01-19 10:31             ` Wodkowski, PawelX
@ 2015-01-20  1:03               ` Ouyang, Changchun
  0 siblings, 0 replies; 144+ messages in thread
From: Ouyang, Changchun @ 2015-01-20  1:03 UTC (permalink / raw)
  To: Wodkowski, PawelX, dev



> -----Original Message-----
> From: Wodkowski, PawelX
> Sent: Monday, January 19, 2015 6:31 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Cc: Thomas Monjalon; Vlad Zolotarov
> Subject: RE: [dpdk-dev] [PATCH v6 4/6] ether: Check VMDq RSS mode
> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ouyang
> Changchun
> > Sent: Monday, January 12, 2015 6:59 AM
> > To: dev@dpdk.org
> > Subject: [dpdk-dev] [PATCH v6 4/6] ether: Check VMDq RSS mode
> >
> > Check mq mode for VMDq RSS, handle it correctly instead of returning
> > an error; Also remove the limitation of per pool queue number has max
> > value of 1, because the per pool queue number could be 2 or 4 if it is
> > VMDq RSS mode;
> >
> > The number of rxq specified in config will determine the mq mode for
> > VMDq RSS.
> >
> > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> >
> > changes in v6:
> >   - More clear error message when queue number is invalid.
> >
> > changes in v5:
> >   - Fix '<' issue, it should be '<=' to test rxq number;
> >   - Extract a function to remove the embeded switch-case statement.
> >
> > ---
> >  lib/librte_ether/rte_ethdev.c | 51
> > ++++++++++++++++++++++++++++++++++++++-----
> >  1 file changed, 46 insertions(+), 5 deletions(-)
> >
> > diff --git a/lib/librte_ether/rte_ethdev.c
> > b/lib/librte_ether/rte_ethdev.c index 95f2ceb..e9e3368 100644
> > --- a/lib/librte_ether/rte_ethdev.c
> > +++ b/lib/librte_ether/rte_ethdev.c
> > @@ -503,6 +503,31 @@ rte_eth_dev_tx_queue_config(struct
> rte_eth_dev
> > *dev, uint16_t nb_queues)  }
> >
> >  static int
> > +rte_eth_dev_check_vf_rss_rxq_num(uint8_t port_id, uint16_t nb_rx_q)
> {
> > +	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
> > +	switch (nb_rx_q) {
> > +	case 1:
> > +	case 2:
> > +		RTE_ETH_DEV_SRIOV(dev).active =
> > +			ETH_64_POOLS;
> > +		break;
> > +	case 4:
> > +		RTE_ETH_DEV_SRIOV(dev).active =
> > +			ETH_32_POOLS;
> > +		break;
> > +	default:
> > +		return -EINVAL;
> > +	}
> > +
> > +	RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = nb_rx_q;
> > +	RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx =
> > +		dev->pci_dev->max_vfs * nb_rx_q;
> > +
> > +	return 0;
> > +}
> > +
> > +static int
> >  rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q,
> uint16_t
> > nb_tx_q,
> >  		      const struct rte_eth_conf *dev_conf)  { @@ -510,8 +535,7
> @@
> > rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q,
> uint16_t
> > nb_tx_q,
> >
> >  	if (RTE_ETH_DEV_SRIOV(dev).active != 0) {
> >  		/* check multi-queue mode */
> > -		if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_RSS) ||
> > -		    (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
> > +		if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) ||
> >  		    (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB_RSS)
> ||
> >  		    (dev_conf->txmode.mq_mode == ETH_MQ_TX_DCB)) {
> >  			/* SRIOV only works in VMDq enable mode */ @@ -
> 525,7 +549,6 @@
> > rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q,
> uint16_t
> > nb_tx_q,
> >  		}
> >
> >  		switch (dev_conf->rxmode.mq_mode) {
> > -		case ETH_MQ_RX_VMDQ_RSS:
> >  		case ETH_MQ_RX_VMDQ_DCB:
> >  		case ETH_MQ_RX_VMDQ_DCB_RSS:
> >  			/* DCB/RSS VMDQ in SRIOV mode, not implement
> yet */ @@ -534,6
> > +557,26 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t
> > nb_rx_q, uint16_t nb_tx_q,
> >  					"unsupported VMDQ mq_mode rx
> > %u\n",
> >  					port_id, dev_conf-
> > >rxmode.mq_mode);
> >  			return (-EINVAL);
> > +		case ETH_MQ_RX_RSS:
> > +			PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8
> > +					" SRIOV active, "
> > +					"Rx mq mode is changed from:"
> > +					"mq_mode %u into VMDQ mq_mode
> > %u\n",
> > +					port_id,
> > +					dev_conf->rxmode.mq_mode,
> > +					dev->data-
> > >dev_conf.rxmode.mq_mode);
> > +		case ETH_MQ_RX_VMDQ_RSS:
> > +			dev->data->dev_conf.rxmode.mq_mode =
> > ETH_MQ_RX_VMDQ_RSS;
> > +			if (nb_rx_q <=
> > RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool)
> > +				if
> (rte_eth_dev_check_vf_rss_rxq_num(port_id,
> > nb_rx_q) != 0) {
> > +					PMD_DEBUG_TRACE("ethdev
> > port_id=%d"
> > +						" SRIOV active, invalid queue"
> > +						" number for VMDQ RSS,
> > allowed"
> > +						" value are 1, 2 or 4\n",
> > +						port_id);
> > +					return -EINVAL;
> > +				}
> > +			break;
> >  		default: /* ETH_MQ_RX_VMDQ_ONLY or
> ETH_MQ_RX_NONE */
> >  			/* if nothing mq mode configure, use default scheme
> */
> >  			dev->data->dev_conf.rxmode.mq_mode =
> ETH_MQ_RX_VMDQ_ONLY; @@
> > -553,8 +596,6 @@ rte_eth_dev_check_mq_mode(uint8_t port_id,
> uint16_t
> > nb_rx_q, uint16_t nb_tx_q,
> >  		default: /* ETH_MQ_TX_VMDQ_ONLY or
> ETH_MQ_TX_NONE */
> >  			/* if nothing mq mode configure, use default scheme
> */
> >  			dev->data->dev_conf.txmode.mq_mode =
> ETH_MQ_TX_VMDQ_ONLY;
> > -			if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1)
> > -				RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool =
> 1;
> 
> Why you removed this completely?
> 
I have discussed this with Vlad in a previous email.
The reason for removing it is that when rxmode.mq_mode is ETH_MQ_RX_VMDQ_RSS and txmode.mq_mode is ETH_MQ_TX_NONE,
the logic is incorrect: in such a case nb_q_per_pool could be 1, 2, or 4,
and forcibly setting it to 1 will always disable VF RSS.

I see you will split nb_q_per_pool into nb_rxq_per_pool and nb_txq_per_pool; that's a good way to meet the requirements of the different RSS and DCB cases.
So you need to consider such a case and make sure not to break anything.

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v6 5/6] ixgbe: Config VF RSS
  2015-01-12  5:59           ` [dpdk-dev] [PATCH v6 5/6] ixgbe: Config VF RSS Ouyang Changchun
  2015-01-12 14:04             ` Vlad Zolotarov
@ 2015-01-20  9:35             ` Wodkowski, PawelX
  2015-01-21  2:43               ` Ouyang, Changchun
  1 sibling, 1 reply; 144+ messages in thread
From: Wodkowski, PawelX @ 2015-01-20  9:35 UTC (permalink / raw)
  To: Ouyang, Changchun, dev

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ouyang Changchun
> Sent: Monday, January 12, 2015 6:59 AM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH v6 5/6] ixgbe: Config VF RSS
> 
> It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable VF RSS.
> 
> The psrtype will determine how many queues the received packets will distribute
> to,
> and the value of psrtype should depends on both facet: max VF rxq number
> which
> has been negotiated with PF, and the number of rxq specified in config on guest.
> 
> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> 
> Changes in v6:
>   - Raise an error for the case of ETH_16_POOLS in config vf rss, as the previous
>     logic have changed it into: ETH_32_POOLS.
> 
> Changes in v4:
>  - The number of rxq from config should be power of 2 and should not bigger
> than
>     max VF rxq number(negotiated between guest and host).
> 
> ---
>  lib/librte_pmd_ixgbe/ixgbe_pf.c   |  15 ++++++
>  lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 102
> +++++++++++++++++++++++++++++++++-----
>  2 files changed, 105 insertions(+), 12 deletions(-)
> 
> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> index dbda9b5..93f6e43 100644
> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev
> *eth_dev)
>  	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw->mac.num_rar_entries),
> 0);
>  	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw->mac.num_rar_entries),
> 0);
> 
> +	/*
> +	 * VF RSS can support at most 4 queues for each VF, even if
> +	 * 8 queues are available for each VF, it need refine to 4
> +	 * queues here due to this limitation, otherwise no queue
> +	 * will receive any packet even RSS is enabled.
> +	 */
> +	if (eth_dev->data->dev_conf.rxmode.mq_mode ==
> ETH_MQ_RX_VMDQ_RSS) {
> +		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
> +			RTE_ETH_DEV_SRIOV(eth_dev).active =
> ETH_32_POOLS;
> +			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
> +			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
> +				dev_num_vf(eth_dev) * 4;
> +		}
> +	}
> +

I had not looked at your patches before, but I think you are messing with things that should not be changed:

Why are you changing those values? They are set up during ixgbe_pf_host_init(). The limitation you are
describing is only RSS related. If there is a reconfiguration from
ETH_MQ_RX_VMDQ_RSS to another mode, those values need to be re-evaluated. If you find this
kind of limitation, you should handle it during the RSS part of the configuration. Or, if your way is the right way,
you should explicitly make a separate function that will re-evaluate those parameters each time.

The second issue with this code is that nb_q_per_pool is changed from:
ixgbe_pf_host_configure() -> ixgbe_dev_start() -> rte_eth_dev_start()
and
rte_eth_dev_check_vf_rss_rxq_num() -> rte_eth_dev_check_mq_mode() -> rte_eth_dev_configure()

Which one is the right one? If both, why are they calculated twice?

I don't think that the rte_eth_dev_data::sriov field should be changed at all - it holds the current SRIOV capabilities.
If this changes during runtime, there is no point in having this field at all, and there should be some kind of "sriov_get()"
function that will calculate and return those parameters dynamically.

Please refer also to <F6F2A6264E145F47A18AB6DF8E87425D12B89B02@IRSMSX102.ger.corp.intel.com>
for further issues.

I think this patchset should not be applied.

>  	/* set VMDq map to default PF pool */
>  	hw->mac.ops.set_vmdq(hw, 0,
> RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
> 
> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> index f69abda..20627df 100644
> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> @@ -3327,6 +3327,67 @@ ixgbe_alloc_rx_queue_mbufs(struct igb_rx_queue
> *rxq)
>  }
> 
>  static int
> +ixgbe_config_vf_rss(struct rte_eth_dev *dev)
> +{
> +	struct ixgbe_hw *hw;
> +	uint32_t mrqc;
> +
> +	ixgbe_rss_configure(dev);
> +
> +	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> +
> +	/* MRQC: enable VF RSS */
> +	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
> +	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
> +	switch (RTE_ETH_DEV_SRIOV(dev).active) {
> +	case ETH_64_POOLS:
> +		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
> +		break;
> +
> +	case ETH_32_POOLS:
> +		mrqc |= IXGBE_MRQC_VMDQRSS32EN;
> +		break;
> +
> +	default:
> +		PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with
> VMDQ RSS");
> +		return -EINVAL;
> +	}
> +
> +	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
> +
> +	return 0;
> +}
> +
> +static int
> +ixgbe_config_vf_default(struct rte_eth_dev *dev)
> +{
> +	struct ixgbe_hw *hw =
> +		IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> +
> +	switch (RTE_ETH_DEV_SRIOV(dev).active) {
> +	case ETH_64_POOLS:
> +		IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> +			IXGBE_MRQC_VMDQEN);
> +		break;
> +
> +	case ETH_32_POOLS:
> +		IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> +			IXGBE_MRQC_VMDQRT4TCEN);
> +		break;
> +
> +	case ETH_16_POOLS:
> +		IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> +			IXGBE_MRQC_VMDQRT8TCEN);
> +		break;
> +	default:
> +		PMD_INIT_LOG(ERR,
> +			"invalid pool number in IOV mode");
> +		break;
> +	}
> +	return 0;
> +}
> +
> +static int
>  ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
>  {
>  	struct ixgbe_hw *hw =
> @@ -3358,24 +3419,25 @@ ixgbe_dev_mq_rx_configure(struct rte_eth_dev
> *dev)
>  			default: ixgbe_rss_disable(dev);
>  		}
>  	} else {
> -		switch (RTE_ETH_DEV_SRIOV(dev).active) {
>  		/*
>  		 * SRIOV active scheme
> -		 * FIXME if support DCB/RSS together with VMDq & SRIOV
> +		 * Support RSS together with VMDq & SRIOV
>  		 */
> -		case ETH_64_POOLS:
> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> IXGBE_MRQC_VMDQEN);
> -			break;
> -
> -		case ETH_32_POOLS:
> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> IXGBE_MRQC_VMDQRT4TCEN);
> +		switch (dev->data->dev_conf.rxmode.mq_mode) {
> +		case ETH_MQ_RX_RSS:
> +		case ETH_MQ_RX_VMDQ_RSS:
> +			ixgbe_config_vf_rss(dev);
>  			break;
> 
> -		case ETH_16_POOLS:
> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> IXGBE_MRQC_VMDQRT8TCEN);
> -			break;
> +		/* FIXME if support DCB/RSS together with VMDq & SRIOV */
> +		case ETH_MQ_RX_VMDQ_DCB:
> +		case ETH_MQ_RX_VMDQ_DCB_RSS:
> +			PMD_INIT_LOG(ERR,
> +				"Could not support DCB with VMDq & SRIOV");
> +			return -1;
>  		default:
> -			PMD_INIT_LOG(ERR, "invalid pool number in IOV
> mode");
> +			ixgbe_config_vf_default(dev);
> +			break;
>  		}
>  	}
> 
> @@ -3993,6 +4055,19 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
>  	PMD_INIT_FUNC_TRACE();
>  	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> 
> +	if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
> +		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
> +			"it should be power of 2");
> +		return -1;
> +	}
> +
> +	if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
> +		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
> +			"it should be equal to or less than %d",
> +			hw->mac.max_rx_queues);
> +		return -1;
> +	}
> +
>  	/*
>  	 * When the VF driver issues a IXGBE_VF_RESET request, the PF driver
>  	 * disables the VF receipt of packets if the PF MTU is > 1500.
> @@ -4094,6 +4169,9 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
>  			IXGBE_PSRTYPE_IPV6HDR;
>  #endif
> 
> +	/* Set RQPL for VF RSS according to max Rx queue */
> +	psrtype |= (dev->data->nb_rx_queues >> 1) <<
> +		IXGBE_PSRTYPE_RQPL_SHIFT;
>  	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
> 
>  	if (dev->data->dev_conf.rxmode.enable_scatter) {
> --
> 1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v6 5/6] ixgbe: Config VF RSS
  2015-01-20  9:35             ` Wodkowski, PawelX
@ 2015-01-21  2:43               ` Ouyang, Changchun
  2015-01-21  8:44                 ` Wodkowski, PawelX
  0 siblings, 1 reply; 144+ messages in thread
From: Ouyang, Changchun @ 2015-01-21  2:43 UTC (permalink / raw)
  To: Wodkowski, PawelX, dev



> -----Original Message-----
> From: Wodkowski, PawelX
> Sent: Tuesday, January 20, 2015 5:35 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Subject: RE: [dpdk-dev] [PATCH v6 5/6] ixgbe: Config VF RSS
> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ouyang
> Changchun
> > Sent: Monday, January 12, 2015 6:59 AM
> > To: dev@dpdk.org
> > Subject: [dpdk-dev] [PATCH v6 5/6] ixgbe: Config VF RSS
> >
> > It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable VF
> RSS.
> >
> > The psrtype will determine how many queues the received packets will
> > distribute to, and the value of psrtype should depends on both facet:
> > max VF rxq number which has been negotiated with PF, and the number of
> > rxq specified in config on guest.
> >
> > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> >
> > Changes in v6:
> >   - Raise an error for the case of ETH_16_POOLS in config vf rss, as the
> previous
> >     logic have changed it into: ETH_32_POOLS.
> >
> > Changes in v4:
> >  - The number of rxq from config should be power of 2 and should not
> > bigger than
> >     max VF rxq number(negotiated between guest and host).
> >
> > ---
> >  lib/librte_pmd_ixgbe/ixgbe_pf.c   |  15 ++++++
> >  lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 102
> > +++++++++++++++++++++++++++++++++-----
> >  2 files changed, 105 insertions(+), 12 deletions(-)
> >
> > diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > b/lib/librte_pmd_ixgbe/ixgbe_pf.c index dbda9b5..93f6e43 100644
> > --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev
> > *eth_dev)
> >  	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw-
> >mac.num_rar_entries),
> > 0);
> >  	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw-
> >mac.num_rar_entries),
> > 0);
> >
> > +	/*
> > +	 * VF RSS can support at most 4 queues for each VF, even if
> > +	 * 8 queues are available for each VF, it need refine to 4
> > +	 * queues here due to this limitation, otherwise no queue
> > +	 * will receive any packet even RSS is enabled.
> > +	 */
> > +	if (eth_dev->data->dev_conf.rxmode.mq_mode ==
> > ETH_MQ_RX_VMDQ_RSS) {
> > +		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
> > +			RTE_ETH_DEV_SRIOV(eth_dev).active =
> > ETH_32_POOLS;
> > +			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
> > +			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
> > +				dev_num_vf(eth_dev) * 4;
> > +		}
> > +	}
> > +
> 
> I did not looked before at your patches but I think you are messing with
> things that should not be changed:
> 
> Why you are changing those values. They are set up during
> ixgbe_pf_host_init(). Limitation you are describing is only RSS related. If
> there will be reconfiguration from ETH_MQ_RX_VMDQ_RSS to other mode
> those value need to be re-evaluated. If you find this kind of limitation you
> should handle it during RSS part configuration. Or if your way is the right way
> you should explicitly make separate function that will re-evaluate those
> parameters each time.
> 
> Second issue with this code is that the nb_q_per_pool is changed from:
> ixgbe_pf_host_configure() -> ixgbe_dev_start() -> rte_eth_dev_start() and
> rte_eth_dev_check_vf_rss_rxq_num() -> rte_eth_dev_check_mq_mode() ->
> rte_eth_dev_configure()
> 
> Which one is the right one? If both, why they are calculated twice?
> 
> I don't think that rte_eth_dev_data::sriov field should be changed at all - it
> holds current SRIOV capabilities.
> If this will change during runtime it no point to have this field at all and should
> be some kind of "siov_get()"
> function that will calculate and return those parameters dynamically.
> 
> Please refer also to
> <F6F2A6264E145F47A18AB6DF8E87425D12B89B02@IRSMSX102.ger.corp.intel
> .com>
> for further issues.
> 
> I think this patchset should not be applied.

The better way would have been either to raise your comments before this patch was merged into mainline, or
to send out a patch to fix it.
I agree with part of what you said: the check is not necessary for VF RSS in pf_host_configure because
check_mq_mode has already checked the queue number. I will send out a patch to fix it by removing this check.

On the other hand, I disagree with you on "rte_eth_dev_data::sriov field should not be changed at all".
The reason we need to refine those values is that they are obtained in pf_init, which is called at the dev probe stage,
and those values are not accurate: they should vary according to the mq mode, and the mq mode can be determined only after
the dev is configured.

> 
> >  	/* set VMDq map to default PF pool */
> >  	hw->mac.ops.set_vmdq(hw, 0,
> > RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
> >
> > diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > index f69abda..20627df 100644
> > --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > @@ -3327,6 +3327,67 @@ ixgbe_alloc_rx_queue_mbufs(struct
> igb_rx_queue
> > *rxq)
> >  }
> >
> >  static int
> > +ixgbe_config_vf_rss(struct rte_eth_dev *dev) {
> > +	struct ixgbe_hw *hw;
> > +	uint32_t mrqc;
> > +
> > +	ixgbe_rss_configure(dev);
> > +
> > +	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> > +
> > +	/* MRQC: enable VF RSS */
> > +	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
> > +	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
> > +	switch (RTE_ETH_DEV_SRIOV(dev).active) {
> > +	case ETH_64_POOLS:
> > +		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
> > +		break;
> > +
> > +	case ETH_32_POOLS:
> > +		mrqc |= IXGBE_MRQC_VMDQRSS32EN;
> > +		break;
> > +
> > +	default:
> > +		PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with
> > VMDQ RSS");
> > +		return -EINVAL;
> > +	}
> > +
> > +	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
> > +
> > +	return 0;
> > +}
> > +
> > +static int
> > +ixgbe_config_vf_default(struct rte_eth_dev *dev) {
> > +	struct ixgbe_hw *hw =
> > +		IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> > +
> > +	switch (RTE_ETH_DEV_SRIOV(dev).active) {
> > +	case ETH_64_POOLS:
> > +		IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > +			IXGBE_MRQC_VMDQEN);
> > +		break;
> > +
> > +	case ETH_32_POOLS:
> > +		IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > +			IXGBE_MRQC_VMDQRT4TCEN);
> > +		break;
> > +
> > +	case ETH_16_POOLS:
> > +		IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > +			IXGBE_MRQC_VMDQRT8TCEN);
> > +		break;
> > +	default:
> > +		PMD_INIT_LOG(ERR,
> > +			"invalid pool number in IOV mode");
> > +		break;
> > +	}
> > +	return 0;
> > +}
> > +
> > +static int
> >  ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)  {
> >  	struct ixgbe_hw *hw =
> > @@ -3358,24 +3419,25 @@ ixgbe_dev_mq_rx_configure(struct
> rte_eth_dev
> > *dev)
> >  			default: ixgbe_rss_disable(dev);
> >  		}
> >  	} else {
> > -		switch (RTE_ETH_DEV_SRIOV(dev).active) {
> >  		/*
> >  		 * SRIOV active scheme
> > -		 * FIXME if support DCB/RSS together with VMDq & SRIOV
> > +		 * Support RSS together with VMDq & SRIOV
> >  		 */
> > -		case ETH_64_POOLS:
> > -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > IXGBE_MRQC_VMDQEN);
> > -			break;
> > -
> > -		case ETH_32_POOLS:
> > -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > IXGBE_MRQC_VMDQRT4TCEN);
> > +		switch (dev->data->dev_conf.rxmode.mq_mode) {
> > +		case ETH_MQ_RX_RSS:
> > +		case ETH_MQ_RX_VMDQ_RSS:
> > +			ixgbe_config_vf_rss(dev);
> >  			break;
> >
> > -		case ETH_16_POOLS:
> > -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > IXGBE_MRQC_VMDQRT8TCEN);
> > -			break;
> > +		/* FIXME if support DCB/RSS together with VMDq & SRIOV */
> > +		case ETH_MQ_RX_VMDQ_DCB:
> > +		case ETH_MQ_RX_VMDQ_DCB_RSS:
> > +			PMD_INIT_LOG(ERR,
> > +				"Could not support DCB with VMDq &
> SRIOV");
> > +			return -1;
> >  		default:
> > -			PMD_INIT_LOG(ERR, "invalid pool number in IOV
> > mode");
> > +			ixgbe_config_vf_default(dev);
> > +			break;
> >  		}
> >  	}
> >
> > @@ -3993,6 +4055,19 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
> >  	PMD_INIT_FUNC_TRACE();
> >  	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> >
> > +	if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
> > +		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
> > +			"it should be power of 2");
> > +		return -1;
> > +	}
> > +
> > +	if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
> > +		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
> > +			"it should be equal to or less than %d",
> > +			hw->mac.max_rx_queues);
> > +		return -1;
> > +	}
> > +
> >  	/*
> >  	 * When the VF driver issues a IXGBE_VF_RESET request, the PF
> driver
> >  	 * disables the VF receipt of packets if the PF MTU is > 1500.
> > @@ -4094,6 +4169,9 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
> >  			IXGBE_PSRTYPE_IPV6HDR;
> >  #endif
> >
> > +	/* Set RQPL for VF RSS according to max Rx queue */
> > +	psrtype |= (dev->data->nb_rx_queues >> 1) <<
> > +		IXGBE_PSRTYPE_RQPL_SHIFT;
> >  	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
> >
> >  	if (dev->data->dev_conf.rxmode.enable_scatter) {
> > --
> > 1.8.4.2

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v6 5/6] ixgbe: Config VF RSS
  2015-01-21  2:43               ` Ouyang, Changchun
@ 2015-01-21  8:44                 ` Wodkowski, PawelX
  2015-01-22 12:59                   ` Vlad Zolotarov
  0 siblings, 1 reply; 144+ messages in thread
From: Wodkowski, PawelX @ 2015-01-21  8:44 UTC (permalink / raw)
  To: Ouyang, Changchun, dev



> -----Original Message-----
> From: Ouyang, Changchun
> Sent: Wednesday, January 21, 2015 3:44 AM
> To: Wodkowski, PawelX; dev@dpdk.org
> Cc: Ouyang, Changchun
> Subject: RE: [dpdk-dev] [PATCH v6 5/6] ixgbe: Config VF RSS
> 
> 
> 
> > -----Original Message-----
> > From: Wodkowski, PawelX
> > Sent: Tuesday, January 20, 2015 5:35 PM
> > To: Ouyang, Changchun; dev@dpdk.org
> > Subject: RE: [dpdk-dev] [PATCH v6 5/6] ixgbe: Config VF RSS
> >
> > > -----Original Message-----
> > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ouyang
> > Changchun
> > > Sent: Monday, January 12, 2015 6:59 AM
> > > To: dev@dpdk.org
> > > Subject: [dpdk-dev] [PATCH v6 5/6] ixgbe: Config VF RSS
> > >
> > > It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable VF
> > RSS.
> > >
> > > The psrtype will determine how many queues the received packets will
> > > distribute to, and the value of psrtype should depends on both facet:
> > > max VF rxq number which has been negotiated with PF, and the number of
> > > rxq specified in config on guest.
> > >
> > > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> > >
> > > Changes in v6:
> > >   - Raise an error for the case of ETH_16_POOLS in config vf rss, as the
> > previous
> > >     logic have changed it into: ETH_32_POOLS.
> > >
> > > Changes in v4:
> > >  - The number of rxq from config should be power of 2 and should not
> > > bigger than
> > >     max VF rxq number(negotiated between guest and host).
> > >
> > > ---
> > >  lib/librte_pmd_ixgbe/ixgbe_pf.c   |  15 ++++++
> > >  lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 102
> > > +++++++++++++++++++++++++++++++++-----
> > >  2 files changed, 105 insertions(+), 12 deletions(-)
> > >
> > > diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > > b/lib/librte_pmd_ixgbe/ixgbe_pf.c index dbda9b5..93f6e43 100644
> > > --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > > +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > > @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev
> > > *eth_dev)
> > >  	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw-
> > >mac.num_rar_entries),
> > > 0);
> > >  	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw-
> > >mac.num_rar_entries),
> > > 0);
> > >
> > > +	/*
> > > +	 * VF RSS can support at most 4 queues for each VF, even if
> > > +	 * 8 queues are available for each VF, it need refine to 4
> > > +	 * queues here due to this limitation, otherwise no queue
> > > +	 * will receive any packet even RSS is enabled.
> > > +	 */
> > > +	if (eth_dev->data->dev_conf.rxmode.mq_mode ==
> > > ETH_MQ_RX_VMDQ_RSS) {
> > > +		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
> > > +			RTE_ETH_DEV_SRIOV(eth_dev).active =
> > > ETH_32_POOLS;
> > > +			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
> > > +			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
> > > +				dev_num_vf(eth_dev) * 4;
> > > +		}
> > > +	}
> > > +
> >
> > I did not looked before at your patches but I think you are messing with
> > things that should not be changed:
> >
> > Why you are changing those values. They are set up during
> > ixgbe_pf_host_init(). Limitation you are describing is only RSS related. If
> > there will be reconfiguration from ETH_MQ_RX_VMDQ_RSS to other mode
> > those value need to be re-evaluated. If you find this kind of limitation you
> > should handle it during RSS part configuration. Or if your way is the right way
> > you should explicitly make separate function that will re-evaluate those
> > parameters each time.
> >
> > Second issue with this code is that the nb_q_per_pool is changed from:
> > ixgbe_pf_host_configure() -> ixgbe_dev_start() -> rte_eth_dev_start() and
> > rte_eth_dev_check_vf_rss_rxq_num() -> rte_eth_dev_check_mq_mode() ->
> > rte_eth_dev_configure()
> >
> > Which one is the right one? If both, why they are calculated twice?
> >
> > I don't think that rte_eth_dev_data::sriov field should be changed at all - it
> > holds current SRIOV capabilities.
> > If this will change during runtime it no point to have this field at all and should
> > be some kind of "siov_get()"
> > function that will calculate and return those parameters dynamically.
> >
> > Please refer also to
> >
> <F6F2A6264E145F47A18AB6DF8E87425D12B89B02@IRSMSX102.ger.corp.intel
> > .com>
> > for further issues.
> >
> > I think this patchset should not be applied.
> 
> The better way should be either raise your comments before this patch is
> merged into mainline, or

Yes, I should have, but I trusted that Vlad's review was covering this part. It doesn't matter,
my fault.

> You send out a patch to fix it.
> I agree on part of what you said, the check is not necessary for vf rss in
> pf_host_configure because
> Check_mq_mode has already check the queue number, I will send out a patch to
> fix it by removing this check.
> 
> On the other hand, I disagree with you on " rte_eth_dev_data::sriov field should
> be changed at all ",

This is my private opinion, but either way, whether those values are recalculated or not,
it should be consistent and, for feature development, well documented when they are
evaluated. Changing something in a function whose name is
"rte_eth_dev_check_mq_mode()" is not very obvious.

> The reason we need refine those value, is that those value get in pf_init, which is
> called on dev probe stage,
> And those value are not accurate, they should vary according to mq mode, the
> mq mode could be determined only after
> Dev is configured.

If you think they are "not accurate" you should not calculate them because they are
invalid and make VF behavior undefined. VF can probe those values before you
make them "accurate" in port configuration phase. What then? It is a race condition
bug, and it definitely should be fixed in your next patch.

You should also fix the port reconfiguration bug I mentioned before (for VFs > 0, testpmd
is unable to start the port after the command 'port config all rxq X', X > 1, after the RSS VF
patches).

Pawel

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v6 5/6] ixgbe: Config VF RSS
  2015-01-21  8:44                 ` Wodkowski, PawelX
@ 2015-01-22 12:59                   ` Vlad Zolotarov
  2015-01-22 13:19                     ` Wodkowski, PawelX
  0 siblings, 1 reply; 144+ messages in thread
From: Vlad Zolotarov @ 2015-01-22 12:59 UTC (permalink / raw)
  To: Wodkowski, PawelX, Ouyang, Changchun, dev


On 01/21/15 10:44, Wodkowski, PawelX wrote:
>
>> -----Original Message-----
>> From: Ouyang, Changchun
>> Sent: Wednesday, January 21, 2015 3:44 AM
>> To: Wodkowski, PawelX; dev@dpdk.org
>> Cc: Ouyang, Changchun
>> Subject: RE: [dpdk-dev] [PATCH v6 5/6] ixgbe: Config VF RSS
>>
>>
>>
>>> -----Original Message-----
>>> From: Wodkowski, PawelX
>>> Sent: Tuesday, January 20, 2015 5:35 PM
>>> To: Ouyang, Changchun; dev@dpdk.org
>>> Subject: RE: [dpdk-dev] [PATCH v6 5/6] ixgbe: Config VF RSS
>>>
>>>> -----Original Message-----
>>>> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ouyang
>>> Changchun
>>>> Sent: Monday, January 12, 2015 6:59 AM
>>>> To: dev@dpdk.org
>>>> Subject: [dpdk-dev] [PATCH v6 5/6] ixgbe: Config VF RSS
>>>>
>>>> It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable VF
>>> RSS.
>>>> The psrtype will determine how many queues the received packets will
>>>> distribute to, and the value of psrtype should depends on both facet:
>>>> max VF rxq number which has been negotiated with PF, and the number of
>>>> rxq specified in config on guest.
>>>>
>>>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
>>>>
>>>> Changes in v6:
>>>>    - Raise an error for the case of ETH_16_POOLS in config vf rss, as the
>>> previous
>>>>      logic have changed it into: ETH_32_POOLS.
>>>>
>>>> Changes in v4:
>>>>   - The number of rxq from config should be power of 2 and should not
>>>> bigger than
>>>>      max VF rxq number(negotiated between guest and host).
>>>>
>>>> ---
>>>>   lib/librte_pmd_ixgbe/ixgbe_pf.c   |  15 ++++++
>>>>   lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 102
>>>> +++++++++++++++++++++++++++++++++-----
>>>>   2 files changed, 105 insertions(+), 12 deletions(-)
>>>>
>>>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>>> b/lib/librte_pmd_ixgbe/ixgbe_pf.c index dbda9b5..93f6e43 100644
>>>> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>>> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>>> @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev
>>>> *eth_dev)
>>>>   	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw-
>>>> mac.num_rar_entries),
>>>> 0);
>>>>   	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw-
>>>> mac.num_rar_entries),
>>>> 0);
>>>>
>>>> +	/*
>>>> +	 * VF RSS can support at most 4 queues for each VF, even if
>>>> +	 * 8 queues are available for each VF, it need refine to 4
>>>> +	 * queues here due to this limitation, otherwise no queue
>>>> +	 * will receive any packet even RSS is enabled.
>>>> +	 */
>>>> +	if (eth_dev->data->dev_conf.rxmode.mq_mode ==
>>>> ETH_MQ_RX_VMDQ_RSS) {
>>>> +		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
>>>> +			RTE_ETH_DEV_SRIOV(eth_dev).active =
>>>> ETH_32_POOLS;
>>>> +			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
>>>> +			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
>>>> +				dev_num_vf(eth_dev) * 4;
>>>> +		}
>>>> +	}
>>>> +
>>> I did not looked before at your patches but I think you are messing with
>>> things that should not be changed:
>>>
>>> Why you are changing those values. They are set up during
>>> ixgbe_pf_host_init(). Limitation you are describing is only RSS related. If
>>> there will be reconfiguration from ETH_MQ_RX_VMDQ_RSS to other mode
>>> those value need to be re-evaluated. If you find this kind of limitation you
>>> should handle it during RSS part configuration. Or if your way is the right way
>>> you should explicitly make separate function that will re-evaluate those
>>> parameters each time.
>>>
>>> Second issue with this code is that the nb_q_per_pool is changed from:
>>> ixgbe_pf_host_configure() -> ixgbe_dev_start() -> rte_eth_dev_start() and
>>> rte_eth_dev_check_vf_rss_rxq_num() -> rte_eth_dev_check_mq_mode() ->
>>> rte_eth_dev_configure()
>>>
>>> Which one is the right one? If both, why they are calculated twice?
>>>
>>> I don't think that rte_eth_dev_data::sriov field should be changed at all - it
>>> holds current SRIOV capabilities.
>>> If this will change during runtime it no point to have this field at all and should
>>> be some kind of "siov_get()"
>>> function that will calculate and return those parameters dynamically.
>>>
>>> Please refer also to
>>>
>> <F6F2A6264E145F47A18AB6DF8E87425D12B89B02@IRSMSX102.ger.corp.intel
>>> .com>
>>> for further issues.
>>>
>>> I think this patchset should not be applied.
>> The better way should be either raise your comments before this patch is
>> merged into mainline, or
> Yes, I should but I trusted that Vlad review was covering this part.

I'm new on the list and my experience with DPDK is about two months, so
please don't judge me too harshly... ;)
I tried to cover the obvious things and actually learned the code while
reviewing. The things you say, Pavel(X?), make sense and I obviously missed
that.
But as Changchun mentioned, there is nothing that can't be fixed with
follow-up patches... ;)


> Does no matter
> my, fault.
>
>> You send out a patch to fix it.
>> I agree on part of what you said, the check is not necessary for vf rss in
>> pf_host_configure because
>> Check_mq_mode has already check the queue number, I will send out a patch to
>> fix it by removing this check.
>>
>> On the other hand, I disagree with you on " rte_eth_dev_data::sriov field should
>> be changed at all ",
> This is my private opinion, but either way, recalculating those values or not,
> it should be consistent and for feature development well documented when it is
> evaluated. Changing something in function that's name is calculated
> "rte_eth_dev_check_mq_mode()" is not so very obvious.
>
>> The reason we need refine those value, is that those value get in pf_init, which is
>> called on dev probe stage,
>> And those value are not accurate, they should vary according to mq mode, the
>> mq mode could be determined only after
>> Dev is configured.
> If you think they are "not accurate" you should not calculate them because they are
> invalid and make VF behavior undefined. VF can probe those values before you
> make them "accurate" in port configuration phase. What then? It is a race condition
> bug, and it definitely should be fixed in your next patch.
>
> You should also fix port reconfiguration bug as I mention before (for VFs > 0 testpmd
> is unable to start port after commnad 'port config all rxq X', X > 1 after RSS VF
> patches).
>
> Pawel
>
>

^ permalink raw reply	[flat|nested] 144+ messages in thread

* Re: [dpdk-dev] [PATCH v6 5/6] ixgbe: Config VF RSS
  2015-01-22 12:59                   ` Vlad Zolotarov
@ 2015-01-22 13:19                     ` Wodkowski, PawelX
  0 siblings, 0 replies; 144+ messages in thread
From: Wodkowski, PawelX @ 2015-01-22 13:19 UTC (permalink / raw)
  To: Vlad Zolotarov, Ouyang, Changchun, dev

> 
> I'm new on the list and my experience with DPDK is about two months so,
> pls., don't judge me too harsh... ;)
> I tried to cover the obvious things and actually learned the code while
> reviewing. The things u say, Pavel(X?) make sense and I obviously missed

I am really puzzled by the mail client I have to use. It stubbornly refuses
to use my correct name :P

> that.
> But as Changchun mentioned there is nothing that can't be fixed with a
> followup patches... ;)
> 
Roger that :P 
No judging; I should also have looked at those patches before they were acked.

Waiting for fixes.

Pawel

^ permalink raw reply	[flat|nested] 144+ messages in thread

end of thread, other threads:[~2015-01-22 13:20 UTC | newest]

Thread overview: 144+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-12-15  2:56 [dpdk-dev] [PATCH 0/6] Enable VF RSS for Niantic Ouyang Changchun
2014-12-15  2:57 ` [dpdk-dev] [PATCH 1/6] ixgbe: Code cleanup Ouyang Changchun
2014-12-15  2:57 ` [dpdk-dev] [PATCH 2/6] ixgbe: Negotiate VF API version Ouyang Changchun
2014-12-15  2:57 ` [dpdk-dev] [PATCH 3/6] ixgbe: Get VF queue number Ouyang Changchun
2014-12-15  2:57 ` [dpdk-dev] [PATCH 4/6] ether: Check VMDq RSS mode Ouyang Changchun
2014-12-15  2:57 ` [dpdk-dev] [PATCH 5/6] ixgbe: Config VF RSS Ouyang Changchun
2014-12-15  2:57 ` [dpdk-dev] [PATCH 6/6] testpmd: Set Rx VMDq RSS mode Ouyang Changchun
2014-12-15 10:55 ` [dpdk-dev] [PATCH 0/6] Enable VF RSS for Niantic Bruce Richardson
2014-12-16  0:58   ` Ouyang, Changchun
2014-12-24  2:56 ` [dpdk-dev] [PATCH v2 " Ouyang Changchun
2014-12-24  2:56   ` [dpdk-dev] [PATCH v2 1/6] ixgbe: Code cleanup Ouyang Changchun
2014-12-24  3:08     ` Zhang, Helin
2014-12-24  3:22       ` Ouyang, Changchun
2014-12-24  3:41         ` Zhang, Helin
2014-12-24  3:50           ` Ouyang, Changchun
2014-12-24  3:53             ` Zhang, Helin
2014-12-24  4:46               ` Ouyang, Changchun
2014-12-24  2:56   ` [dpdk-dev] [PATCH v2 2/6] ixgbe: Negotiate VF API version Ouyang Changchun
2014-12-24  2:56   ` [dpdk-dev] [PATCH v2 3/6] ixgbe: Get VF queue number Ouyang Changchun
2014-12-24  2:56   ` [dpdk-dev] [PATCH v2 4/6] ether: Check VMDq RSS mode Ouyang Changchun
2014-12-24  2:56   ` [dpdk-dev] [PATCH v2 5/6] ixgbe: Config VF RSS Ouyang Changchun
2014-12-24  2:56   ` [dpdk-dev] [PATCH v2 6/6] testpmd: Set Rx VMDq RSS mode Ouyang Changchun
2014-12-24  5:22   ` [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic Ouyang Changchun
2014-12-24  5:22     ` [dpdk-dev] [PATCH v3 1/6] ixgbe: Code cleanup Ouyang Changchun
2014-12-24  5:23     ` [dpdk-dev] [PATCH v3 2/6] ixgbe: Negotiate VF API version Ouyang Changchun
2014-12-24  5:23     ` [dpdk-dev] [PATCH v3 3/6] ixgbe: Get VF queue number Ouyang Changchun
2014-12-24  5:23     ` [dpdk-dev] [PATCH v3 4/6] ether: Check VMDq RSS mode Ouyang Changchun
2014-12-24  5:23     ` [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS Ouyang Changchun
2014-12-24 10:39       ` Vlad Zolotarov
2014-12-25  2:14         ` Ouyang, Changchun
2014-12-25 13:13           ` Vlad Zolotarov
2014-12-26  2:07             ` Ouyang, Changchun
2014-12-25  2:43         ` Ouyang, Changchun
2014-12-25 13:20           ` Vlad Zolotarov
2014-12-26  1:52             ` Ouyang, Changchun
2014-12-26  6:49               ` Vladislav Zolotarov
2014-12-26  7:26                 ` Ouyang, Changchun
2014-12-26  7:37                   ` Vladislav Zolotarov
2014-12-26  8:45                     ` Ouyang, Changchun
2014-12-28 10:14                       ` Vlad Zolotarov
2015-01-05 10:29               ` Bruce Richardson
2015-01-06  1:00                 ` Ouyang, Changchun
2014-12-25 13:38           ` Vlad Zolotarov
2014-12-26  1:26             ` Ouyang, Changchun
2015-01-04  2:10       ` Liang, Cunming
2015-01-04  6:25         ` Ouyang, Changchun
2014-12-24  5:23     ` [dpdk-dev] [PATCH v3 6/6] testpmd: Set Rx VMDq RSS mode Ouyang Changchun
2014-12-24  9:59     ` [dpdk-dev] [PATCH v3 0/6] Enable VF RSS for Niantic Vlad Zolotarov
2014-12-25  1:46       ` Ouyang, Changchun
2015-01-05 10:38         ` Bruce Richardson
2015-01-05 13:02           ` Vlad Zolotarov
2015-01-06  1:11             ` Ouyang, Changchun
2015-01-06 11:18               ` Vlad Zolotarov
2015-01-06 11:18               ` Vlad Zolotarov
2015-01-06  1:04           ` Ouyang, Changchun
2014-12-24 10:49     ` Vlad Zolotarov
2014-12-25  2:26       ` Ouyang, Changchun
2014-12-25 12:46         ` Vlad Zolotarov
2014-12-26  2:37           ` Ouyang, Changchun
     [not found]             ` <CAOYyTHbrB-VinN5ZEd1tYTnS7_GhCT1jiHiZzNKkQUEJ1rG79w@mail.gmail.com>
2014-12-26  5:16               ` Vladislav Zolotarov
2014-12-26  5:25                 ` Ouyang, Changchun
2015-01-04  7:18     ` [dpdk-dev] [PATCH v4 " Ouyang Changchun
2015-01-04  7:18       ` [dpdk-dev] [PATCH v4 1/6] ixgbe: Code cleanup Ouyang Changchun
2015-01-04  8:22         ` Vlad Zolotarov
2015-01-04  7:18       ` [dpdk-dev] [PATCH v4 2/6] ixgbe: Negotiate VF API version Ouyang Changchun
2015-01-04  8:26         ` Vlad Zolotarov
2015-01-04  8:30           ` Vlad Zolotarov
2015-01-04  8:37             ` Ouyang, Changchun
2015-01-04  8:40               ` Vlad Zolotarov
2015-01-04  8:51                 ` Ouyang, Changchun
2015-01-04  9:37                   ` Vlad Zolotarov
2015-01-04  7:18       ` [dpdk-dev] [PATCH v4 3/6] ixgbe: Get VF queue number Ouyang Changchun
2015-01-04  8:38         ` Vlad Zolotarov
2015-01-05  2:59           ` Ouyang, Changchun
2015-01-05 10:07             ` Vlad Zolotarov
2015-01-06  1:54               ` Ouyang, Changchun
2015-01-06 11:26                 ` Vlad Zolotarov
2015-01-07  1:18                   ` Ouyang, Changchun
2015-01-04  7:18       ` [dpdk-dev] [PATCH v4 4/6] ether: Check VMDq RSS mode Ouyang Changchun
2015-01-04  8:45         ` Vlad Zolotarov
2015-01-04  8:58           ` Ouyang, Changchun
2015-01-04  9:45             ` Vlad Zolotarov
2015-01-05  1:00               ` Ouyang, Changchun
2015-01-05 10:09                 ` Vlad Zolotarov
2015-01-06  1:56                   ` Ouyang, Changchun
2015-01-06 19:56                     ` Vlad Zolotarov
2015-01-07  2:28                       ` Ouyang, Changchun
2015-01-04  7:18       ` [dpdk-dev] [PATCH v4 5/6] ixgbe: Config VF RSS Ouyang Changchun
2015-01-04  7:18       ` [dpdk-dev] [PATCH v4 6/6] testpmd: Set Rx VMDq RSS mode Ouyang Changchun
2015-01-04  8:49         ` Vlad Zolotarov
2015-01-04  9:01           ` Ouyang, Changchun
2015-01-04  9:46             ` Vlad Zolotarov
2015-01-05  2:38               ` Ouyang, Changchun
2015-01-05 10:12                 ` Vlad Zolotarov
2015-01-06  2:01                   ` Ouyang, Changchun
2015-01-06 12:53                     ` Vlad Zolotarov
2015-01-07  1:50                       ` Ouyang, Changchun
2015-01-07  6:32       ` [dpdk-dev] [PATCH v5 0/6] Enable VF RSS for Niantic Ouyang Changchun
2015-01-07  6:32         ` [dpdk-dev] [PATCH v5 1/6] ixgbe: Code cleanup Ouyang Changchun
2015-01-07  6:32         ` [dpdk-dev] [PATCH v5 2/6] ixgbe: Negotiate VF API version Ouyang Changchun
2015-01-07  6:32         ` [dpdk-dev] [PATCH v5 3/6] ixgbe: Get VF queue number Ouyang Changchun
2015-01-08  9:01           ` Vlad Zolotarov
2015-01-07  6:32         ` [dpdk-dev] [PATCH v5 4/6] ether: Check VMDq RSS mode Ouyang Changchun
2015-01-08  9:19           ` Vlad Zolotarov
2015-01-08 18:48             ` Vlad Zolotarov
2015-01-09  5:54               ` Ouyang, Changchun
2015-01-09 13:49                 ` Vlad Zolotarov
2015-01-12  3:41                   ` Ouyang, Changchun
2015-01-12 13:58                     ` Vlad Zolotarov
2015-01-13  1:50                       ` Ouyang, Changchun
2015-01-13  9:00                         ` Vlad Zolotarov
2015-01-14  0:44                           ` Ouyang, Changchun
2015-01-07  6:32         ` [dpdk-dev] [PATCH v5 5/6] ixgbe: Config VF RSS Ouyang Changchun
2015-01-08  9:43           ` Vlad Zolotarov
2015-01-09  6:07             ` Ouyang, Changchun
2015-01-09 14:01               ` Vlad Zolotarov
2015-01-12  5:11                 ` Ouyang, Changchun
2015-01-07  6:32         ` [dpdk-dev] [PATCH v5 6/6] testpmd: Set Rx VMDq RSS mode Ouyang Changchun
2015-01-08  9:46           ` Vlad Zolotarov
2015-01-08  9:56         ` [dpdk-dev] [PATCH v5 0/6] Enable VF RSS for Niantic Vlad Zolotarov
2015-01-18 21:58           ` Thomas Monjalon
2015-01-19  9:40             ` Vlad Zolotarov
2015-01-12  5:59         ` [dpdk-dev] [PATCH v6 " Ouyang Changchun
2015-01-12  5:59           ` [dpdk-dev] [PATCH v6 1/6] ixgbe: Code cleanup Ouyang Changchun
2015-01-12  5:59           ` [dpdk-dev] [PATCH v6 2/6] ixgbe: Negotiate VF API version Ouyang Changchun
2015-01-12  5:59           ` [dpdk-dev] [PATCH v6 3/6] ixgbe: Get VF queue number Ouyang Changchun
2015-01-19  9:13             ` Wodkowski, PawelX
2015-01-20  0:54               ` Ouyang, Changchun
2015-01-12  5:59           ` [dpdk-dev] [PATCH v6 4/6] ether: Check VMDq RSS mode Ouyang Changchun
2015-01-12 14:06             ` Vlad Zolotarov
2015-01-18 22:04             ` Thomas Monjalon
2015-01-19 10:31             ` Wodkowski, PawelX
2015-01-20  1:03               ` Ouyang, Changchun
2015-01-12  5:59           ` [dpdk-dev] [PATCH v6 5/6] ixgbe: Config VF RSS Ouyang Changchun
2015-01-12 14:04             ` Vlad Zolotarov
2015-01-20  9:35             ` Wodkowski, PawelX
2015-01-21  2:43               ` Ouyang, Changchun
2015-01-21  8:44                 ` Wodkowski, PawelX
2015-01-22 12:59                   ` Vlad Zolotarov
2015-01-22 13:19                     ` Wodkowski, PawelX
2015-01-12  5:59           ` [dpdk-dev] [PATCH v6 6/6] testpmd: Set Rx VMDq RSS mode Ouyang Changchun
2015-01-12 14:05             ` Vlad Zolotarov
2015-01-18 22:24           ` [dpdk-dev] [PATCH v6 0/6] Enable VF RSS for Niantic Thomas Monjalon
2015-01-19  4:51             ` Ouyang, Changchun

DPDK patches and discussions

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://inbox.dpdk.org/dev/0 dev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 dev dev/ https://inbox.dpdk.org/dev \
		dev@dpdk.org
	public-inbox-index dev

Example config snippet for mirrors.
Newsgroup available over NNTP:
	nntp://inbox.dpdk.org/inbox.dpdk.dev


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git